├── .gitignore ├── .travis.yml ├── LICENSE ├── NOTICE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments ├── ctdet_coco_dla_1x.sh ├── ctdet_coco_dla_2x.sh ├── ctdet_coco_hg.sh ├── ctdet_coco_resdcn101.sh ├── ctdet_coco_resdcn18.sh ├── ctdet_pascal_dla_384.sh ├── ctdet_pascal_dla_512.sh ├── ctdet_pascal_resdcn101_384.sh ├── ctdet_pascal_resdcn101_512.sh ├── ctdet_pascal_resdcn18_384.sh ├── ctdet_pascal_resdcn18_512.sh ├── ddd_3dop.sh ├── ddd_sub.sh ├── exdet_coco_dla.sh ├── exdet_coco_hg.sh ├── multi_pose_dla_1x.sh ├── multi_pose_dla_3x.sh ├── multi_pose_hg_1x.sh └── multi_pose_hg_3x.sh ├── images ├── 16004479832_a748d55f21_k.jpg ├── 17790319373_bd19b24cfc_k.jpg ├── 18124840932_e42b3e377c_k.jpg ├── 19064748793_bb942deea1_k.jpg ├── 24274813513_0cfd2ce6d0_k.jpg ├── 33823288584_1d21cf0a26_k.jpg ├── 33887522274_eebd074106_k.jpg ├── 34501842524_3c858b3080_k.jpg └── NOTICE ├── models └── .gitignore ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── det1.png ├── det2.png ├── fig2.png ├── pose1.png ├── pose2.png └── pose3.png ├── requirements.txt └── src ├── _init_paths.py ├── demo.py ├── lib ├── datasets │ ├── dataset │ │ ├── coco.py │ │ ├── coco_hp.py │ │ ├── kitti.py │ │ └── pascal.py │ ├── dataset_factory.py │ └── sample │ │ ├── ctdet.py │ │ ├── ddd.py │ │ ├── exdet.py │ │ └── multi_pose.py ├── detectors │ ├── base_detector.py │ ├── ctdet.py │ ├── ddd.py │ ├── detector_factory.py │ ├── exdet.py │ └── multi_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── build_double.py │ │ │ ├── dcn_v2.py │ │ │ ├── dcn_v2_func.py │ │ │ ├── make.sh │ │ │ ├── src │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda_double.cu │ │ │ │ │ └── dcn_v2_psroi_pooling_cuda_double.h │ │ │ │ ├── dcn_v2.c │ │ │ │ ├── dcn_v2.h │ │ │ │ ├── dcn_v2_cuda.c │ │ │ │ ├── dcn_v2_cuda.h │ │ │ │ ├── dcn_v2_cuda_double.c │ │ │ │ ├── dcn_v2_cuda_double.h │ │ │ │ ├── dcn_v2_double.c │ │ │ │ └── dcn_v2_double.h │ │ │ └── test.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ └── resnet_dcn.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── ctdet.py │ ├── ddd.py │ ├── exdet.py │ ├── multi_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── ddd_utils.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── main.py ├── test.py └── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_hourglass_weight.py ├── convert_kitti_to_coco.py ├── eval_coco.py ├── eval_coco_hp.py ├── get_kitti.sh ├── get_pascal_voc.sh ├── kitti_eval ├── README.md ├── evaluate_object_3d.cpp ├── evaluate_object_3d_offline ├── evaluate_object_3d_offline.cpp └── mail.h ├── merge_pascal_json.py ├── reval.py ├── vis_pred.py └── voc_eval_lib ├── LICENSE ├── Makefile ├── __init__.py ├── datasets ├── __init__.py ├── bbox.pyx ├── ds_utils.py ├── imdb.py ├── pascal_voc.py └── voc_eval.py ├── model ├── __init__.py ├── bbox_transform.py ├── 
config.py ├── nms_wrapper.py └── test.py ├── nms ├── .gitignore ├── __init__.py ├── cpu_nms.c ├── cpu_nms.pyx ├── gpu_nms.cpp ├── gpu_nms.hpp ├── gpu_nms.pyx ├── nms_kernel.cu └── py_cpu_nms.py ├── setup.py └── utils ├── .gitignore ├── __init__.py ├── bbox.pyx ├── blob.py ├── timer.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | legacy/* 2 | .DS_Store 3 | debug/* 4 | *.DS_Store 5 | *.json 6 | *.mat 7 | src/.vscode/* 8 | preds/* 9 | *.h5 10 | *.pth 11 | *.checkpoint 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | .venv 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | dist: xenial # ubuntu-16.04 3 | language: python 4 | cache: pip 5 | python: 6 | - 3.6 7 | - 3.7 8 | install: 9 | - pip install flake8 10 | - pip install -r requirements.txt 11 | before_script: 12 | # stop the build if there are Python syntax errors or undefined names 13 | - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics 14 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 15 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 16 | script: 17 | - true # add other tests here 18 | notifications: 19 | on_success: change 20 | on_failure: change # `always` will be the setting once code changes slow down 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_dla_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_dla_1x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_dla_1x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_dla_2x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 230 lr_step 180,210 4 | # or use the following command if your have coco_s2_dla_1x trained 5 | # python main.py ctdet --exp_id coco_dla_2x --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/ctdet/coco_dla_1x/model_90.pth --resume 6 | # test 7 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume 8 | # flip test 9 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test 10 | # multi scale test 11 | python test.py ctdet --exp_id coco_dla_2x --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 12 | cd .. 
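The `--batch_size 128 --master_batch 9` pair above controls how the global batch is divided over the 8 GPUs: the master GPU keeps a smaller share (it also gathers the other GPUs' outputs, so it needs extra memory headroom) and the remainder is split evenly across the rest. A minimal sketch of that split, as an illustration only (the repository computes its own chunk sizes in `src/lib/opts.py` / `src/lib/models/data_parallel.py`):

~~~
# Sketch: split a global batch across GPUs with a smaller share on the master GPU.
# Illustrative approximation, not the repository's exact implementation.

def chunk_sizes(batch_size, master_batch, num_gpus):
    rest = batch_size - master_batch              # images left for the non-master GPUs
    per_gpu = rest // (num_gpus - 1)              # even share per remaining GPU
    sizes = [master_batch] + [per_gpu] * (num_gpus - 1)
    sizes[-1] += rest - per_gpu * (num_gpus - 1)  # last GPU absorbs any remainder
    return sizes

print(chunk_sizes(128, 9, 8))  # -> [9, 17, 17, 17, 17, 17, 17, 17]
~~~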
13 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4 4 | # test 5 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. -------------------------------------------------------------------------------- /experiments/ctdet_coco_resdcn101.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_resdcn101 --arch resdcn_101 --batch_size 96 --master_batch 5 --lr 3.75e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_resdcn101 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_resdcn18.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --batch_size 114 --master_batch 18 --lr 5e-4 --gpus 0,1,2,3 --num_workers 16 4 | # test 5 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume 6 | # flip test 7 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py ctdet --exp_id coco_resdcn18 --arch resdcn_18 --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_dla_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_dla_384 --dataset pascal --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_dla_384 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_dla_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_dla_512 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 
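The Pascal experiments above train for 70 epochs with `--lr_step 45,60`, i.e. the learning rate is dropped in steps at those epochs. A small sketch of such a step schedule, assuming a 10x drop at each milestone (verify the exact factor in `src/main.py` before relying on it):

~~~
# Sketch: step learning-rate schedule driven by --lr and --lr_step.
# Assumes each milestone multiplies the rate by 0.1.

def lr_at_epoch(base_lr, lr_step, epoch):
    drops = sum(1 for e in lr_step if epoch >= e)  # milestones already passed
    return base_lr * (0.1 ** drops)

base_lr, lr_step = 1.25e-4, [45, 60]  # 1.25e-4 is the default reported in GETTING_STARTED.md
for epoch in (1, 45, 60, 70):
    print(epoch, lr_at_epoch(base_lr, lr_step, epoch))
~~~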
9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn101_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn101_384 --arch resdcn_101 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn101_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 --gpus 0,1,2,3 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn101_512 --arch resdcn_101 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn18_384.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn18_384 --arch resdcn_18 --dataset pascal --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ctdet_pascal_resdcn18_512.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --num_epochs 70 --lr_step 45,60 4 | # test 5 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume 6 | # flip test 7 | python test.py ctdet --exp_id pascal_resdcn18_512 --arch resdcn_18 --dataset pascal --input_res 512 --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/ddd_3dop.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --resume 6 | cd .. 7 | -------------------------------------------------------------------------------- /experiments/ddd_sub.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --batch_size 16 --master_batch 7 --num_epochs 70 --lr_step 45,60 --gpus 0,1 4 | # test 5 | python test.py ddd --exp_id sub --dataset kitti --kitti_split subcnn --resume 6 | cd .. 
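The `ddd` test commands above end by writing detections as plain-text KITTI result files, which is what `tools/kitti_eval/evaluate_object_3d_offline` consumes (see `save_results` in `src/lib/datasets/dataset/kitti.py` further down in this listing). A hedged sketch of emitting one such line; the field order follows the standard KITTI label layout, and the per-detection value layout used here is an assumption for illustration:

~~~
# Sketch: write one detection as a KITTI result line
# (type, truncated, occluded, alpha, 2D bbox, 3D dims, 3D location, rotation_y, score).

det = {
    'type': 'Car', 'alpha': -0.20,
    'bbox': [712.4, 143.0, 810.7, 307.9],  # x1, y1, x2, y2 in pixels
    'dim': [1.89, 0.48, 1.20],             # height, width, length in meters
    'loc': [1.84, 1.47, 8.41],             # x, y, z in camera coordinates
    'rot_y': 0.01, 'score': 0.98,
}

fields = [0.0, 0, det['alpha'], *det['bbox'], *det['dim'], *det['loc'], det['rot_y'], det['score']]
line = det['type'] + ''.join(' {:.2f}'.format(v) for v in fields)
print(line)  # -> "Car 0.00 0 -0.20 712.40 143.00 ..."
~~~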
7 | -------------------------------------------------------------------------------- /experiments/exdet_coco_dla.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py exdet --exp_id coco_dla --batch_size 64 --master_batch 1 --lr 2.5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 8 4 | # test 5 | python test.py exdet --exp_id coco_dla --keep_res --resume 6 | # flip test 7 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py exdet --exp_id coco_dla --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/exdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py exdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 4 | # test 5 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 8 | # multi scale test 9 | python test.py exdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 10 | cd .. 11 | -------------------------------------------------------------------------------- /experiments/multi_pose_dla_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id dla_1x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 4 | # test 5 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume 6 | # flip test 7 | python test.py multi_pose --exp_id dla_1x --dataset coco_hp --keep_res --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/multi_pose_dla_3x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --num_epochs 320 lr_step 270,300 4 | # or use the following command if your have dla_1x trained 5 | # python main.py multi_pose --exp_id dla_3x --dataset coco_hp --batch_size 128 --master_batch 9 --lr 5e-4 --gpus 0,1,2,3,4,5,6,7 --num_workers 16 --load_model ../exp/multi_pose/dla_1x/model_90.pth --resume 6 | # test 7 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume 8 | # flip test 9 | python test.py multi_pose --exp_id dla_3x --dataset coco_hp --keep_res --resume --flip_test 10 | cd .. 
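The pose runs above warm-start from the detection checkpoint `ctdet_coco_dla_2x.pth` via `--load_model`, even though the `multi_pose` model has extra output heads, so loading has to copy only the parameters whose names and shapes match and leave the new heads at their fresh initialization. A minimal sketch of that kind of partial loading, assuming plain `state_dict`-style checkpoints (the repository's own logic lives in `src/lib/models/model.py`):

~~~
import torch

# Sketch: copy only weights whose name and shape match the target model, so a
# detection checkpoint can warm-start a pose network that has additional heads.
# Illustration of the idea, not the repository's exact load_model().

def partial_load(model, checkpoint_path):
    state = torch.load(checkpoint_path, map_location='cpu')
    if isinstance(state, dict) and 'state_dict' in state:
        state = state['state_dict']                 # tolerate wrapped checkpoints
    own = model.state_dict()
    matched = {k: v for k, v in state.items()
               if k in own and own[k].shape == v.shape}
    own.update(matched)
    model.load_state_dict(own)
    print('loaded {} tensors, skipped {}'.format(len(matched), len(state) - len(matched)))
    return model
~~~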
11 | -------------------------------------------------------------------------------- /experiments/multi_pose_hg_1x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 50 --lr_step 40 4 | # test 5 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume 6 | # flip test 7 | python test.py multi_pose --exp_id hg_1x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test 8 | cd .. 9 | -------------------------------------------------------------------------------- /experiments/multi_pose_hg_3x.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 -load_model ../models/ctdet_coco_hg.pth --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 4 | # or use the following command if your have dla_1x trained 5 | # python main.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3,4 --num_epochs 150 --lr_step 130 --load_model ../exp/multi_pose/hg_1x/model_40.pth --resume 6 | # test 7 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume 8 | # flip test 9 | python test.py multi_pose --exp_id hg_3x --dataset coco_hp --arch hourglass --keep_res --resume --flip_test 10 | cd .. 11 | -------------------------------------------------------------------------------- /images/16004479832_a748d55f21_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/16004479832_a748d55f21_k.jpg -------------------------------------------------------------------------------- /images/17790319373_bd19b24cfc_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/17790319373_bd19b24cfc_k.jpg -------------------------------------------------------------------------------- /images/18124840932_e42b3e377c_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/18124840932_e42b3e377c_k.jpg -------------------------------------------------------------------------------- /images/19064748793_bb942deea1_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/19064748793_bb942deea1_k.jpg -------------------------------------------------------------------------------- /images/24274813513_0cfd2ce6d0_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/24274813513_0cfd2ce6d0_k.jpg -------------------------------------------------------------------------------- /images/33823288584_1d21cf0a26_k.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33823288584_1d21cf0a26_k.jpg -------------------------------------------------------------------------------- /images/33887522274_eebd074106_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/33887522274_eebd074106_k.jpg -------------------------------------------------------------------------------- /images/34501842524_3c858b3080_k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/images/34501842524_3c858b3080_k.jpg -------------------------------------------------------------------------------- /images/NOTICE: -------------------------------------------------------------------------------- 1 | The demo images are licensed as United States government work: 2 | https://www.usa.gov/government-works 3 | 4 | The image files were obtained on Jan 13, 2018 from the following 5 | URLs. 6 | 7 | 16004479832_a748d55f21_k.jpg 8 | https://www.flickr.com/photos/archivesnews/16004479832 9 | 10 | 18124840932_e42b3e377c_k.jpg 11 | https://www.flickr.com/photos/usnavy/18124840932 12 | 13 | 33887522274_eebd074106_k.jpg 14 | https://www.flickr.com/photos/usaid_pakistan/33887522274 15 | 16 | 15673749081_767a7fa63a_k.jpg 17 | https://www.flickr.com/photos/usnavy/15673749081 18 | 19 | 34501842524_3c858b3080_k.jpg 20 | https://www.flickr.com/photos/departmentofenergy/34501842524 21 | 22 | 24274813513_0cfd2ce6d0_k.jpg 23 | https://www.flickr.com/photos/dhsgov/24274813513 24 | 25 | 19064748793_bb942deea1_k.jpg 26 | https://www.flickr.com/photos/statephotos/19064748793 27 | 28 | 33823288584_1d21cf0a26_k.jpg 29 | https://www.flickr.com/photos/cbpphotos/33823288584 30 | 31 | 17790319373_bd19b24cfc_k.jpg 32 | https://www.flickr.com/photos/secdef/17790319373 33 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 
9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | 26 | - [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation: 27 | 28 | ~~~ 29 | cd $CenterNet_ROOT/tools/ 30 | python gen_coco_extreme_points.py 31 | ~~~ 32 | It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`. 33 | 34 | ### Pascal VOC 35 | 36 | - Run 37 | 38 | ~~~ 39 | cd $CenterNet_ROOT/tools/ 40 | bash get_pascal_voc.sh 41 | ~~~ 42 | - The above script includes: 43 | - Download, unzip, and move Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/). 44 | - [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) Pascal VOC annotation in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)). 45 | - Combine train/val 2007/2012 annotation files into a single json. 46 | 47 | 48 | - Move the created `voc` folder to `data` (or create symlinks) to make the data folder like: 49 | 50 | ~~~ 51 | ${CenterNet_ROOT} 52 | |-- data 53 | `-- |-- voc 54 | `-- |-- annotations 55 | | |-- pascal_trainval0712.json 56 | | |-- pascal_test2017.json 57 | |-- images 58 | | |-- 000001.jpg 59 | | ...... 60 | `-- VOCdevkit 61 | 62 | ~~~ 63 | The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py). 64 | 65 | ### KITTI 66 | 67 | - Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip. 68 | 69 | - Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below 70 | 71 | ~~~ 72 | ${CenterNet_ROOT} 73 | |-- data 74 | `-- |-- kitti 75 | `-- |-- training 76 | | |-- image_2 77 | | |-- label_2 78 | | |-- calib 79 | |-- ImageSets_3dop 80 | | |-- test.txt 81 | | |-- train.txt 82 | | |-- val.txt 83 | | |-- trainval.txt 84 | `-- ImageSets_subcnn 85 | |-- test.txt 86 | |-- train.txt 87 | |-- val.txt 88 | |-- trainval.txt 89 | ~~~ 90 | 91 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation. 
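For reference, each line of a KITTI `label_2` file describes one object as `type truncated occluded alpha bbox(4) dimensions(3) location(3) rotation_y`, and the conversion script turns these into COCO-style annotation dicts. A rough sketch of parsing one line; the exact fields emitted by `convert_kitti_to_coco.py` are not shown here, so the output keys and category ids below are illustrative (the 1/2/3 ids match the `Pedestrian`/`Car`/`Cyclist` ordering used by `src/lib/datasets/dataset/kitti.py`):

~~~
# Sketch: parse one KITTI label line into a COCO-style annotation dict.
# Output keys beyond 'bbox'/'category_id' are illustrative assumptions.

CAT_IDS = {'Pedestrian': 1, 'Car': 2, 'Cyclist': 3}

def kitti_line_to_coco(line, image_id, ann_id):
    f = line.strip().split(' ')
    if f[0] not in CAT_IDS:
        return None                           # e.g. skip 'DontCare' regions
    x1, y1, x2, y2 = map(float, f[4:8])       # 2D box in pixels
    return {
        'id': ann_id,
        'image_id': image_id,
        'category_id': CAT_IDS[f[0]],
        'bbox': [x1, y1, x2 - x1, y2 - y1],   # COCO uses [x, y, w, h]
        'alpha': float(f[3]),                 # observation angle
        'dim': [float(v) for v in f[8:11]],   # height, width, length (meters)
        'location': [float(v) for v in f[11:14]],
        'rotation_y': float(f[14]),
    }
~~~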
92 | 93 | - Link image folder 94 | 95 | ~~~ 96 | cd ${CenterNet_ROOT}/data/kitti/ 97 | mkdir images 98 | ln -s training/image_2 images/trainval 99 | ~~~ 100 | 101 | - The data structure should look like: 102 | 103 | ~~~ 104 | ${CenterNet_ROOT} 105 | |-- data 106 | `-- |-- kitti 107 | `-- |-- annotations 108 | | |-- kitti_3dop_train.json 109 | | |-- kitti_3dop_val.json 110 | | |-- kitti_subcnn_train.json 111 | | |-- kitti_subcnn_val.json 112 | `-- images 113 | |-- trainval 114 | |-- test 115 | ~~~ 116 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials for developing CenterNet. `src/lib/opts.py` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI format to COCO format. 9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and the annotation path. 10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/trains/`, and `src/lib/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list with one element per stage (a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads`. 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. 20 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA, 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution. Without `--keep_res` it will resize the images to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip test and multi-scale test, respectively. The expected APs are `39.2` and `41.7`, respectively.
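For intuition on the flip test: `pre_process` in `src/lib/detectors/base_detector.py` (shown later in this listing) stacks a horizontally mirrored copy of the input into the batch, and the per-task detector then fuses the two predictions before decoding. A toy sketch of the heatmap half of that fusion, assuming simple averaging and a model that returns a dict with an `hm` output of shape `[N, C, H, W]` (both assumptions; the real fusion lives in the detectors under `src/lib/detectors/`):

~~~
import torch

# Toy sketch of flip-test fusion for a class heatmap: average the prediction on
# the original image with the mirrored-back prediction on the flipped image.

def flip_test_heatmap(model, images):
    with torch.no_grad():
        hm = torch.sigmoid(model(images)['hm'])
        hm_flip = torch.sigmoid(model(torch.flip(images, dims=[3]))['hm'])
        hm_flip = torch.flip(hm_flip, dims=[3])   # mirror back to the original frame
    return 0.5 * (hm + hm_flip)
~~~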
19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for DLA 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can switch to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. For the SubCNN split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable to 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size. 75 | For example, to train COCO object detection with DLA on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch evenly splits the total batch size across the GPUs. 83 | `--master_batch` allows using a different batch size on the master GPU, which usually costs more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will load the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1JMbHgN4uLkP9MAyJU5EeHrgxwe101hwO) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
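As a quick helper for the linear learning-rate rule mentioned above, the rate can simply be scaled in proportion to the batch size relative to the documented `1.25e-4` at batch size `32`; a minimal sketch:

~~~
# Sketch: linear learning-rate scaling relative to the documented reference
# of lr = 1.25e-4 at batch size 32.

def scaled_lr(batch_size, base_lr=1.25e-4, base_batch=32):
    return base_lr * batch_size / base_batch

for bs in (32, 64, 128):
    print(bs, scaled_lr(bs))  # 128 -> 5e-4, matching the 8-GPU COCO DLA scripts
~~~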
90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v0.4.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] Create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 0.4.1: 19 | 20 | ~~~ 21 | conda install pytorch=0.4.1 torchvision -c pytorch 22 | ~~~ 23 | 24 | Then disable cuDNN batch normalization (due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)). 25 | 26 | ~~~ 27 | # PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/ 28 | # for pytorch v0.4.0 29 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 30 | # for pytorch v0.4.1 31 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 32 | ~~~ 33 | 34 | For other PyTorch versions, you can manually open `torch/nn/functional.py`, find the line with `torch.batch_norm`, and replace `torch.backends.cudnn.enabled` with `False`. We observed slightly worse training results without doing so. 35 | 36 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | 3. Clone this repo: 47 | 48 | ~~~ 49 | CenterNet_ROOT=/path/to/clone/CenterNet 50 | git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT 51 | ~~~ 52 | 53 | 54 | 4. Install the requirements 55 | 56 | ~~~ 57 | pip install -r requirements.txt 58 | ~~~ 59 | 60 | 61 | 5. Compile the deformable convolution ops (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)). 62 | 63 | ~~~ 64 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 65 | ./make.sh 66 | ~~~ 67 | 6. [Optional] Compile NMS if you want to use multi-scale testing or to test ExtremeNet. 68 | 69 | ~~~ 70 | cd $CenterNet_ROOT/src/lib/external 71 | make 72 | ~~~ 73 | 74 | 7. Download pretrained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md).
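Once the steps above are done, a quick sanity check of the environment can save debugging time later. A hedged sketch; the DCNv2 import path is an assumption based on the directory layout above and may need adjusting for your `PYTHONPATH`:

~~~
import torch

# Quick environment sanity check for the setup described above.
print('PyTorch:', torch.__version__)          # expected: 0.4.1
print('CUDA available:', torch.cuda.is_available())

try:
    # Assumed import path for the compiled deformable convolution extension
    # (run from CenterNet_ROOT/src with src/lib on sys.path, as _init_paths.py does).
    from models.networks.DCNv2.dcn_v2 import DCN
    print('DCNv2 import: ok')
except ImportError as e:
    print('DCNv2 import failed (did ./make.sh succeed?):', e)
~~~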
75 | -------------------------------------------------------------------------------- /readme/det1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det1.png -------------------------------------------------------------------------------- /readme/det2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/det2.png -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/fig2.png -------------------------------------------------------------------------------- /readme/pose1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose1.png -------------------------------------------------------------------------------- /readme/pose2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose2.png -------------------------------------------------------------------------------- /readme/pose3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/readme/pose3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import cv2 9 | 10 | from opts import opts 11 | from detectors.detector_factory import detector_factory 12 | 13 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 14 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 15 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 16 | 17 | def demo(opt): 18 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 19 | opt.debug = max(opt.debug, 1) 20 | Detector = detector_factory[opt.task] 21 | detector = Detector(opt) 22 | 23 | if opt.demo == 'webcam' or \ 24 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 25 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 26 | detector.pause = False 27 | while True: 28 | _, img = cam.read() 29 | 
cv2.imshow('input', img) 30 | ret = detector.run(img) 31 | time_str = '' 32 | for stat in time_stats: 33 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 34 | print(time_str) 35 | if cv2.waitKey(1) == 27: 36 | return # esc to quit 37 | else: 38 | if os.path.isdir(opt.demo): 39 | image_names = [] 40 | ls = os.listdir(opt.demo) 41 | for file_name in sorted(ls): 42 | ext = file_name[file_name.rfind('.') + 1:].lower() 43 | if ext in image_ext: 44 | image_names.append(os.path.join(opt.demo, file_name)) 45 | else: 46 | image_names = [opt.demo] 47 | 48 | for (image_name) in image_names: 49 | ret = detector.run(image_name) 50 | time_str = '' 51 | for stat in time_stats: 52 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 53 | print(time_str) 54 | if __name__ == '__main__': 55 | opt = opts().init() 56 | demo(opt) 57 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCO(data.Dataset): 14 | num_classes = 80 15 | default_resolution = [512, 512] 16 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 17 | dtype=np.float32).reshape(1, 1, 3) 18 | std = np.array([0.28863828, 0.27408164, 0.27809835], 19 | dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(COCO, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'coco') 24 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 25 | if split == 'test': 26 | self.annot_path = os.path.join( 27 | self.data_dir, 'annotations', 28 | 'image_info_test-dev2017.json').format(split) 29 | else: 30 | if opt.task == 'exdet': 31 | self.annot_path = os.path.join( 32 | self.data_dir, 'annotations', 33 | 'instances_extreme_{}2017.json').format(split) 34 | else: 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'instances_{}2017.json').format(split) 38 | self.max_objs = 128 39 | self.class_name = [ 40 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 41 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 42 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 43 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 44 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 45 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 46 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 47 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 48 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 49 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 50 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 51 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 52 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 53 | self._valid_ids = [ 54 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 55 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 56 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 57 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 58 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59 | 58, 59, 60, 61, 62, 63, 
64, 65, 67, 70, 60 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 61 | 82, 84, 85, 86, 87, 88, 89, 90] 62 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 63 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \ 64 | for v in range(1, self.num_classes + 1)] 65 | self._data_rng = np.random.RandomState(123) 66 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 67 | dtype=np.float32) 68 | self._eig_vec = np.array([ 69 | [-0.58752847, -0.69563484, 0.41340352], 70 | [-0.5832747, 0.00994535, -0.81221408], 71 | [-0.56089297, 0.71832671, 0.41158938] 72 | ], dtype=np.float32) 73 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 74 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 75 | 76 | self.split = split 77 | self.opt = opt 78 | 79 | print('==> initializing coco 2017 {} data.'.format(split)) 80 | self.coco = coco.COCO(self.annot_path) 81 | self.images = self.coco.getImgIds() 82 | self.num_samples = len(self.images) 83 | 84 | print('Loaded {} {} samples'.format(split, self.num_samples)) 85 | 86 | def _to_float(self, x): 87 | return float("{:.2f}".format(x)) 88 | 89 | def convert_eval_format(self, all_bboxes): 90 | # import pdb; pdb.set_trace() 91 | detections = [] 92 | for image_id in all_bboxes: 93 | for cls_ind in all_bboxes[image_id]: 94 | category_id = self._valid_ids[cls_ind - 1] 95 | for bbox in all_bboxes[image_id][cls_ind]: 96 | bbox[2] -= bbox[0] 97 | bbox[3] -= bbox[1] 98 | score = bbox[4] 99 | bbox_out = list(map(self._to_float, bbox[0:4])) 100 | 101 | detection = { 102 | "image_id": int(image_id), 103 | "category_id": int(category_id), 104 | "bbox": bbox_out, 105 | "score": float("{:.2f}".format(score)) 106 | } 107 | if len(bbox) > 5: 108 | extreme_points = list(map(self._to_float, bbox[5:13])) 109 | detection["extreme_points"] = extreme_points 110 | detections.append(detection) 111 | return detections 112 | 113 | def __len__(self): 114 | return self.num_samples 115 | 116 | def save_results(self, results, save_dir): 117 | json.dump(self.convert_eval_format(results), 118 | open('{}/results.json'.format(save_dir), 'w')) 119 | 120 | def run_eval(self, results, save_dir): 121 | # result_json = os.path.join(save_dir, "results.json") 122 | # detections = self.convert_eval_format(results) 123 | # json.dump(detections, open(result_json, "w")) 124 | self.save_results(results, save_dir) 125 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 126 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 127 | coco_eval.evaluate() 128 | coco_eval.accumulate() 129 | coco_eval.summarize() 130 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 18 | dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], 20 | dtype=np.float32).reshape(1, 1, 3) 21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 22 | [11, 12], [13, 14], [15, 16]] 23 | def 
__init__(self, opt, split): 24 | super(COCOHP, self).__init__() 25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 26 | [4, 6], [3, 5], [5, 6], 27 | [5, 7], [7, 9], [6, 8], [8, 10], 28 | [6, 12], [5, 11], [11, 12], 29 | [12, 14], [14, 16], [11, 13], [13, 15]] 30 | 31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 32 | self.data_dir = os.path.join(opt.data_dir, 'coco') 33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 34 | if split == 'test': 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'image_info_test-dev2017.json').format(split) 38 | else: 39 | self.annot_path = os.path.join( 40 | self.data_dir, 'annotations', 41 | 'person_keypoints_{}2017.json').format(split) 42 | self.max_objs = 32 43 | self._data_rng = np.random.RandomState(123) 44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 45 | dtype=np.float32) 46 | self._eig_vec = np.array([ 47 | [-0.58752847, -0.69563484, 0.41340352], 48 | [-0.5832747, 0.00994535, -0.81221408], 49 | [-0.56089297, 0.71832671, 0.41158938] 50 | ], dtype=np.float32) 51 | self.split = split 52 | self.opt = opt 53 | 54 | print('==> initializing coco 2017 {} data.'.format(split)) 55 | self.coco = coco.COCO(self.annot_path) 56 | image_ids = self.coco.getImgIds() 57 | 58 | if split == 'train': 59 | self.images = [] 60 | for img_id in image_ids: 61 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 62 | if len(idxs) > 0: 63 | self.images.append(img_id) 64 | else: 65 | self.images = image_ids 66 | self.num_samples = len(self.images) 67 | print('Loaded {} {} samples'.format(split, self.num_samples)) 68 | 69 | def _to_float(self, x): 70 | return float("{:.2f}".format(x)) 71 | 72 | def convert_eval_format(self, all_bboxes): 73 | # import pdb; pdb.set_trace() 74 | detections = [] 75 | for image_id in all_bboxes: 76 | for cls_ind in all_bboxes[image_id]: 77 | category_id = 1 78 | for dets in all_bboxes[image_id][cls_ind]: 79 | bbox = dets[:4] 80 | bbox[2] -= bbox[0] 81 | bbox[3] -= bbox[1] 82 | score = dets[4] 83 | bbox_out = list(map(self._to_float, bbox)) 84 | keypoints = np.concatenate([ 85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 87 | keypoints = list(map(self._to_float, keypoints)) 88 | 89 | detection = { 90 | "image_id": int(image_id), 91 | "category_id": int(category_id), 92 | "bbox": bbox_out, 93 | "score": float("{:.2f}".format(score)), 94 | "keypoints": keypoints 95 | } 96 | detections.append(detection) 97 | return detections 98 | 99 | def __len__(self): 100 | return self.num_samples 101 | 102 | def save_results(self, results, save_dir): 103 | json.dump(self.convert_eval_format(results), 104 | open('{}/results.json'.format(save_dir), 'w')) 105 | 106 | 107 | def run_eval(self, results, save_dir): 108 | # result_json = os.path.join(opt.save_dir, "results.json") 109 | # detections = convert_eval_format(all_boxes) 110 | # json.dump(detections, open(result_json, "w")) 111 | self.save_results(results, save_dir) 112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 114 | coco_eval.evaluate() 115 | coco_eval.accumulate() 116 | coco_eval.summarize() 117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 118 | coco_eval.evaluate() 119 | coco_eval.accumulate() 120 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/datasets/dataset/kitti.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import pycocotools.coco as coco 7 | import numpy as np 8 | import torch 9 | import json 10 | import cv2 11 | import os 12 | import math 13 | 14 | import torch.utils.data as data 15 | 16 | 17 | class KITTI(data.Dataset): 18 | num_classes = 3 19 | default_resolution = [384, 1280] 20 | mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 21 | std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 22 | 23 | def __init__(self, opt, split): 24 | super(KITTI, self).__init__() 25 | self.data_dir = os.path.join(opt.data_dir, 'kitti') 26 | self.img_dir = os.path.join(self.data_dir, 'images', 'trainval') 27 | if opt.trainval: 28 | split = 'trainval' if split == 'train' else 'test' 29 | self.img_dir = os.path.join(self.data_dir, 'images', split) 30 | self.annot_path = os.path.join( 31 | self.data_dir, 'annotations', 'kitti_{}.json').format(split) 32 | else: 33 | self.annot_path = os.path.join(self.data_dir, 34 | 'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split) 35 | self.max_objs = 50 36 | self.class_name = [ 37 | '__background__', 'Pedestrian', 'Car', 'Cyclist'] 38 | self.cat_ids = {1:0, 2:1, 3:2, 4:-3, 5:-3, 6:-2, 7:-99, 8:-99, 9:-1} 39 | 40 | self._data_rng = np.random.RandomState(123) 41 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 42 | dtype=np.float32) 43 | self._eig_vec = np.array([ 44 | [-0.58752847, -0.69563484, 0.41340352], 45 | [-0.5832747, 0.00994535, -0.81221408], 46 | [-0.56089297, 0.71832671, 0.41158938] 47 | ], dtype=np.float32) 48 | self.split = split 49 | self.opt = opt 50 | self.alpha_in_degree = False 51 | 52 | print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split)) 53 | self.coco = coco.COCO(self.annot_path) 54 | self.images = self.coco.getImgIds() 55 | self.num_samples = len(self.images) 56 | 57 | print('Loaded {} {} samples'.format(split, self.num_samples)) 58 | 59 | def __len__(self): 60 | return self.num_samples 61 | 62 | def _to_float(self, x): 63 | return float("{:.2f}".format(x)) 64 | 65 | def convert_eval_format(self, all_bboxes): 66 | pass 67 | 68 | def save_results(self, results, save_dir): 69 | results_dir = os.path.join(save_dir, 'results') 70 | if not os.path.exists(results_dir): 71 | os.mkdir(results_dir) 72 | for img_id in results.keys(): 73 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id)) 74 | f = open(out_path, 'w') 75 | for cls_ind in results[img_id]: 76 | for j in range(len(results[img_id][cls_ind])): 77 | class_name = self.class_name[cls_ind] 78 | f.write('{} 0.0 0'.format(class_name)) 79 | for i in range(len(results[img_id][cls_ind][j])): 80 | f.write(' {:.2f}'.format(results[img_id][cls_ind][j][i])) 81 | f.write('\n') 82 | f.close() 83 | 84 | def run_eval(self, results, save_dir): 85 | self.save_results(results, save_dir) 86 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \ 87 | '../data/kitti/training/label_val ' + \ 88 | '{}/results/'.format(save_dir)) 89 | 90 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/pascal.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import 
pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class PascalVOC(data.Dataset): 14 | num_classes = 20 15 | default_resolution = [384, 384] 16 | mean = np.array([0.485, 0.456, 0.406], 17 | dtype=np.float32).reshape(1, 1, 3) 18 | std = np.array([0.229, 0.224, 0.225], 19 | dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(PascalVOC, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'voc') 24 | self.img_dir = os.path.join(self.data_dir, 'images') 25 | _ann_name = {'train': 'trainval0712', 'val': 'test2007'} 26 | self.annot_path = os.path.join( 27 | self.data_dir, 'annotations', 28 | 'pascal_{}.json').format(_ann_name[split]) 29 | self.max_objs = 50 30 | self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat", 31 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 32 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", 33 | "train", "tvmonitor"] 34 | self._valid_ids = np.arange(1, 21, dtype=np.int32) 35 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 36 | self._data_rng = np.random.RandomState(123) 37 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 38 | dtype=np.float32) 39 | self._eig_vec = np.array([ 40 | [-0.58752847, -0.69563484, 0.41340352], 41 | [-0.5832747, 0.00994535, -0.81221408], 42 | [-0.56089297, 0.71832671, 0.41158938] 43 | ], dtype=np.float32) 44 | self.split = split 45 | self.opt = opt 46 | 47 | print('==> initializing pascal {} data.'.format(_ann_name[split])) 48 | self.coco = coco.COCO(self.annot_path) 49 | self.images = sorted(self.coco.getImgIds()) 50 | self.num_samples = len(self.images) 51 | 52 | print('Loaded {} {} samples'.format(split, self.num_samples)) 53 | 54 | def _to_float(self, x): 55 | return float("{:.2f}".format(x)) 56 | 57 | def convert_eval_format(self, all_bboxes): 58 | detections = [[[] for __ in range(self.num_samples)] \ 59 | for _ in range(self.num_classes + 1)] 60 | for i in range(self.num_samples): 61 | img_id = self.images[i] 62 | for j in range(1, self.num_classes + 1): 63 | if isinstance(all_bboxes[img_id][j], np.ndarray): 64 | detections[j][i] = all_bboxes[img_id][j].tolist() 65 | else: 66 | detections[j][i] = all_bboxes[img_id][j] 67 | return detections 68 | 69 | def __len__(self): 70 | return self.num_samples 71 | 72 | def save_results(self, results, save_dir): 73 | json.dump(self.convert_eval_format(results), 74 | open('{}/results.json'.format(save_dir), 'w')) 75 | 76 | def run_eval(self, results, save_dir): 77 | # result_json = os.path.join(save_dir, "results.json") 78 | # detections = self.convert_eval_format(results) 79 | # json.dump(detections, open(result_json, "w")) 80 | self.save_results(results, save_dir) 81 | os.system('python tools/reval.py ' + \ 82 | '{}/results.json'.format(save_dir)) 83 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .sample.ddd import DddDataset 6 | from .sample.exdet import EXDetDataset 7 | from .sample.ctdet import CTDetDataset 8 | from .sample.multi_pose import MultiPoseDataset 9 | 10 | from .dataset.coco import COCO 11 | from .dataset.pascal import PascalVOC 12 | from .dataset.kitti import KITTI 13 | 
from .dataset.coco_hp import COCOHP 14 | 15 | 16 | dataset_factory = { 17 | 'coco': COCO, 18 | 'pascal': PascalVOC, 19 | 'kitti': KITTI, 20 | 'coco_hp': COCOHP 21 | } 22 | 23 | _sample_factory = { 24 | 'exdet': EXDetDataset, 25 | 'ctdet': CTDetDataset, 26 | 'ddd': DddDataset, 27 | 'multi_pose': MultiPoseDataset 28 | } 29 | 30 | 31 | def get_dataset(dataset, task): 32 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 33 | pass 34 | return Dataset 35 | 36 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | 16 | class BaseDetector(object): 17 | def __init__(self, opt): 18 | if opt.gpus[0] >= 0: 19 | opt.device = torch.device('cuda') 20 | else: 21 | opt.device = torch.device('cpu') 22 | 23 | print('Creating model...') 24 | self.model = create_model(opt.arch, opt.heads, opt.head_conv) 25 | self.model = load_model(self.model, opt.load_model) 26 | self.model = self.model.to(opt.device) 27 | self.model.eval() 28 | 29 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 30 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 31 | self.max_per_image = 100 32 | self.num_classes = opt.num_classes 33 | self.scales = opt.test_scales 34 | self.opt = opt 35 | self.pause = True 36 | 37 | def pre_process(self, image, scale, meta=None): 38 | height, width = image.shape[0:2] 39 | new_height = int(height * scale) 40 | new_width = int(width * scale) 41 | if self.opt.fix_res: 42 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 43 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 44 | s = max(height, width) * 1.0 45 | else: 46 | inp_height = (new_height | self.opt.pad) + 1 47 | inp_width = (new_width | self.opt.pad) + 1 48 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 49 | s = np.array([inp_width, inp_height], dtype=np.float32) 50 | 51 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 52 | resized_image = cv2.resize(image, (new_width, new_height)) 53 | inp_image = cv2.warpAffine( 54 | resized_image, trans_input, (inp_width, inp_height), 55 | flags=cv2.INTER_LINEAR) 56 | inp_image = ((inp_image / 255. 
- self.mean) / self.std).astype(np.float32) 57 | 58 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 59 | if self.opt.flip_test: 60 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 61 | images = torch.from_numpy(images) 62 | meta = {'c': c, 's': s, 63 | 'out_height': inp_height // self.opt.down_ratio, 64 | 'out_width': inp_width // self.opt.down_ratio} 65 | return images, meta 66 | 67 | def process(self, images, return_time=False): 68 | raise NotImplementedError 69 | 70 | def post_process(self, dets, meta, scale=1): 71 | raise NotImplementedError 72 | 73 | def merge_outputs(self, detections): 74 | raise NotImplementedError 75 | 76 | def debug(self, debugger, images, dets, output, scale=1): 77 | raise NotImplementedError 78 | 79 | def show_results(self, debugger, image, results): 80 | raise NotImplementedError 81 | 82 | def run(self, image_or_path_or_tensor, meta=None): 83 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 84 | merge_time, tot_time = 0, 0 85 | debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3), 86 | theme=self.opt.debugger_theme) 87 | start_time = time.time() 88 | pre_processed = False 89 | if isinstance(image_or_path_or_tensor, np.ndarray): 90 | image = image_or_path_or_tensor 91 | elif type(image_or_path_or_tensor) == type (''): 92 | image = cv2.imread(image_or_path_or_tensor) 93 | else: 94 | image = image_or_path_or_tensor['image'][0].numpy() 95 | pre_processed_images = image_or_path_or_tensor 96 | pre_processed = True 97 | 98 | loaded_time = time.time() 99 | load_time += (loaded_time - start_time) 100 | 101 | detections = [] 102 | for scale in self.scales: 103 | scale_start_time = time.time() 104 | if not pre_processed: 105 | images, meta = self.pre_process(image, scale, meta) 106 | else: 107 | # import pdb; pdb.set_trace() 108 | images = pre_processed_images['images'][scale][0] 109 | meta = pre_processed_images['meta'][scale] 110 | meta = {k: v.numpy()[0] for k, v in meta.items()} 111 | images = images.to(self.opt.device) 112 | torch.cuda.synchronize() 113 | pre_process_time = time.time() 114 | pre_time += pre_process_time - scale_start_time 115 | 116 | output, dets, forward_time = self.process(images, return_time=True) 117 | 118 | torch.cuda.synchronize() 119 | net_time += forward_time - pre_process_time 120 | decode_time = time.time() 121 | dec_time += decode_time - forward_time 122 | 123 | if self.opt.debug >= 2: 124 | self.debug(debugger, images, dets, output, scale) 125 | 126 | dets = self.post_process(dets, meta, scale) 127 | torch.cuda.synchronize() 128 | post_process_time = time.time() 129 | post_time += post_process_time - decode_time 130 | 131 | detections.append(dets) 132 | 133 | results = self.merge_outputs(detections) 134 | torch.cuda.synchronize() 135 | end_time = time.time() 136 | merge_time += end_time - post_process_time 137 | tot_time += end_time - start_time 138 | 139 | if self.opt.debug >= 1: 140 | self.show_results(debugger, image, results) 141 | 142 | return {'results': results, 'tot': tot_time, 'load': load_time, 143 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 144 | 'post': post_time, 'merge': merge_time} -------------------------------------------------------------------------------- /src/lib/detectors/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 
| from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import ctdet_decode 17 | from models.utils import flip_tensor 18 | from utils.image import get_affine_transform 19 | from utils.post_process import ctdet_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class CtdetDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(CtdetDetector, self).__init__(opt) 27 | 28 | def process(self, images, return_time=False): 29 | with torch.no_grad(): 30 | output = self.model(images)[-1] 31 | hm = output['hm'].sigmoid_() 32 | wh = output['wh'] 33 | reg = output['reg'] if self.opt.reg_offset else None 34 | if self.opt.flip_test: 35 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 36 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 37 | reg = reg[0:1] if reg is not None else None 38 | torch.cuda.synchronize() 39 | forward_time = time.time() 40 | dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 41 | 42 | if return_time: 43 | return output, dets, forward_time 44 | else: 45 | return output, dets 46 | 47 | def post_process(self, dets, meta, scale=1): 48 | dets = dets.detach().cpu().numpy() 49 | dets = dets.reshape(1, -1, dets.shape[2]) 50 | dets = ctdet_post_process( 51 | dets.copy(), [meta['c']], [meta['s']], 52 | meta['out_height'], meta['out_width'], self.opt.num_classes) 53 | for j in range(1, self.num_classes + 1): 54 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 55 | dets[0][j][:, :4] /= scale 56 | return dets[0] 57 | 58 | def merge_outputs(self, detections): 59 | results = {} 60 | for j in range(1, self.num_classes + 1): 61 | results[j] = np.concatenate( 62 | [detection[j] for detection in detections], axis=0).astype(np.float32) 63 | if len(self.scales) > 1 or self.opt.nms: 64 | soft_nms(results[j], Nt=0.5, method=2) 65 | scores = np.hstack( 66 | [results[j][:, 4] for j in range(1, self.num_classes + 1)]) 67 | if len(scores) > self.max_per_image: 68 | kth = len(scores) - self.max_per_image 69 | thresh = np.partition(scores, kth)[kth] 70 | for j in range(1, self.num_classes + 1): 71 | keep_inds = (results[j][:, 4] >= thresh) 72 | results[j] = results[j][keep_inds] 73 | return results 74 | 75 | def debug(self, debugger, images, dets, output, scale=1): 76 | detection = dets.detach().cpu().numpy().copy() 77 | detection[:, :, :4] *= self.opt.down_ratio 78 | for i in range(1): 79 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 80 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 81 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 82 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale)) 83 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale)) 84 | for k in range(len(dets[i])): 85 | if detection[i, k, 4] > self.opt.center_thresh: 86 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 87 | detection[i, k, 4], 88 | img_id='out_pred_{:.1f}'.format(scale)) 89 | 90 | def show_results(self, debugger, image, results): 91 | debugger.add_img(image, img_id='ctdet') 92 | for j in range(1, self.num_classes + 1): 93 | for bbox in results[j]: 94 | if bbox[4] > self.opt.vis_thresh: 95 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet') 96 | debugger.show_all_imgs(pause=self.pause) 97 | 
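Taken together with BaseDetector above, CtdetDetector is driven through `run()`: each test scale is pre-processed, `process()` runs the network and decodes the center/size/offset heads, `post_process()` maps boxes back to image coordinates, and `merge_outputs()` fuses scales and keeps the top 100 detections. The sketch below shows one way to call it directly. It is a minimal, hypothetical example rather than the repository's demo entry point: the `opt` namespace, checkpoint path, image path, and the rounded COCO mean/std values are assumptions standing in for the options object the project builds from command-line flags.

```python
# Minimal usage sketch. Assumptions: src/lib is on sys.path, a CUDA GPU is available,
# DCNv2 is compiled, and a ctdet DLA-34 checkpoint exists at the path below.
import sys
from types import SimpleNamespace

sys.path.insert(0, 'src/lib')                      # hypothetical checkout layout
from detectors.detector_factory import detector_factory

opt = SimpleNamespace(
    task='ctdet', dataset='coco', arch='dla_34',
    load_model='models/ctdet_coco_dla_2x.pth',     # hypothetical checkpoint path
    gpus=[0], heads={'hm': 80, 'wh': 2, 'reg': 2}, head_conv=256,
    mean=[0.408, 0.447, 0.470], std=[0.289, 0.274, 0.278],   # rounded COCO stats
    num_classes=80, test_scales=[1.0], fix_res=True, input_h=512, input_w=512,
    pad=31, down_ratio=4, flip_test=False, reg_offset=True, cat_spec_wh=False,
    K=100, nms=False, vis_thresh=0.3, center_thresh=0.1,
    debug=0, debugger_theme='white')

detector = detector_factory[opt.task](opt)         # -> CtdetDetector
ret = detector.run('path/to/image.jpg')            # hypothetical image path
print('detections per class:', {j: len(v) for j, v in ret['results'].items()})
print('total time: {:.3f}s'.format(ret['tot']))
```

With `debug=0` and a single test scale, `run()` skips the debugger windows and the optional soft-NMS call, so the external NMS extension is not required on this path.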
-------------------------------------------------------------------------------- /src/lib/detectors/ddd.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | 12 | from models.decode import ddd_decode 13 | from models.utils import flip_tensor 14 | from utils.image import get_affine_transform 15 | from utils.post_process import ddd_post_process 16 | from utils.debugger import Debugger 17 | from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y 18 | from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d 19 | 20 | from .base_detector import BaseDetector 21 | 22 | class DddDetector(BaseDetector): 23 | def __init__(self, opt): 24 | super(DddDetector, self).__init__(opt) 25 | self.calib = np.array([[707.0493, 0, 604.0814, 45.75831], 26 | [0, 707.0493, 180.5066, -0.3454157], 27 | [0, 0, 1., 0.004981016]], dtype=np.float32) 28 | 29 | 30 | def pre_process(self, image, scale, calib=None): 31 | height, width = image.shape[0:2] 32 | 33 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 34 | c = np.array([width / 2, height / 2], dtype=np.float32) 35 | if self.opt.keep_res: 36 | s = np.array([inp_width, inp_height], dtype=np.int32) 37 | else: 38 | s = np.array([width, height], dtype=np.int32) 39 | 40 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 41 | resized_image = image #cv2.resize(image, (width, height)) 42 | inp_image = cv2.warpAffine( 43 | resized_image, trans_input, (inp_width, inp_height), 44 | flags=cv2.INTER_LINEAR) 45 | inp_image = (inp_image.astype(np.float32) / 255.) 46 | inp_image = (inp_image - self.mean) / self.std 47 | images = inp_image.transpose(2, 0, 1)[np.newaxis, ...] 48 | calib = np.array(calib, dtype=np.float32) if calib is not None \ 49 | else self.calib 50 | images = torch.from_numpy(images) 51 | meta = {'c': c, 's': s, 52 | 'out_height': inp_height // self.opt.down_ratio, 53 | 'out_width': inp_width // self.opt.down_ratio, 54 | 'calib': calib} 55 | return images, meta 56 | 57 | def process(self, images, return_time=False): 58 | with torch.no_grad(): 59 | torch.cuda.synchronize() 60 | output = self.model(images)[-1] 61 | output['hm'] = output['hm'].sigmoid_() 62 | output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1. 
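      # invert the sigmoid-based depth parameterisation: 1 / sigmoid(x) - 1 maps the
      # unconstrained 'dep' head output to a positive depth value, with 1e-6 guarding
      # against division by zero.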
63 | wh = output['wh'] if self.opt.reg_bbox else None 64 | reg = output['reg'] if self.opt.reg_offset else None 65 | torch.cuda.synchronize() 66 | forward_time = time.time() 67 | 68 | dets = ddd_decode(output['hm'], output['rot'], output['dep'], 69 | output['dim'], wh=wh, reg=reg, K=self.opt.K) 70 | if return_time: 71 | return output, dets, forward_time 72 | else: 73 | return output, dets 74 | 75 | def post_process(self, dets, meta, scale=1): 76 | dets = dets.detach().cpu().numpy() 77 | detections = ddd_post_process( 78 | dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt) 79 | self.this_calib = meta['calib'] 80 | return detections[0] 81 | 82 | def merge_outputs(self, detections): 83 | results = detections[0] 84 | for j in range(1, self.num_classes + 1): 85 | if len(results[j] > 0): 86 | keep_inds = (results[j][:, -1] > self.opt.peak_thresh) 87 | results[j] = results[j][keep_inds] 88 | return results 89 | 90 | def debug(self, debugger, images, dets, output, scale=1): 91 | dets = dets.detach().cpu().numpy() 92 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 93 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 94 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 95 | debugger.add_blend_img(img, pred, 'pred_hm') 96 | debugger.add_ct_detection( 97 | img, dets[0], show_box=self.opt.reg_bbox, 98 | center_thresh=self.opt.vis_thresh, img_id='det_pred') 99 | 100 | def show_results(self, debugger, image, results): 101 | debugger.add_3d_detection( 102 | image, results, self.this_calib, 103 | center_thresh=self.opt.vis_thresh, img_id='add_pred') 104 | debugger.add_bird_view( 105 | results, center_thresh=self.opt.vis_thresh, img_id='bird_pred') 106 | debugger.show_all_imgs(pause=self.pause) -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .exdet import ExdetDetector 6 | from .ddd import DddDetector 7 | from .ctdet import CtdetDetector 8 | from .multi_pose import MultiPoseDetector 9 | 10 | detector_factory = { 11 | 'exdet': ExdetDetector, 12 | 'ddd': DddDetector, 13 | 'ctdet': CtdetDetector, 14 | 'multi_pose': MultiPoseDetector, 15 | } 16 | -------------------------------------------------------------------------------- /src/lib/detectors/exdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | from progress.bar import Bar 12 | import time 13 | import torch 14 | 15 | from models.decode import exct_decode, agnex_ct_decode 16 | from models.utils import flip_tensor 17 | from utils.image import get_affine_transform, transform_preds 18 | from utils.post_process import ctdet_post_process 19 | from utils.debugger import Debugger 20 | 21 | from .base_detector import BaseDetector 22 | 23 | class ExdetDetector(BaseDetector): 24 | def __init__(self, opt): 25 | super(ExdetDetector, self).__init__(opt) 26 | self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode 27 | 28 | def process(self, images, return_time=False): 29 | with torch.no_grad(): 30 | torch.cuda.synchronize() 31 | output = self.model(images)[-1] 32 | t_heat = 
output['hm_t'].sigmoid_() 33 | l_heat = output['hm_l'].sigmoid_() 34 | b_heat = output['hm_b'].sigmoid_() 35 | r_heat = output['hm_r'].sigmoid_() 36 | c_heat = output['hm_c'].sigmoid_() 37 | torch.cuda.synchronize() 38 | forward_time = time.time() 39 | if self.opt.reg_offset: 40 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, 41 | output['reg_t'], output['reg_l'], 42 | output['reg_b'], output['reg_r'], 43 | K=self.opt.K, 44 | scores_thresh=self.opt.scores_thresh, 45 | center_thresh=self.opt.center_thresh, 46 | aggr_weight=self.opt.aggr_weight) 47 | else: 48 | dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K, 49 | scores_thresh=self.opt.scores_thresh, 50 | center_thresh=self.opt.center_thresh, 51 | aggr_weight=self.opt.aggr_weight) 52 | if return_time: 53 | return output, dets, forward_time 54 | else: 55 | return output, dets 56 | 57 | def debug(self, debugger, images, dets, output, scale=1): 58 | detection = dets.detach().cpu().numpy().copy() 59 | detection[:, :, :4] *= self.opt.down_ratio 60 | for i in range(1): 61 | inp_height, inp_width = images.shape[2], images.shape[3] 62 | pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8) 63 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 64 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 65 | parts = ['t', 'l', 'b', 'r', 'c'] 66 | for p in parts: 67 | tag = 'hm_{}'.format(p) 68 | pred = debugger.gen_colormap( 69 | output[tag][i].detach().cpu().numpy(), (inp_height, inp_width)) 70 | if p != 'c': 71 | pred_hm = np.maximum(pred_hm, pred) 72 | else: 73 | debugger.add_blend_img( 74 | img, pred, 'pred_{}_{:.1f}'.format(p, scale)) 75 | debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale)) 76 | debugger.add_img(img, img_id='out_{:.1f}'.format(scale)) 77 | for k in range(len(detection[i])): 78 | # print('detection', detection[i, k, 4], detection[i, k]) 79 | if detection[i, k, 4] > 0.01: 80 | # print('detection', detection[i, k, 4], detection[i, k]) 81 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 82 | detection[i, k, 4], 83 | img_id='out_{:.1f}'.format(scale)) 84 | 85 | def post_process(self, dets, meta, scale=1): 86 | out_width, out_height = meta['out_width'], meta['out_height'] 87 | dets = dets.detach().cpu().numpy().reshape(2, -1, 14) 88 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] 89 | dets = dets.reshape(1, -1, 14) 90 | dets[0, :, 0:2] = transform_preds( 91 | dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height)) 92 | dets[0, :, 2:4] = transform_preds( 93 | dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height)) 94 | dets[:, :, 0:4] /= scale 95 | return dets[0] 96 | 97 | def merge_outputs(self, detections): 98 | detections = np.concatenate( 99 | [detection for detection in detections], axis=0).astype(np.float32) 100 | classes = detections[..., -1] 101 | keep_inds = (detections[:, 4] > 0) 102 | detections = detections[keep_inds] 103 | classes = classes[keep_inds] 104 | 105 | results = {} 106 | for j in range(self.num_classes): 107 | keep_inds = (classes == j) 108 | results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 109 | soft_nms(results[j + 1], Nt=0.5, method=2) 110 | results[j + 1] = results[j + 1][:, 0:5] 111 | 112 | scores = np.hstack([ 113 | results[j][:, -1] 114 | for j in range(1, self.num_classes + 1) 115 | ]) 116 | if len(scores) > self.max_per_image: 117 | kth = len(scores) - self.max_per_image 118 | thresh = np.partition(scores, kth)[kth] 119 | for j in range(1, self.num_classes + 1): 120 | 
keep_inds = (results[j][:, -1] >= thresh) 121 | results[j] = results[j][keep_inds] 122 | return results 123 | 124 | 125 | def show_results(self, debugger, image, results): 126 | debugger.add_img(image, img_id='exdet') 127 | for j in range(1, self.num_classes + 1): 128 | for bbox in results[j]: 129 | if bbox[4] > self.opt.vis_thresh: 130 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='exdet') 131 | debugger.show_all_imgs(pause=self.pause) 132 | -------------------------------------------------------------------------------- /src/lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import multi_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(MultiPoseDetector, self).__init__(opt) 27 | self.flip_idx = opt.flip_idx 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | output = self.model(images)[-1] 33 | output['hm'] = output['hm'].sigmoid_() 34 | if self.opt.hm_hp and not self.opt.mse_loss: 35 | output['hm_hp'] = output['hm_hp'].sigmoid_() 36 | 37 | reg = output['reg'] if self.opt.reg_offset else None 38 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 39 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 40 | torch.cuda.synchronize() 41 | forward_time = time.time() 42 | 43 | if self.opt.flip_test: 44 | output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2 45 | output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2 46 | output['hps'] = (output['hps'][0:1] + 47 | flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2 48 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 49 | if hm_hp is not None else None 50 | reg = reg[0:1] if reg is not None else None 51 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 52 | 53 | dets = multi_pose_decode( 54 | output['hm'], output['wh'], output['hps'], 55 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 56 | 57 | if return_time: 58 | return output, dets, forward_time 59 | else: 60 | return output, dets 61 | 62 | def post_process(self, dets, meta, scale=1): 63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 64 | dets = multi_pose_post_process( 65 | dets.copy(), [meta['c']], [meta['s']], 66 | meta['out_height'], meta['out_width']) 67 | for j in range(1, self.num_classes + 1): 68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39) 69 | # import pdb; pdb.set_trace() 70 | dets[0][j][:, :4] /= scale 71 | dets[0][j][:, 5:] /= scale 72 | return dets[0] 73 | 74 | def merge_outputs(self, detections): 75 | results = {} 76 | results[1] = np.concatenate( 77 | [detection[1] for detection in detections], axis=0).astype(np.float32) 78 | if self.opt.nms or len(self.opt.test_scales) 
> 1: 79 | soft_nms_39(results[1], Nt=0.5, method=2) 80 | results[1] = results[1].tolist() 81 | return results 82 | 83 | def debug(self, debugger, images, dets, output, scale=1): 84 | dets = dets.detach().cpu().numpy().copy() 85 | dets[:, :, :4] *= self.opt.down_ratio 86 | dets[:, :, 5:39] *= self.opt.down_ratio 87 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 88 | img = np.clip((( 89 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 90 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 91 | debugger.add_blend_img(img, pred, 'pred_hm') 92 | if self.opt.hm_hp: 93 | pred = debugger.gen_colormap_hp( 94 | output['hm_hp'][0].detach().cpu().numpy()) 95 | debugger.add_blend_img(img, pred, 'pred_hmhp') 96 | 97 | def show_results(self, debugger, image, results): 98 | debugger.add_img(image, img_id='multi_pose') 99 | for bbox in results[1]: 100 | if bbox[4] > self.opt.vis_thresh: 101 | debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose') 102 | debugger.add_coco_hp(bbox[5:39], img_id='multi_pose') 103 | debugger.show_all_imgs(pause=self.pause) -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = 
time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 
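  # keep only checkpoint tensors whose shapes match the freshly created model;
  # mismatched or missing parameters fall back to the model's own initialisation
  # and print the warning message above instead of raising an error.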
52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. {}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # mdformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | 42 | ### Known Issues: 43 | 44 | - [x] Gradient check w.r.t offset (solved) 45 | - [ ] Backward is not reentrant (minor) 46 | 47 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 48 | 49 | I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes. 50 | However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some 51 | non-differential points? 52 | 53 | Update: all gradient check passes with double precision. 54 | 55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 56 | float `<1e-15` for double), 57 | so it may not be a serious problem (?) 58 | 59 | Please post an issue or PR if you have any comments. 
60 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/models/networks/DCNv2/__init__.py -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/dcn_v2.c'] 7 | headers = ['src/dcn_v2.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/dcn_v2_cuda.c'] 15 | headers += ['src/dcn_v2_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o'] 18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o'] 19 | with_cuda = True 20 | else: 21 | raise ValueError('CUDA is not available') 22 | 23 | extra_compile_args = ['-fopenmp', '-std=c99'] 24 | 25 | this_file = os.path.dirname(os.path.realpath(__file__)) 26 | print(this_file) 27 | sources = [os.path.join(this_file, fname) for fname in sources] 28 | headers = [os.path.join(this_file, fname) for fname in headers] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | '_ext.dcn_v2', 33 | headers=headers, 34 | sources=sources, 35 | define_macros=defines, 36 | relative_to=__file__, 37 | with_cuda=with_cuda, 38 | extra_objects=extra_objects, 39 | extra_compile_args=extra_compile_args 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build_double.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/dcn_v2_double.c'] 7 | headers = ['src/dcn_v2_double.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/dcn_v2_cuda_double.c'] 15 | headers += ['src/dcn_v2_cuda_double.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o'] 18 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o'] 19 | with_cuda = True 20 | else: 21 | raise ValueError('CUDA is not available') 22 | 23 | extra_compile_args = ['-fopenmp', '-std=c99'] 24 | 25 | this_file = os.path.dirname(os.path.realpath(__file__)) 26 | print(this_file) 27 | sources = [os.path.join(this_file, fname) for fname in sources] 28 | headers = [os.path.join(this_file, fname) for fname in headers] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | '_ext.dcn_v2_double', 33 | headers=headers, 34 | sources=sources, 35 | define_macros=defines, 36 | relative_to=__file__, 37 | with_cuda=with_cuda, 38 | extra_objects=extra_objects, 39 | extra_compile_args=extra_compile_args 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/make.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src/cuda 3 | 4 | # compile dcn 5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC 6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC 7 | 8 | # compile dcn-roi-pooling 9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC 10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC 11 | 12 | cd - 13 | python build.py 14 | python build_double.py 15 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 
48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA 64 | #define DCN_V2_IM2COL_CUDA 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const float *data_im, const float *data_offset, const float *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, float *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | float *grad_offset, float *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 
20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 
57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE 64 | #define DCN_V2_IM2COL_CUDA_DOUBLE 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const double *data_im, const double *data_offset, const double *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, double *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const double *data_col, const double *data_offset, const double *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, double *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | double *grad_offset, double *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA 11 | #define DCN_V2_PSROI_POOLING_CUDA 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const float *data, 20 | const float *bbox, 21 | const float *trans, 22 | float *out, 23 | float *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const float spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const float trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const float *out_grad, 41 | const float *data, 42 | const float *bbox, 43 | const float *trans, 44 | const float *top_count, 45 | float *in_grad, 46 | float *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE 11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const double *data, 20 | const double *bbox, 21 | const double *trans, 22 | double *out, 23 | double *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const double spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const double trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const double *out_grad, 41 | const double *data, 42 | const double *bbox, 43 | const double *trans, 44 | const double *top_count, 45 | double *in_grad, 46 | double *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const double spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const double trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 6 | THFloatTensor *bias, THFloatTensor *ones, 7 | THFloatTensor *offset, THFloatTensor *mask, 8 | THFloatTensor *output, THFloatTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 17 | THFloatTensor *bias, THFloatTensor *ones, 18 | THFloatTensor *offset, THFloatTensor *mask, 19 | THFloatTensor *output, THFloatTensor *columns, 20 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 22 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 2 | THFloatTensor *bias, THFloatTensor *ones, 3 | THFloatTensor *offset, THFloatTensor *mask, 4 | THFloatTensor *output, THFloatTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int 
deformable_group); 9 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 10 | THFloatTensor *bias, THFloatTensor *ones, 11 | THFloatTensor *offset, THFloatTensor *mask, 12 | THFloatTensor *output, THFloatTensor *columns, 13 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 15 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight, 10 | THCudaTensor *bias, THCudaTensor *ones, 11 | THCudaTensor *offset, THCudaTensor *mask, 12 | THCudaTensor *output, THCudaTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight, 19 | THCudaTensor *bias, THCudaTensor *ones, 20 | THCudaTensor *offset, THCudaTensor *mask, 21 | THCudaTensor *columns, 22 | THCudaTensor *grad_input, THCudaTensor *grad_weight, 23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset, 24 | THCudaTensor *grad_mask, THCudaTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, 32 | THCudaTensor * trans, 33 | THCudaTensor * out, THCudaTensor * top_count, 34 | const int no_trans, 35 | const float spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const float trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad, 44 | THCudaTensor * input, THCudaTensor * bbox, 45 | THCudaTensor * trans, THCudaTensor * top_count, 46 | THCudaTensor * input_grad, THCudaTensor * trans_grad, 47 | const int no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std); 55 | 56 | // #ifdef __cplusplus 57 | // } 58 | // #endif 59 | 60 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 10 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 11 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 12 | THCudaDoubleTensor *output, THCudaDoubleTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int 
dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 19 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 20 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 21 | THCudaDoubleTensor *columns, 22 | THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight, 23 | THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset, 24 | THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 32 | THCudaDoubleTensor * trans, 33 | THCudaDoubleTensor * out, THCudaDoubleTensor * top_count, 34 | const int no_trans, 35 | const double spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const double trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad, 44 | THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 45 | THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count, 46 | THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad, 47 | const int no_trans, 48 | const double spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const double trans_std); 55 | 56 | 57 | // #ifdef __cplusplus 58 | // } 59 | // #endif 60 | 61 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 6 | THDoubleTensor *bias, THDoubleTensor *ones, 7 | THDoubleTensor *offset, THDoubleTensor *mask, 8 | THDoubleTensor *output, THDoubleTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 17 | THDoubleTensor *bias, THDoubleTensor *ones, 18 | THDoubleTensor *offset, THDoubleTensor *mask, 19 | THDoubleTensor *output, THDoubleTensor *columns, 20 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 21 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 22 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 2 | THDoubleTensor *bias, THDoubleTensor *ones, 3 | THDoubleTensor *offset, THDoubleTensor *mask, 4 | THDoubleTensor *output, THDoubleTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | 
const int deformable_group); 9 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 10 | THDoubleTensor *bias, THDoubleTensor *ones, 11 | THDoubleTensor *offset, THDoubleTensor *mask, 12 | THDoubleTensor *output, THDoubleTensor *columns, 13 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 14 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 15 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., 
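# _gather_feat / _transpose_and_gather_feat above pick, for every object, the
# C-dimensional vector stored at that object's flattened center index in a
# (B, C, H, W) head output. A minimal, self-contained sketch of the same
# permute/view/gather steps (all values below are illustrative):
import torch

B, C, H, W = 1, 2, 4, 4
feat = torch.arange(B * C * H * W, dtype=torch.float32).view(B, C, H, W)
y, x = 2, 3
ind = torch.tensor([[y * W + x]])                      # (B, max_objs), index = y * W + x
flat = feat.permute(0, 2, 3, 1).contiguous().view(B, -1, C)
picked = flat.gather(1, ind.unsqueeze(2).expand(B, ind.size(1), C))
assert torch.equal(picked[0, 0], feat[0, :, y, x])     # per-object feature vector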
::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | class BaseTrainer(object): 24 | def __init__( 25 | self, opt, model, optimizer=None): 26 | self.opt = opt 27 | self.optimizer = optimizer 28 | self.loss_stats, self.loss = self._get_losses(opt) 29 | self.model_with_loss = ModelWithLoss(model, self.loss) 30 | 31 | def set_device(self, gpus, chunk_sizes, device): 32 | if len(gpus) > 1: 33 | self.model_with_loss = DataParallel( 34 | self.model_with_loss, device_ids=gpus, 35 | chunk_sizes=chunk_sizes).to(device) 36 | else: 37 | self.model_with_loss = self.model_with_loss.to(device) 38 | 39 | for state in self.optimizer.state.values(): 40 | for k, v in state.items(): 41 | if isinstance(v, torch.Tensor): 42 | state[k] = v.to(device=device, non_blocking=True) 43 | 44 | def run_epoch(self, phase, epoch, data_loader): 45 | model_with_loss = self.model_with_loss 46 | if phase == 'train': 47 | model_with_loss.train() 48 | else: 49 | if len(self.opt.gpus) > 1: 50 | model_with_loss = self.model_with_loss.module 51 | model_with_loss.eval() 52 | torch.cuda.empty_cache() 53 | 54 | opt = self.opt 55 | results = {} 56 | data_time, batch_time = AverageMeter(), AverageMeter() 57 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 58 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 59 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 60 | end = time.time() 61 | for iter_id, batch in enumerate(data_loader): 62 | if iter_id >= num_iters: 63 | break 64 | data_time.update(time.time() - end) 65 | 66 | for k in batch: 67 | if k != 'meta': 68 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 69 | output, loss, loss_stats = model_with_loss(batch) 70 | loss = loss.mean() 71 | if phase == 'train': 72 | self.optimizer.zero_grad() 73 | loss.backward() 74 | self.optimizer.step() 75 | batch_time.update(time.time() - end) 76 | end = time.time() 77 | 78 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 79 | epoch, iter_id, num_iters, phase=phase, 80 | total=bar.elapsed_td, eta=bar.eta_td) 81 | for l in avg_loss_stats: 82 | 
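# flip_lr above mirrors a keypoint heatmap along the width axis and then swaps
# the channels of left/right joint pairs so each map stays semantically correct
# after the flip. A quick check with the usual 17-joint COCO pairing (the exact
# flip_idx lives in the dataset definition, e.g. coco_hp.py; the import assumes
# src/lib is on sys.path, as _init_paths.py arranges):
import torch
from models.utils import flip_lr

flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8],
            [9, 10], [11, 12], [13, 14], [15, 16]]     # left/right channel pairs
hm = torch.rand(1, 17, 8, 8)
flipped = flip_lr(hm, flip_idx)
# channel 5 of the flipped map equals the width-reversed channel 6 of the original
assert torch.allclose(flipped[0, 5], torch.flip(hm[0, 6], [1]))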
avg_loss_stats[l].update( 83 | loss_stats[l].mean().item(), batch['input'].size(0)) 84 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 85 | if not opt.hide_data_time: 86 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 87 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 88 | if opt.print_iter > 0: 89 | if iter_id % opt.print_iter == 0: 90 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 91 | else: 92 | bar.next() 93 | 94 | if opt.debug > 0: 95 | self.debug(batch, output, iter_id) 96 | 97 | if opt.test: 98 | self.save_result(output, batch, results) 99 | del output, loss, loss_stats 100 | 101 | bar.finish() 102 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 103 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 104 | return ret, results 105 | 106 | def debug(self, batch, output, iter_id): 107 | raise NotImplementedError 108 | 109 | def save_result(self, output, batch, results): 110 | raise NotImplementedError 111 | 112 | def _get_losses(self, opt): 113 | raise NotImplementedError 114 | 115 | def val(self, epoch, data_loader): 116 | return self.run_epoch('val', epoch, data_loader) 117 | 118 | def train(self, epoch, data_loader): 119 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /src/lib/trains/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss 9 | from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 10 | from models.decode import ctdet_decode 11 | from models.utils import _sigmoid 12 | from utils.debugger import Debugger 13 | from utils.post_process import ctdet_post_process 14 | from utils.oracle_utils import gen_oracle_map 15 | from .base_trainer import BaseTrainer 16 | 17 | class CtdetLoss(torch.nn.Module): 18 | def __init__(self, opt): 19 | super(CtdetLoss, self).__init__() 20 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 22 | RegLoss() if opt.reg_loss == 'sl1' else None 23 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \ 24 | NormRegL1Loss() if opt.norm_wh else \ 25 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg 26 | self.opt = opt 27 | 28 | def forward(self, outputs, batch): 29 | opt = self.opt 30 | hm_loss, wh_loss, off_loss = 0, 0, 0 31 | for s in range(opt.num_stacks): 32 | output = outputs[s] 33 | if not opt.mse_loss: 34 | output['hm'] = _sigmoid(output['hm']) 35 | 36 | if opt.eval_oracle_hm: 37 | output['hm'] = batch['hm'] 38 | if opt.eval_oracle_wh: 39 | output['wh'] = torch.from_numpy(gen_oracle_map( 40 | batch['wh'].detach().cpu().numpy(), 41 | batch['ind'].detach().cpu().numpy(), 42 | output['wh'].shape[3], output['wh'].shape[2])).to(opt.device) 43 | if opt.eval_oracle_offset: 44 | output['reg'] = torch.from_numpy(gen_oracle_map( 45 | batch['reg'].detach().cpu().numpy(), 46 | batch['ind'].detach().cpu().numpy(), 47 | output['reg'].shape[3], output['reg'].shape[2])).to(opt.device) 48 | 49 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 50 | if opt.wh_weight > 0: 51 | if opt.dense_wh: 52 | mask_weight = batch['dense_wh_mask'].sum() + 1e-4 53 | wh_loss += ( 54 | self.crit_wh(output['wh'] * 
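# self.crit = FocalLoss() comes from models/losses.py, which is not part of this
# excerpt. A condensed, independent sketch of the penalty-reduced pixel-wise
# focal loss in the CornerNet/CenterNet formulation it follows (the _sigmoid
# clamp above keeps pred strictly inside (0, 1) so the logs stay finite):
import torch

def heatmap_focal_loss(pred, gt):
    """pred: sigmoid heatmap in (0, 1); gt: Gaussian-splatted ground truth."""
    pos = gt.eq(1).float()
    neg = gt.lt(1).float()
    neg_weights = torch.pow(1 - gt, 4)        # down-weight pixels near a center
    pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos
    neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg
    num_pos = pos.sum()
    if num_pos == 0:
        return -neg_loss.sum()
    return -(pos_loss.sum() + neg_loss.sum()) / num_pos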
batch['dense_wh_mask'], 55 | batch['dense_wh'] * batch['dense_wh_mask']) / 56 | mask_weight) / opt.num_stacks 57 | elif opt.cat_spec_wh: 58 | wh_loss += self.crit_wh( 59 | output['wh'], batch['cat_spec_mask'], 60 | batch['ind'], batch['cat_spec_wh']) / opt.num_stacks 61 | else: 62 | wh_loss += self.crit_reg( 63 | output['wh'], batch['reg_mask'], 64 | batch['ind'], batch['wh']) / opt.num_stacks 65 | 66 | if opt.reg_offset and opt.off_weight > 0: 67 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 68 | batch['ind'], batch['reg']) / opt.num_stacks 69 | 70 | loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \ 71 | opt.off_weight * off_loss 72 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 73 | 'wh_loss': wh_loss, 'off_loss': off_loss} 74 | return loss, loss_stats 75 | 76 | class CtdetTrainer(BaseTrainer): 77 | def __init__(self, opt, model, optimizer=None): 78 | super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer) 79 | 80 | def _get_losses(self, opt): 81 | loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss'] 82 | loss = CtdetLoss(opt) 83 | return loss_states, loss 84 | 85 | def debug(self, batch, output, iter_id): 86 | opt = self.opt 87 | reg = output['reg'] if opt.reg_offset else None 88 | dets = ctdet_decode( 89 | output['hm'], output['wh'], reg=reg, 90 | cat_spec_wh=opt.cat_spec_wh, K=opt.K) 91 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 92 | dets[:, :, :4] *= opt.down_ratio 93 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 94 | dets_gt[:, :, :4] *= opt.down_ratio 95 | for i in range(1): 96 | debugger = Debugger( 97 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 98 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 99 | img = np.clip((( 100 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 101 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 102 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 103 | debugger.add_blend_img(img, pred, 'pred_hm') 104 | debugger.add_blend_img(img, gt, 'gt_hm') 105 | debugger.add_img(img, img_id='out_pred') 106 | for k in range(len(dets[i])): 107 | if dets[i, k, 4] > opt.center_thresh: 108 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 109 | dets[i, k, 4], img_id='out_pred') 110 | 111 | debugger.add_img(img, img_id='out_gt') 112 | for k in range(len(dets_gt[i])): 113 | if dets_gt[i, k, 4] > opt.center_thresh: 114 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 115 | dets_gt[i, k, 4], img_id='out_gt') 116 | 117 | if opt.debug == 4: 118 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 119 | else: 120 | debugger.show_all_imgs(pause=True) 121 | 122 | def save_result(self, output, batch, results): 123 | reg = output['reg'] if self.opt.reg_offset else None 124 | dets = ctdet_decode( 125 | output['hm'], output['wh'], reg=reg, 126 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 127 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 128 | dets_out = ctdet_post_process( 129 | dets.copy(), batch['meta']['c'].cpu().numpy(), 130 | batch['meta']['s'].cpu().numpy(), 131 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 132 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] -------------------------------------------------------------------------------- /src/lib/trains/exdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
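# The indexing in debug()/save_result() above implies that each row returned by
# ctdet_decode (models/decode.py, not shown in this excerpt) is laid out as
# [x1, y1, x2, y2, score, class] in output-map coordinates, which is why the
# first four columns are multiplied by opt.down_ratio (4 by default) before
# drawing. An illustrative unpack with a stand-in tensor:
import torch

dets = torch.tensor([[[10., 12., 30., 40., 0.83, 2.]]])   # (batch, K, 6) stand-in
x1, y1, x2, y2 = dets[0, 0, :4] * 4        # heatmap coords -> network-input coords
score = dets[0, 0, 4].item()               # compared against opt.center_thresh
cls_id = int(dets[0, 0, -1])               # 0-based class index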
absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | import cv2 8 | import sys 9 | import time 10 | from utils.debugger import Debugger 11 | from models.data_parallel import DataParallel 12 | from models.losses import FocalLoss, RegL1Loss 13 | from models.decode import agnex_ct_decode, exct_decode 14 | from models.utils import _sigmoid 15 | from .base_trainer import BaseTrainer 16 | 17 | class ExdetLoss(torch.nn.Module): 18 | def __init__(self, opt): 19 | super(ExdetLoss, self).__init__() 20 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_reg = RegL1Loss() 22 | self.opt = opt 23 | self.parts = ['t', 'l', 'b', 'r', 'c'] 24 | 25 | def forward(self, outputs, batch): 26 | opt = self.opt 27 | hm_loss, reg_loss = 0, 0 28 | for s in range(opt.num_stacks): 29 | output = outputs[s] 30 | for p in self.parts: 31 | tag = 'hm_{}'.format(p) 32 | output[tag] = _sigmoid(output[tag]) 33 | hm_loss += self.crit(output[tag], batch[tag]) / opt.num_stacks 34 | if p != 'c' and opt.reg_offset and opt.off_weight > 0: 35 | reg_loss += self.crit_reg(output['reg_{}'.format(p)], 36 | batch['reg_mask'], 37 | batch['ind_{}'.format(p)], 38 | batch['reg_{}'.format(p)]) / opt.num_stacks 39 | loss = opt.hm_weight * hm_loss + opt.off_weight * reg_loss 40 | loss_stats = {'loss': loss, 'off_loss': reg_loss, 'hm_loss': hm_loss} 41 | return loss, loss_stats 42 | 43 | class ExdetTrainer(BaseTrainer): 44 | def __init__(self, opt, model, optimizer=None): 45 | super(ExdetTrainer, self).__init__(opt, model, optimizer=optimizer) 46 | self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode 47 | 48 | def _get_losses(self, opt): 49 | loss_states = ['loss', 'hm_loss', 'off_loss'] 50 | loss = ExdetLoss(opt) 51 | return loss_states, loss 52 | 53 | def debug(self, batch, output, iter_id): 54 | opt = self.opt 55 | detections = self.decode(output['hm_t'], output['hm_l'], 56 | output['hm_b'], output['hm_r'], 57 | output['hm_c']).detach().cpu().numpy() 58 | detections[:, :, :4] *= opt.input_res / opt.output_res 59 | for i in range(1): 60 | debugger = Debugger( 61 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 62 | pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) 63 | gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) 64 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 65 | img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8) 66 | for p in self.parts: 67 | tag = 'hm_{}'.format(p) 68 | pred = debugger.gen_colormap(output[tag][i].detach().cpu().numpy()) 69 | gt = debugger.gen_colormap(batch[tag][i].detach().cpu().numpy()) 70 | if p != 'c': 71 | pred_hm = np.maximum(pred_hm, pred) 72 | gt_hm = np.maximum(gt_hm, gt) 73 | if p == 'c' or opt.debug > 2: 74 | debugger.add_blend_img(img, pred, 'pred_{}'.format(p)) 75 | debugger.add_blend_img(img, gt, 'gt_{}'.format(p)) 76 | debugger.add_blend_img(img, pred_hm, 'pred') 77 | debugger.add_blend_img(img, gt_hm, 'gt') 78 | debugger.add_img(img, img_id='out') 79 | for k in range(len(detections[i])): 80 | if detections[i, k, 4] > 0.1: 81 | debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1], 82 | detections[i, k, 4], img_id='out') 83 | if opt.debug == 4: 84 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 85 | else: 86 | debugger.show_all_imgs(pause=True) -------------------------------------------------------------------------------- 
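# ExdetLoss above expects one heatmap per extreme point plus the center, and an
# offset map for each of the four extremes. A sketch of the per-stack output
# dictionary those loops iterate over (80 classes and a 128x128 output map are
# assumed purely for illustration):
import torch

parts = ['t', 'l', 'b', 'r', 'c']          # top, left, bottom, right, center
output = {'hm_{}'.format(p): torch.zeros(1, 80, 128, 128) for p in parts}
output.update({'reg_{}'.format(p): torch.zeros(1, 2, 128, 128)
               for p in parts if p != 'c'})
print(sorted(output.keys()))
# hm_b, hm_c, hm_l, hm_r, hm_t, reg_b, reg_l, reg_r, reg_t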
/src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetTrainer 6 | from .ddd import DddTrainer 7 | from .exdet import ExdetTrainer 8 | from .multi_pose import MultiPoseTrainer 9 | 10 | train_factory = { 11 | 'exdet': ExdetTrainer, 12 | 'ddd': DddTrainer, 13 | 'ctdet': CtdetTrainer, 14 | 'multi_pose': MultiPoseTrainer, 15 | } 16 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def compute_box_3d(dim, location, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners) 22 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) 23 | return corners_3d.transpose(1, 0) 24 | 25 | def project_to_image(pts_3d, P): 26 | # pts_3d: n x 3 27 | # P: 3 x 4 28 | # return: n x 2 29 | pts_3d_homo = np.concatenate( 30 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 31 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 32 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 33 | # import pdb; pdb.set_trace() 34 | return pts_2d 35 | 36 | def compute_orientation_3d(dim, location, rotation_y): 37 | # dim: 3 38 | # location: 3 39 | # rotation_y: 1 40 | # return: 2 x 3 41 | c, s = np.cos(rotation_y), np.sin(rotation_y) 42 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 43 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 44 | orientation_3d = np.dot(R, orientation_3d) 45 | orientation_3d = orientation_3d + \ 46 | np.array(location, dtype=np.float32).reshape(3, 1) 47 | return orientation_3d.transpose(1, 0) 48 | 49 | def draw_box_3d(image, corners, c=(0, 0, 255)): 50 | face_idx = [[0,1,5,4], 51 | [1,2,6, 5], 52 | [2,3,7,6], 53 | [3,0,4,7]] 54 | for ind_f in range(3, -1, -1): 55 | f = face_idx[ind_f] 56 | for j in range(4): 57 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 58 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA) 59 | if ind_f == 0: 60 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 61 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 62 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 63 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 64 | return image 65 | 66 | def unproject_2d_to_3d(pt_2d, depth, P): 67 | # pts_2d: 2 68 | # depth: 1 
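# A quick use of compute_box_3d / project_to_image above: build the 8 corners
# of a KITTI-style box (dim = [h, w, l] in meters, location = bottom-center in
# camera coordinates, rotation_y = yaw) and project them with a 3x4 camera
# matrix. The numbers are illustrative (a rounded KITTI P2), and the import
# assumes src/lib is on sys.path as _init_paths.py arranges:
import numpy as np
from utils.ddd_utils import compute_box_3d, project_to_image

P = np.array([[707.0, 0.0, 604.0, 45.75],
              [0.0, 707.0, 180.5, -0.345],
              [0.0, 0.0, 1.0, 0.005]], dtype=np.float32)
dim, location, rotation_y = [1.5, 1.6, 3.9], [0.0, 1.6, 12.0], 0.1
corners_3d = compute_box_3d(dim, location, rotation_y)   # (8, 3), camera frame
corners_2d = project_to_image(corners_3d, P)             # (8, 2), pixel coords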
69 | # P: 3 x 4 70 | # return: 3 71 | z = depth - P[2, 3] 72 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 73 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 74 | pt_3d = np.array([x, y, z], dtype=np.float32) 75 | return pt_3d 76 | 77 | def alpha2rot_y(alpha, x, cx, fx): 78 | """ 79 | Get rotation_y by alpha + theta - 180 80 | alpha : Observation angle of object, ranging [-pi..pi] 81 | x : Object center x to the camera center (x-W/2), in pixels 82 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 83 | """ 84 | rot_y = alpha + np.arctan2(x - cx, fx) 85 | if rot_y > np.pi: 86 | rot_y -= 2 * np.pi 87 | if rot_y < -np.pi: 88 | rot_y += 2 * np.pi 89 | return rot_y 90 | 91 | def rot_y2alpha(rot_y, x, cx, fx): 92 | """ 93 | Get rotation_y by alpha + theta - 180 94 | alpha : Observation angle of object, ranging [-pi..pi] 95 | x : Object center x to the camera center (x-W/2), in pixels 96 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 97 | """ 98 | alpha = rot_y - np.arctan2(x - cx, fx) 99 | if alpha > np.pi: 100 | alpha -= 2 * np.pi 101 | if alpha < -np.pi: 102 | alpha += 2 * np.pi 103 | return alpha 104 | 105 | 106 | def ddd2locrot(center, alpha, dim, depth, calib): 107 | # single image 108 | locations = unproject_2d_to_3d(center, depth, calib) 109 | locations[1] += dim[0] / 2 110 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 111 | return locations, rotation_y 112 | 113 | def project_3d_bbox(location, dim, rotation_y, calib): 114 | box_3d = compute_box_3d(dim, location, rotation_y) 115 | box_2d = project_to_image(box_3d, calib) 116 | return box_2d 117 | 118 | 119 | if __name__ == '__main__': 120 | calib = np.array( 121 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 122 | [0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 123 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 124 | dtype=np.float32) 125 | alpha = -0.20 126 | tl = np.array([712.40, 143.00], dtype=np.float32) 127 | br = np.array([810.73, 307.92], dtype=np.float32) 128 | ct = (tl + br) / 2 129 | rotation_y = 0.01 130 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 131 | print('rotation_y', rotation_y) -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = 
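# alpha (observation angle) and rotation_y (global yaw) differ only by the
# viewing-ray angle arctan2(x - cx, fx), so alpha2rot_y and rot_y2alpha above
# invert each other whenever the result stays inside [-pi, pi]. Values are
# illustrative; the import again assumes src/lib is on sys.path:
import numpy as np
from utils.ddd_utils import alpha2rot_y, rot_y2alpha

cx, fx = 604.0, 707.0
alpha, x = -0.20, 761.5                    # object center column in pixels
rot_y = alpha2rot_y(alpha, x, cx, fx)
assert np.isclose(rot_y2alpha(rot_y, x, cx, fx), alpha)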
queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | from .ddd_utils import ddd2locrot 8 | 9 | 10 | def get_pred_depth(depth): 11 | return depth 12 | 13 | def get_alpha(rot): 14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 16 | # return rot[:, 0] 17 | idx = rot[:, 1] > rot[:, 5] 18 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 19 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 20 | return alpha1 * idx + alpha2 * (1 - idx) 21 | 22 | 23 | def ddd_post_process_2d(dets, c, s, opt): 24 | # dets: batch x max_dets x dim 25 | # return 1-based class det list 26 | ret = [] 27 | include_wh = dets.shape[2] > 16 28 | for i in range(dets.shape[0]): 29 | top_preds = {} 30 | dets[i, :, :2] = transform_preds( 31 | dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h)) 32 | classes = dets[i, :, -1] 33 | for j in range(opt.num_classes): 34 | inds = (classes == j) 35 | top_preds[j + 1] = np.concatenate([ 36 | dets[i, inds, :3].astype(np.float32), 37 | get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32), 38 | get_pred_depth(dets[i, inds, 11:12]).astype(np.float32), 39 | dets[i, inds, 12:15].astype(np.float32)], axis=1) 40 | if include_wh: 41 | top_preds[j + 1] = np.concatenate([ 42 | top_preds[j + 1], 43 | transform_preds( 44 | dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h)) 45 | .astype(np.float32)], axis=1) 46 | ret.append(top_preds) 47 | return ret 48 | 49 | def ddd_post_process_3d(dets, calibs): 50 | # dets: batch x max_dets x dim 51 | # return 1-based class det list 52 | ret = [] 53 | for i in range(len(dets)): 54 | preds = {} 55 | for cls_ind in dets[i].keys(): 56 | preds[cls_ind] = [] 57 | for j in range(len(dets[i][cls_ind])): 58 | center = dets[i][cls_ind][j][:2] 59 | score = dets[i][cls_ind][j][2] 60 | alpha = dets[i][cls_ind][j][3] 61 | depth = dets[i][cls_ind][j][4] 62 | dimensions = dets[i][cls_ind][j][5:8] 63 | wh = dets[i][cls_ind][j][8:10] 64 | locations, rotation_y = ddd2locrot( 65 | center, alpha, dimensions, depth, calibs[0]) 66 | bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2, 67 | center[0] + wh[0] / 2, center[1] + wh[1] / 2] 68 | pred = [alpha] + bbox + dimensions.tolist() + \ 69 | locations.tolist() + [rotation_y, score] 70 | preds[cls_ind].append(pred) 71 | preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32) 72 | ret.append(preds) 73 | return ret 74 | 75 | def ddd_post_process(dets, c, s, calibs, opt): 76 | # dets: batch x max_dets x dim 77 | # return 1-based class det list 78 | dets = ddd_post_process_2d(dets, c, s, opt) 79 | dets = ddd_post_process_3d(dets, calibs) 80 | return dets 81 | 82 | 83 | def ctdet_post_process(dets, c, s, h, w, num_classes): 84 | # dets: batch x max_dets x dim 85 | # return 1-based class det dict 86 | ret = [] 87 | for i in range(dets.shape[0]): 88 | top_preds = {} 89 | dets[i, :, :2] = 
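# get_alpha above decodes the 8-number multi-bin orientation head: whichever
# bin wins the rot[:, 1] vs rot[:, 5] comparison contributes arctan2(sin, cos)
# plus that bin's center (-pi/2 or +pi/2). A round-trip with hand-built values
# (the import path assumes src/lib is on sys.path):
import numpy as np
from utils.post_process import get_alpha

alpha = -0.2
rot = np.zeros((1, 8), dtype=np.float32)
rot[0, 0], rot[0, 1] = 0.1, 0.9            # bin-1 classifier wins
rot[0, 2] = np.sin(alpha + 0.5 * np.pi)
rot[0, 3] = np.cos(alpha + 0.5 * np.pi)
rot[0, 4], rot[0, 5] = 0.9, 0.1
print(get_alpha(rot))                      # ~[-0.2]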
transform_preds( 90 | dets[i, :, 0:2], c[i], s[i], (w, h)) 91 | dets[i, :, 2:4] = transform_preds( 92 | dets[i, :, 2:4], c[i], s[i], (w, h)) 93 | classes = dets[i, :, -1] 94 | for j in range(num_classes): 95 | inds = (classes == j) 96 | top_preds[j + 1] = np.concatenate([ 97 | dets[i, inds, :4].astype(np.float32), 98 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 99 | ret.append(top_preds) 100 | return ret 101 | 102 | 103 | def multi_pose_post_process(dets, c, s, h, w): 104 | # dets: batch x max_dets x 40 105 | # return list of 39 in image coord 106 | ret = [] 107 | for i in range(dets.shape[0]): 108 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 109 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 110 | top_preds = np.concatenate( 111 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 112 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 113 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 114 | return ret 115 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import torch 10 | import torch.utils.data 11 | from opts import opts 12 | from models.model import create_model, load_model, save_model 13 | from models.data_parallel import DataParallel 14 | from logger import Logger 15 | from datasets.dataset_factory import get_dataset 16 | from trains.train_factory import train_factory 17 | 18 | 19 | def main(opt): 20 | torch.manual_seed(opt.seed) 21 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 22 | Dataset = get_dataset(opt.dataset, opt.task) 23 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 24 | print(opt) 25 | 26 | logger = Logger(opt) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 29 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 30 | 31 | print('Creating model...') 32 | model = create_model(opt.arch, opt.heads, opt.head_conv) 33 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 34 | start_epoch = 0 35 | if opt.load_model != '': 36 | model, optimizer, start_epoch = load_model( 37 | model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) 38 | 39 | Trainer = train_factory[opt.task] 40 | trainer = Trainer(opt, model, optimizer) 41 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 42 | 43 | print('Setting up data...') 44 | val_loader = torch.utils.data.DataLoader( 45 | Dataset(opt, 'val'), 46 | batch_size=1, 47 | shuffle=False, 48 | num_workers=1, 49 | pin_memory=True 50 | ) 51 | 52 | if 
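# multi_pose_post_process above packs each person as 39 numbers under class 1:
# [x1, y1, x2, y2, score, x_0, y_0, ..., x_16, y_16] with 17 COCO joints. A tiny
# unpack of one such row, using a stand-in array:
import numpy as np

row = np.arange(39, dtype=np.float32)      # placeholder for one returned person
bbox, score = row[:4], row[4]
keypoints = row[5:39].reshape(17, 2)       # (x, y) per joint, image coordinates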
opt.test: 53 | _, preds = trainer.val(0, val_loader) 54 | val_loader.dataset.run_eval(preds, opt.save_dir) 55 | return 56 | 57 | train_loader = torch.utils.data.DataLoader( 58 | Dataset(opt, 'train'), 59 | batch_size=opt.batch_size, 60 | shuffle=True, 61 | num_workers=opt.num_workers, 62 | pin_memory=True, 63 | drop_last=True 64 | ) 65 | 66 | print('Starting training...') 67 | best = 1e10 68 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 69 | mark = epoch if opt.save_all else 'last' 70 | log_dict_train, _ = trainer.train(epoch, train_loader) 71 | logger.write('epoch: {} |'.format(epoch)) 72 | for k, v in log_dict_train.items(): 73 | logger.scalar_summary('train_{}'.format(k), v, epoch) 74 | logger.write('{} {:8f} | '.format(k, v)) 75 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 76 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 77 | epoch, model, optimizer) 78 | with torch.no_grad(): 79 | log_dict_val, preds = trainer.val(epoch, val_loader) 80 | for k, v in log_dict_val.items(): 81 | logger.scalar_summary('val_{}'.format(k), v, epoch) 82 | logger.write('{} {:8f} | '.format(k, v)) 83 | if log_dict_val[opt.metric] < best: 84 | best = log_dict_val[opt.metric] 85 | save_model(os.path.join(opt.save_dir, 'model_best.pth'), 86 | epoch, model) 87 | else: 88 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 89 | epoch, model, optimizer) 90 | logger.write('\n') 91 | if epoch in opt.lr_step: 92 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 93 | epoch, model, optimizer) 94 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 95 | print('Drop LR to', lr) 96 | for param_group in optimizer.param_groups: 97 | param_group['lr'] = lr 98 | logger.close() 99 | 100 | if __name__ == '__main__': 101 | opt = opts().parse() 102 | main(opt) -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import json 9 | import cv2 10 | import numpy as np 11 | import time 12 | from progress.bar import Bar 13 | import torch 14 | 15 | from external.nms import soft_nms 16 | from opts import opts 17 | from logger import Logger 18 | from utils.utils import AverageMeter 19 | from datasets.dataset_factory import dataset_factory 20 | from detectors.detector_factory import detector_factory 21 | 22 | class PrefetchDataset(torch.utils.data.Dataset): 23 | def __init__(self, opt, dataset, pre_process_func): 24 | self.images = dataset.images 25 | self.load_image_func = dataset.coco.loadImgs 26 | self.img_dir = dataset.img_dir 27 | self.pre_process_func = pre_process_func 28 | self.opt = opt 29 | 30 | def __getitem__(self, index): 31 | img_id = self.images[index] 32 | img_info = self.load_image_func(ids=[img_id])[0] 33 | img_path = os.path.join(self.img_dir, img_info['file_name']) 34 | image = cv2.imread(img_path) 35 | images, meta = {}, {} 36 | for scale in opt.test_scales: 37 | if opt.task == 'ddd': 38 | images[scale], meta[scale] = self.pre_process_func( 39 | image, scale, img_info['calib']) 40 | else: 41 | images[scale], meta[scale] = self.pre_process_func(image, scale) 42 | return img_id, {'images': images, 'image': image, 'meta': meta} 43 | 44 | def __len__(self): 45 | return len(self.images) 46 | 47 | def prefetch_test(opt): 48 | 
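# The step schedule in main() above: at every epoch listed in opt.lr_step the
# learning rate is reset to opt.lr * 0.1 ** (position in the list + 1), i.e. one
# extra factor of 10 per listed epoch. Shown with typical ctdet defaults (the
# exact values come from opts.py and the experiment scripts; illustrative here):
base_lr, lr_step = 1.25e-4, [90, 120]
for epoch in lr_step:
    print(epoch, base_lr * (0.1 ** (lr_step.index(epoch) + 1)))
    # 90 -> 1.25e-05, 120 -> 1.25e-06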
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 49 | 50 | Dataset = dataset_factory[opt.dataset] 51 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 52 | print(opt) 53 | Logger(opt) 54 | Detector = detector_factory[opt.task] 55 | 56 | split = 'val' if not opt.trainval else 'test' 57 | dataset = Dataset(opt, split) 58 | detector = Detector(opt) 59 | 60 | data_loader = torch.utils.data.DataLoader( 61 | PrefetchDataset(opt, dataset, detector.pre_process), 62 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True) 63 | 64 | results = {} 65 | num_iters = len(dataset) 66 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 67 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 68 | avg_time_stats = {t: AverageMeter() for t in time_stats} 69 | for ind, (img_id, pre_processed_images) in enumerate(data_loader): 70 | ret = detector.run(pre_processed_images) 71 | results[img_id.numpy().astype(np.int32)[0]] = ret['results'] 72 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 73 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 74 | for t in avg_time_stats: 75 | avg_time_stats[t].update(ret[t]) 76 | Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format( 77 | t, tm = avg_time_stats[t]) 78 | bar.next() 79 | bar.finish() 80 | dataset.run_eval(results, opt.save_dir) 81 | 82 | def test(opt): 83 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 84 | 85 | Dataset = dataset_factory[opt.dataset] 86 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 87 | print(opt) 88 | Logger(opt) 89 | Detector = detector_factory[opt.task] 90 | 91 | split = 'val' if not opt.trainval else 'test' 92 | dataset = Dataset(opt, split) 93 | detector = Detector(opt) 94 | 95 | results = {} 96 | num_iters = len(dataset) 97 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 98 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 99 | avg_time_stats = {t: AverageMeter() for t in time_stats} 100 | for ind in range(num_iters): 101 | img_id = dataset.images[ind] 102 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 103 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 104 | 105 | if opt.task == 'ddd': 106 | ret = detector.run(img_path, img_info['calib']) 107 | else: 108 | ret = detector.run(img_path) 109 | 110 | results[img_id] = ret['results'] 111 | 112 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 113 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 114 | for t in avg_time_stats: 115 | avg_time_stats[t].update(ret[t]) 116 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 117 | bar.next() 118 | bar.finish() 119 | dataset.run_eval(results, opt.save_dir) 120 | 121 | if __name__ == '__main__': 122 | opt = opts().parse() 123 | if opt.not_prefetch_test: 124 | test(opt) 125 | else: 126 | prefetch_test(opt) -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_hourglass_weight.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | MODEL_PATH = '../../models/ExtremeNet_500000.pkl' 6 | OUT_PATH = '../../models/ExtremeNet_500000.pth' 7 | 8 | import torch 9 | state_dict = torch.load(MODEL_PATH) 10 | key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \ 11 | 'r_heats': 'hm_r', 'ct_heats': 'hm_c', \ 12 | 't_regrs': 'reg_t', 'l_regrs': 'reg_l', \ 13 | 'b_regrs': 'reg_b', 'r_regrs': 'reg_r'} 14 | 15 | out = {} 16 | for k in state_dict.keys(): 17 | changed = False 18 | for m in key_map.keys(): 19 | if m in k: 20 | if 'ct_heats' in k and m == 't_heats': 21 | continue 22 | new_k = k.replace(m, key_map[m]) 23 | out[new_k] = state_dict[k] 24 | changed = True 25 | print('replace {} to {}'.format(k, new_k)) 26 | if not changed: 27 | out[k] = state_dict[k] 28 | data = {'epoch': 0, 29 | 'state_dict': out} 30 | torch.save(data, OUT_PATH) 31 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = this_dir + '../../data/coco/annotations/instances_val2017.json' 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = this_dir + '../../data/coco/annotations/person_keypoints_val2017.json' 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/get_kitti.sh: -------------------------------------------------------------------------------- 1 | mkdir kitti 2 | cd kitti 3 | wget http://www.cvlibs.net/download.php?file=data_object_image_2.zip 4 | wget http://www.cvlibs.net/download.php?file=data_object_label_2.zip 5 | wget http://www.cvlibs.net/download.php?file=data_object_calib.zip 6 | unzip data_object_image_2.zip 7 | unzip data_object_label_2.zip 8 | unzip data_object_calib.zip 9 | 10 | 
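# eval_coco.py above hands sys.argv[1] straight to coco.loadRes(), so the
# prediction file must be standard COCO detection results: one dict per box
# with an xywh bbox. A minimal way to produce such a file (all values and the
# file name are illustrative):
import json

preds = [{"image_id": 139, "category_id": 1,
          "bbox": [100.0, 50.0, 30.0, 60.0],     # [x, y, width, height]
          "score": 0.93}]
with open("results.json", "w") as f:
    json.dump(preds, f)
# then:  python eval_coco.py results.json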
-------------------------------------------------------------------------------- /src/tools/get_pascal_voc.sh: -------------------------------------------------------------------------------- 1 | mkdir voc 2 | cd voc 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 5 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 6 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar 8 | tar xvf VOCtrainval_06-Nov-2007.tar 9 | tar xvf VOCtest_06-Nov-2007.tar 10 | tar xvf VOCdevkit_08-Jun-2007.tar 11 | tar xvf VOCtrainval_11-May-2012.tar 12 | tar xvf VOCdevkit_18-May-2011.tar 13 | rm VOCtrainval_06-Nov-2007.tar 14 | rm VOCtest_06-Nov-2007.tar 15 | rm VOCdevkit_08-Jun-2007.tar 16 | rm VOCtrainval_11-May-2012.tar 17 | rm VOCdevkit_18-May-2011.tar 18 | mkdir images 19 | cp VOCdevkit/VOC2007/JPEGImages/* images/ 20 | cp VOCdevkit/VOC2012/JPEGImages/* images/ 21 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 22 | unzip PASCAL_VOC.zip 23 | rm PASCAL_VOC.zip 24 | mv PASCAL_VOC annotations/ 25 | cd .. 26 | python merge_pascal_json.py 27 | -------------------------------------------------------------------------------- /src/tools/kitti_eval/README.md: -------------------------------------------------------------------------------- 1 | # kitti_eval 2 | 3 | `evaluate_object_3d_offline.cpp`evaluates your KITTI detection locally on your own computer using your validation data selected from KITTI training dataset, with the following metrics: 4 | 5 | - overlap on image (AP) 6 | - oriented overlap on image (AOS) 7 | - overlap on ground-plane (AP) 8 | - overlap in 3D (AP) 9 | 10 | Compile `evaluate_object_3d_offline.cpp` with dependency of Boost and Linux `dirent.h` (You should already have it under most Linux). 11 | 12 | Run the evalutaion by: 13 | 14 | ./evaluate_object_3d_offline groundtruth_dir result_dir 15 | 16 | Note that you don't have to detect over all KITTI training data. The evaluator only evaluates samples whose result files exist. 17 | 18 | 19 | ### Updates 20 | 21 | - June, 2017: 22 | * Fixed the bug of detection box filtering based on min height according to KITTI's note on 25.04.2017. 23 | -------------------------------------------------------------------------------- /src/tools/kitti_eval/evaluate_object_3d_offline: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/kitti_eval/evaluate_object_3d_offline -------------------------------------------------------------------------------- /src/tools/kitti_eval/mail.h: -------------------------------------------------------------------------------- 1 | #ifndef MAIL_H 2 | #define MAIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Mail { 9 | 10 | public: 11 | 12 | Mail (std::string email = "") { 13 | if (email.compare("")) { 14 | mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w"); 15 | fprintf(mail,"To: %s\n", email.c_str()); 16 | fprintf(mail,"From: noreply@cvlibs.net\n"); 17 | fprintf(mail,"Subject: KITTI Evaluation Benchmark\n"); 18 | fprintf(mail,"\n\n"); 19 | } else { 20 | mail = 0; 21 | } 22 | } 23 | 24 | ~Mail() { 25 | if (mail) { 26 | pclose(mail); 27 | } 28 | } 29 | 30 | void msg (const char *format, ...) 
{ 31 | va_list args; 32 | va_start(args,format); 33 | if (mail) { 34 | vfprintf(mail,format,args); 35 | fprintf(mail,"\n"); 36 | } 37 | vprintf(format,args); 38 | printf("\n"); 39 | va_end(args); 40 | } 41 | 42 | private: 43 | 44 | FILE *mail; 45 | 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/tools/merge_pascal_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | # ANNOT_PATH = '/home/zxy/Datasets/VOC/annotations/' 4 | ANNOT_PATH = 'voc/annotations/' 5 | OUT_PATH = ANNOT_PATH 6 | INPUT_FILES = ['pascal_train2012.json', 'pascal_val2012.json', 7 | 'pascal_train2007.json', 'pascal_val2007.json'] 8 | OUTPUT_FILE = 'pascal_trainval0712.json' 9 | KEYS = ['images', 'type', 'annotations', 'categories'] 10 | MERGE_KEYS = ['images', 'annotations'] 11 | 12 | out = {} 13 | tot_anns = 0 14 | for i, file_name in enumerate(INPUT_FILES): 15 | data = json.load(open(ANNOT_PATH + file_name, 'r')) 16 | print('keys', data.keys()) 17 | if i == 0: 18 | for key in KEYS: 19 | out[key] = data[key] 20 | print(file_name, key, len(data[key])) 21 | else: 22 | out['images'] += data['images'] 23 | for j in range(len(data['annotations'])): 24 | data['annotations'][j]['id'] += tot_anns 25 | out['annotations'] += data['annotations'] 26 | print(file_name, 'images', len(data['images'])) 27 | print(file_name, 'annotations', len(data['annotations'])) 28 | tot_anns = len(out['annotations']) 29 | print('tot', len(out['annotations'])) 30 | json.dump(out, open(OUT_PATH + OUTPUT_FILE, 'w')) 31 | -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections. 
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 
39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Xinlei Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/datasets/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from 
__future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/model/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | def bbox_transform(ex_rois, gt_rois): 14 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 15 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 16 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 17 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 18 | 19 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 20 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 21 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 22 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 23 | 24 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 25 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 26 | targets_dw = np.log(gt_widths / ex_widths) 27 | targets_dh = np.log(gt_heights / ex_heights) 28 | 29 | targets = np.vstack( 30 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 31 | return targets 32 | 33 | 34 | def bbox_transform_inv(boxes, deltas): 35 | if boxes.shape[0] == 0: 36 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 37 | 38 | boxes = boxes.astype(deltas.dtype, copy=False) 39 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 40 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 41 | ctr_x = boxes[:, 0] + 0.5 * widths 42 | ctr_y = boxes[:, 1] + 0.5 * heights 43 | 44 | dx = deltas[:, 0::4] 45 | dy = deltas[:, 1::4] 46 | dw = deltas[:, 
2::4] 47 | dh = deltas[:, 3::4] 48 | 49 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 50 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 51 | pred_w = np.exp(dw) * widths[:, np.newaxis] 52 | pred_h = np.exp(dh) * heights[:, np.newaxis] 53 | 54 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 55 | # x1 56 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 57 | # y1 58 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 59 | # x2 60 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 61 | # y2 62 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 63 | 64 | return pred_boxes 65 | 66 | 67 | def clip_boxes(boxes, im_shape): 68 | """ 69 | Clip boxes to image boundaries. 70 | """ 71 | 72 | # x1 >= 0 73 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 74 | # y1 >= 0 75 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 76 | # x2 < im_shape[1] 77 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 78 | # y2 < im_shape[0] 79 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 80 | return boxes 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/model/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from model.config import cfg 12 | from nms.gpu_nms import gpu_nms 13 | from nms.cpu_nms import cpu_nms 14 | 15 | def nms(dets, thresh, force_cpu=False): 16 | """Dispatch to either CPU or GPU NMS implementations.""" 17 | 18 | if dets.shape[0] == 0: 19 | return [] 20 | if cfg.USE_GPU_NMS and not force_cpu: 21 | return gpu_nms(dets, thresh, device_id=0) 22 | else: 23 | return cpu_nms(dets, thresh) 24 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/nms/.gitignore -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterNet/4c50fd3a46bdf63dbf2082c5cbb3458d39579e6c/src/tools/voc_eval_lib/nms/__init__.py -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, 
np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- 
/src/tools/voc_eval_lib/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # --------------------------------------------------------
7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | def find_in_path(name, path): 16 | "Find a file in a search path" 17 | #adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | 27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 28 | and values giving the absolute path to each directory. 29 | 30 | Starts by looking for the CUDAHOME env variable. If not found, everything 31 | is based on finding 'nvcc' in the PATH. 32 | """ 33 | 34 | # first check if the CUDAHOME env variable is in use 35 | if 'CUDAHOME' in os.environ: 36 | home = os.environ['CUDAHOME'] 37 | nvcc = pjoin(home, 'bin', 'nvcc') 38 | else: 39 | # otherwise, search the PATH for NVCC 40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 42 | if nvcc is None: 43 | raise EnvironmentError('The nvcc binary could not be ' 44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 45 | home = os.path.dirname(os.path.dirname(nvcc)) 46 | 47 | cudaconfig = {'home':home, 'nvcc':nvcc, 48 | 'include': pjoin(home, 'include'), 49 | 'lib64': pjoin(home, 'lib64')} 50 | for k, v in cudaconfig.items(): 51 | if not os.path.exists(v): 52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 53 | 54 | return cudaconfig 55 | CUDA = locate_cuda() 56 | 57 | # Obtain the numpy include directory. This logic works across numpy versions. 58 | try: 59 | numpy_include = np.get_include() 60 | except AttributeError: 61 | numpy_include = np.get_numpy_include() 62 | 63 | def customize_compiler_for_nvcc(self): 64 | """inject deep into distutils to customize how the dispatch 65 | to gcc/nvcc works. 66 | 67 | If you subclass UnixCCompiler, it's not trivial to get your subclass 68 | injected in, and still have the right customizations (i.e. 69 | distutils.sysconfig.customize_compiler) run on it. So instead of going 70 | the OO route, I have this. Note, it's kind of like a weird functional 71 | subclassing going on.""" 72 | 73 | # tell the compiler it can process .cu files 74 | self.src_extensions.append('.cu') 75 | 76 | # save references to the default compiler_so and _compile methods 77 | default_compiler_so = self.compiler_so 78 | super = self._compile 79 | 80 | # now redefine the _compile method. This gets executed for each 81 | # object but distutils doesn't have the ability to change compilers 82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 84 | print(extra_postargs) 85 | if os.path.splitext(src)[1] == '.cu': 86 | # use the cuda for .cu files 87 | self.set_executable('compiler_so', CUDA['nvcc']) 88 | # use only a subset of the extra_postargs, which are 1-1 translated 89 | # from the extra_compile_args in the Extension class 90 | postargs = extra_postargs['nvcc'] 91 | else: 92 | postargs = extra_postargs['gcc'] 93 | 94 | super(obj, src, ext, cc_args, postargs, pp_opts) 95 | # reset the default compiler_so, which we might have changed for cuda 96 | self.compiler_so = default_compiler_so 97 | 98 | # inject our redefined _compile method into the class 99 | self._compile = _compile 100 | 101 | # run the customize_compiler 102 | class custom_build_ext(build_ext): 103 | def build_extensions(self): 104 | customize_compiler_for_nvcc(self.compiler) 105 | build_ext.build_extensions(self) 106 | 107 | ext_modules = [ 108 | Extension( 109 | "utils.cython_bbox", 110 | ["utils/bbox.pyx"], 111 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 112 | include_dirs = [numpy_include] 113 | ), 114 | Extension( 115 | "nms.cpu_nms", 116 | ["nms/cpu_nms.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs = [numpy_include] 119 | ), 120 | Extension('nms.gpu_nms', 121 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 122 | library_dirs=[CUDA['lib64']], 123 | libraries=['cudart'], 124 | language='c++', 125 | runtime_library_dirs=[CUDA['lib64']], 126 | # this syntax is specific to this build system 127 | # we're only going to use certain compiler args with nvcc and not with gcc 128 | # the implementation of this trick is in customize_compiler() below 129 | extra_compile_args={'gcc': ["-Wno-unused-function"], 130 | 'nvcc': ['-arch=sm_61', 131 | '--ptxas-options=-v', 132 | '-c', 133 | '--compiler-options', 134 | "'-fPIC'"]}, 135 | include_dirs = [numpy_include, CUDA['include']] 136 | ) 137 | ] 138 | 139 | setup( 140 | name='tf_faster_rcnn', 141 | ext_modules=ext_modules, 142 | # inject our custom trigger 143 | cmdclass={'build_ext': custom_build_ext}, 144 | ) 145 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.h 4 | *.hpp 5 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] 
boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """Convert a list of images into a network input. 19 | 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 
21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | im_shape = im.shape 38 | im_size_min = np.min(im_shape[0:2]) 39 | im_size_max = np.max(im_shape[0:2]) 40 | im_scale = float(target_size) / float(im_size_min) 41 | # Prevent the biggest axis from being more than MAX_SIZE 42 | if np.round(im_scale * im_size_max) > max_size: 43 | im_scale = float(max_size) / float(im_size_max) 44 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 45 | interpolation=cv2.INTER_LINEAR) 46 | 47 | return im, im_scale 48 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/tools/voc_eval_lib/utils/visualization.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from six.moves import range 12 | import PIL.Image as Image 13 | import PIL.ImageColor as ImageColor 14 | import PIL.ImageDraw as ImageDraw 15 | import PIL.ImageFont as ImageFont 16 | 17 | STANDARD_COLORS = [ 18 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 19 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 20 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 21 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 22 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 23 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 24 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 25 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 
'IndianRed', 'Ivory', 'Khaki', 26 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 27 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 28 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 29 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 30 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 31 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 32 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 33 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 34 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 35 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 36 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 37 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 38 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 39 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 40 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 41 | ] 42 | 43 | NUM_COLORS = len(STANDARD_COLORS) 44 | 45 | try: 46 | FONT = ImageFont.truetype('arial.ttf', 24) 47 | except IOError: 48 | FONT = ImageFont.load_default() 49 | 50 | def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4): 51 | draw = ImageDraw.Draw(image) 52 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 53 | draw.line([(left, top), (left, bottom), (right, bottom), 54 | (right, top), (left, top)], width=thickness, fill=color) 55 | text_bottom = bottom 56 | # Reverse list and print from bottom to top. 57 | text_width, text_height = font.getsize(display_str) 58 | margin = np.ceil(0.05 * text_height) 59 | draw.rectangle( 60 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 61 | text_bottom)], 62 | fill=color) 63 | draw.text( 64 | (left + margin, text_bottom - text_height - margin), 65 | display_str, 66 | fill='black', 67 | font=font) 68 | 69 | return image 70 | 71 | def draw_bounding_boxes(image, gt_boxes, im_info): 72 | num_boxes = gt_boxes.shape[0] 73 | gt_boxes_new = gt_boxes.copy() 74 | gt_boxes_new[:,:4] = np.round(gt_boxes_new[:,:4].copy() / im_info[2]) 75 | disp_image = Image.fromarray(np.uint8(image[0])) 76 | 77 | for i in range(num_boxes): 78 | this_class = int(gt_boxes_new[i, 4]) 79 | disp_image = _draw_single_box(disp_image, 80 | gt_boxes_new[i, 0], 81 | gt_boxes_new[i, 1], 82 | gt_boxes_new[i, 2], 83 | gt_boxes_new[i, 3], 84 | 'N%02d-C%02d' % (i, this_class), 85 | FONT, 86 | color=STANDARD_COLORS[this_class % NUM_COLORS]) 87 | 88 | image[0, :] = np.array(disp_image) 89 | return image 90 | --------------------------------------------------------------------------------
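A minimal usage sketch for `draw_bounding_boxes` in `utils/visualization.py` (not part of the repository; the dummy image, boxes, and `im_info` values are hypothetical). It only assumes what the function itself does: `image` is a batch whose first element is an H x W x 3 array, each row of `gt_boxes` is `[x1, y1, x2, y2, class_id]` in the network's (scaled) input resolution, and `im_info[2]` is the scale factor the boxes are divided by before drawing.

import numpy as np
from utils.visualization import draw_bounding_boxes  # assumes voc_eval_lib is on sys.path

# one dummy 480x640 image in a batch of size 1 (any float or uint8 array works)
image = np.zeros((1, 480, 640, 3), dtype=np.float32)
# two hypothetical ground-truth boxes: [x1, y1, x2, y2, class_id]
gt_boxes = np.array([[ 50.,  60., 200., 220., 3.],
                     [300., 100., 500., 300., 7.]], dtype=np.float32)
# assumed [height, width, scale]; scale 1.0 means the boxes are already in image coordinates
im_info = np.array([480., 640., 1.0], dtype=np.float32)

vis = draw_bounding_boxes(image.copy(), gt_boxes, im_info)
# vis[0] now holds the image with the boxes and 'N00-C03' / 'N01-C07' labels drawn on it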