├── .gitignore ├── .travis.yml ├── LICENSE ├── NOTICE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments ├── single_pose_movenet_active.sh └── single_pose_movenet_coco.sh ├── images └── NOTICE ├── models ├── .gitignore └── movenet.pth ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── det1.png ├── det2.png ├── fig2.png ├── pose1.png ├── pose2.png └── pose3.png ├── requirements.txt └── src ├── _init_paths.py ├── demo.py ├── lib ├── datasets │ ├── dataset │ │ ├── active.py │ │ └── coco_hp.py │ ├── dataset_factory.py │ └── sample │ │ ├── multi_pose.py │ │ └── single_pose.py ├── detectors │ ├── base_detector.py │ ├── detector_factory.py │ ├── multi_pose.py │ └── single_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── backbone_utils.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2.py │ │ └── movenet.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── multi_pose.py │ ├── single_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── main.py ├── test.py └── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_active_to_coco.py ├── convert_kitti_to_coco.py ├── convert_mpii_to_coco.py ├── draw.py ├── eval_coco.py ├── eval_coco_hp.py ├── filter_hp.py ├── get_kitti.sh ├── get_pascal_voc.sh ├── merge_active_coco_json.py ├── reval.py ├── tflite_weight_viewer.py └── vis_pred.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.txt 3 | pre/* 4 | legacy/* 5 | src/_site/* 6 | .idea/* 7 | .jekyll-cache 8 | .DS_Store 9 | debug/* 10 | *.DS_Store 11 | *.json 12 | images/* 13 | images_bak/* 14 | *.mat 15 | src/.vscode/* 16 | preds/* 17 | *.h5 18 | *.pth 19 | *.checkpoint 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # dotenv 101 | .env 102 | 103 | # virtualenv 104 | .venv 105 | venv/ 106 | ENV/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | # image file in images/ 122 | images/*.jpg 123 | images/active/*.jpg 124 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | dist: xenial # ubuntu-16.04 3 | language: python 4 | cache: pip 5 | python: 6 | - 3.6 7 | - 3.7 8 | install: 9 | - pip install flake8 10 | - pip install -r requirements.txt 11 | before_script: 12 | # stop the build if there are Python syntax errors or undefined names 13 | - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics 14 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 15 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 16 | script: 17 | - true # add other tests here 18 | notifications: 19 | on_success: change 20 | on_failure: change # `always` will be the setting once code changes slow down 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Portions of this software are derived from tf-faster-rcnn. 2 | 3 | ============================================================================== 4 | tf-faster-rcnn licence 5 | ============================================================================== 6 | 7 | MIT License 8 | 9 | Copyright (c) 2017 Xinlei Chen 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 28 | 29 | 30 | Portions of this software are derived from human-pose-estimation.pytorch. 31 | 32 | ============================================================================== 33 | human-pose-estimation.pytorch licence 34 | ============================================================================== 35 | MIT License 36 | 37 | Copyright (c) Microsoft Corporation. All rights reserved. 38 | 39 | Permission is hereby granted, free of charge, to any person obtaining a copy 40 | of this software and associated documentation files (the "Software"), to deal 41 | in the Software without restriction, including without limitation the rights 42 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 43 | copies of the Software, and to permit persons to whom the Software is 44 | furnished to do so, subject to the following conditions: 45 | 46 | The above copyright notice and this permission notice shall be included in all 47 | copies or substantial portions of the Software. 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 52 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE 56 | 57 | Portions of this software are derived from CornerNet. 58 | 59 | ============================================================================== 60 | CornerNet licence 61 | ============================================================================== 62 | 63 | BSD 3-Clause License 64 | 65 | Copyright (c) 2018, University of Michigan 66 | All rights reserved. 
67 | 68 | Redistribution and use in source and binary forms, with or without 69 | modification, are permitted provided that the following conditions are met: 70 | 71 | * Redistributions of source code must retain the above copyright notice, this 72 | list of conditions and the following disclaimer. 73 | 74 | * Redistributions in binary form must reproduce the above copyright notice, 75 | this list of conditions and the following disclaimer in the documentation 76 | and/or other materials provided with the distribution. 77 | 78 | * Neither the name of the copyright holder nor the names of its 79 | contributors may be used to endorse or promote products derived from 80 | this software without specific prior written permission. 81 | 82 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 83 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 85 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 86 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 88 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 89 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 90 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 91 | 92 | 93 | Portions of this software are derived from DCNv2. 94 | 95 | ============================================================================== 96 | DCNv2 licence 97 | ============================================================================== 98 | 99 | BSD 3-Clause License 100 | 101 | Copyright (c) 2019, Charles Shang 102 | All rights reserved. 103 | 104 | Redistribution and use in source and binary forms, with or without 105 | modification, are permitted provided that the following conditions are met: 106 | 107 | 1. Redistributions of source code must retain the above copyright notice, this 108 | list of conditions and the following disclaimer. 109 | 110 | 2. Redistributions in binary form must reproduce the above copyright notice, 111 | this list of conditions and the following disclaimer in the documentation 112 | and/or other materials provided with the distribution. 113 | 114 | 3. Neither the name of the copyright holder nor the names of its 115 | contributors may be used to endorse or promote products derived from 116 | this software without specific prior written permission. 117 | 118 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 119 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 120 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 121 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 122 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 123 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 124 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 125 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 126 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 127 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
128 | 129 | ============================================================================== 130 | DLA licence 131 | ============================================================================== 132 | 133 | BSD 3-Clause License 134 | 135 | Copyright (c) 2018, Fisher Yu 136 | All rights reserved. 137 | 138 | Redistribution and use in source and binary forms, with or without 139 | modification, are permitted provided that the following conditions are met: 140 | 141 | * Redistributions of source code must retain the above copyright notice, this 142 | list of conditions and the following disclaimer. 143 | 144 | * Redistributions in binary form must reproduce the above copyright notice, 145 | this list of conditions and the following disclaimer in the documentation 146 | and/or other materials provided with the distribution. 147 | 148 | * Neither the name of the copyright holder nor the names of its 149 | contributors may be used to endorse or promote products derived from 150 | this software without specific prior written permission. 151 | 152 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 153 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 154 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 155 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 156 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 157 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 158 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 159 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 160 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 161 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/single_pose_movenet_active.sh: -------------------------------------------------------------------------------- 1 | # finetune scripts for movenet 2 | cd /Users/rachel/PycharmProjects/movenet/src 3 | # python main.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --batch_size 24 --master_batch 4 --lr 5e-4 --gpus 0,1,2,3 --num_epochs 150 --lr_step 30 60 90 --num_workers 16 --load_model ../models/movenet.pth 4 | # test 7e -3 5e-3 1e-3 5e-4 1e-4 5 | # python test.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --resume 6 | # # flip test 7 | # python test.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --resume --flip_test 8 | python demo.py single_pose --dataset active --arch movenet --demo ../images/1111error/ --load_model ../models/movenet_thunder.pth --K 1 --gpus -1 --debug 2 #--vis_thresh 0.0 --not_reg_offset 9 | 10 | cd .. 
11 | -------------------------------------------------------------------------------- /experiments/single_pose_movenet_coco.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | python main.py single_pose --exp_id coac_movenet --dataset active_coco --dataset active_coco --arch movenet --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3 --num_epochs 250 --lr_step 120,150,180,200,230 --num_workers 16 --resume #--load_model ../models/ctdet_movenet.pth 3 | # test 4 | python test.py single_pose --exp_id coac_movenet --dataset active_coco --arch movenet --keep_res --resume 5 | # flip test 6 | python test.py single_pose --exp_id coac_movenet --dataset active_coco --arch movenet --keep_res --resume --flip_test 7 | cd .. -------------------------------------------------------------------------------- /images/NOTICE: -------------------------------------------------------------------------------- 1 | The demo images are licensed as United States government work: 2 | https://www.usa.gov/government-works 3 | 4 | The image files were obtained on Jan 13, 2018 from the following 5 | URLs. 6 | 7 | 16004479832_a748d55f21_k.jpg 8 | https://www.flickr.com/photos/archivesnews/16004479832 9 | 10 | 18124840932_e42b3e377c_k.jpg 11 | https://www.flickr.com/photos/usnavy/18124840932 12 | 13 | 33887522274_eebd074106_k.jpg 14 | https://www.flickr.com/photos/usaid_pakistan/33887522274 15 | 16 | 15673749081_767a7fa63a_k.jpg 17 | https://www.flickr.com/photos/usnavy/15673749081 18 | 19 | 34501842524_3c858b3080_k.jpg 20 | https://www.flickr.com/photos/departmentofenergy/34501842524 21 | 22 | 24274813513_0cfd2ce6d0_k.jpg 23 | https://www.flickr.com/photos/dhsgov/24274813513 24 | 25 | 19064748793_bb942deea1_k.jpg 26 | https://www.flickr.com/photos/statephotos/19064748793 27 | 28 | 33823288584_1d21cf0a26_k.jpg 29 | https://www.flickr.com/photos/cbpphotos/33823288584 30 | 31 | 17790319373_bd19b24cfc_k.jpg 32 | https://www.flickr.com/photos/secdef/17790319373 33 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /models/movenet.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/models/movenet.pth -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 
9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | 26 | - [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation: 27 | 28 | ~~~ 29 | cd $CenterNet_ROOT/tools/ 30 | python gen_coco_extreme_points.py 31 | ~~~ 32 | It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`. 33 | 34 | ### Pascal VOC 35 | 36 | - Run 37 | 38 | ~~~ 39 | cd $CenterNet_ROOT/tools/ 40 | bash get_pascal_voc.sh 41 | ~~~ 42 | - The above script includes: 43 | - Download, unzip, and move Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/). 44 | - [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) Pascal VOC annotation in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)). 45 | - Combine train/val 2007/2012 annotation files into a single json. 46 | 47 | 48 | - Move the created `voc` folder to `data` (or create symlinks) to make the data folder like: 49 | 50 | ~~~ 51 | ${CenterNet_ROOT} 52 | |-- data 53 | `-- |-- voc 54 | `-- |-- annotations 55 | | |-- pascal_trainval0712.json 56 | | |-- pascal_test2017.json 57 | |-- images 58 | | |-- 000001.jpg 59 | | ...... 60 | `-- VOCdevkit 61 | 62 | ~~~ 63 | The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py). 64 | 65 | ### KITTI 66 | 67 | - Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip. 68 | 69 | - Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below 70 | 71 | ~~~ 72 | ${CenterNet_ROOT} 73 | |-- data 74 | `-- |-- kitti 75 | `-- |-- training 76 | | |-- image_2 77 | | |-- label_2 78 | | |-- calib 79 | |-- ImageSets_3dop 80 | | |-- test.txt 81 | | |-- train.txt 82 | | |-- val.txt 83 | | |-- trainval.txt 84 | `-- ImageSets_subcnn 85 | |-- test.txt 86 | |-- train.txt 87 | |-- val.txt 88 | |-- trainval.txt 89 | ~~~ 90 | 91 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation. 
92 | 93 | - Link image folder 94 | 95 | ~~~ 96 | cd ${CenterNet_ROOT}/data/kitti/ 97 | mkdir images 98 | ln -s training/image_2 images/trainval 99 | ~~~ 100 | 101 | - The data structure should look like: 102 | 103 | ~~~ 104 | ${CenterNet_ROOT} 105 | |-- data 106 | `-- |-- kitti 107 | `-- |-- annotations 108 | | |-- kitti_3dop_train.json 109 | | |-- kitti_3dop_val.json 110 | | |-- kitti_subcnn_train.json 111 | | |-- kitti_subcnn_val.json 112 | `-- images 113 | |-- trainval 114 | |-- test 115 | ~~~ 116 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials to develop CenterNet. `src/lib/opts.py` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI format to COCO format. 9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and annotation path. 10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/trains/`, and `src/lib/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list (one entry per stage; a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads` (a minimal sketch appears a little further below). 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. 20 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution; without it, the images are resized to `512 x 512`. You can add `--flip_test` or `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip test and multi-scale test; the expected APs are `39.2` and `41.7`, respectively.
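(Referring back to the "New architecture" notes in DEVELOP.md above.) A minimal sketch of a model that follows the `heads` convention. Everything here is illustrative — `ToyNet`, its one-layer backbone, and the example head channels are made up for this sketch and are not the repository's movenet implementation:

~~~
import torch
from torch import nn

class ToyNet(nn.Module):
    """Hypothetical example: build one small head per entry in `heads`."""
    def __init__(self, heads, head_conv=64):
        super(ToyNet, self).__init__()
        self.heads = heads
        # stand-in backbone: a single stride-4 conv producing 64 channels
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=4, padding=1),
            nn.ReLU(inplace=True))
        for name, channels in heads.items():
            head = nn.Sequential(
                nn.Conv2d(64, head_conv, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, channels, kernel_size=1))
            self.__setattr__(name, head)  # register each head as a submodule

    def forward(self, x):
        feat = self.backbone(x)
        out = {name: self.__getattr__(name)(feat) for name in self.heads}
        return [out]  # single stage: a list with one dict, keyed like `heads`

heads = {'hm': 1, 'hps': 34, 'reg': 2}  # e.g. center heatmap, 17x2 joint offsets, center offset
net = ToyNet(heads)
[outputs] = net(torch.zeros(1, 3, 192, 192))
print({name: tuple(t.shape) for name, t in outputs.items()})
~~~

The real networks under `src/lib/models/networks/` are expected to follow the same contract.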
19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for dla 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can change to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable for 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size (see the sketch at the end of this section). 75 | For example, to train COCO object detection with dla on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch evenly splits the total batch size across the GPUs. 83 | `--master_batch` allows using a different batch size for the master GPU, which usually costs more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1omiOUjWCrFbTJREypuZaODu0bOlF_7Fg/view?usp=sharing) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
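For reference, a tiny illustrative sketch of the linear scaling rule mentioned above (the helper name is made up for this example; the base values are the defaults quoted in this section):

~~~
def scaled_lr(batch_size, base_lr=1.25e-4, base_batch_size=32):
    """Linear scaling rule: keep lr / batch_size constant."""
    return base_lr * batch_size / base_batch_size

for bs in (8, 16, 32, 64, 128):
    print(bs, scaled_lr(bs))  # e.g. 16 -> 6.25e-05, 64 -> 2.5e-04
~~~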
90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v0.4.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 0.4.1: 19 | 20 | ~~~ 21 | conda install pytorch=0.4.1 torchvision -c pytorch 22 | ~~~ 23 | 24 | Then disable cuDNN batch normalization (due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)). 25 | 26 | ~~~ 27 | # PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/ 28 | # for pytorch v0.4.0 29 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 30 | # for pytorch v0.4.1 31 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 32 | ~~~ 33 | 34 | For other PyTorch versions, you can manually open `torch/nn/functional.py`, find the line with `torch.batch_norm`, and replace `torch.backends.cudnn.enabled` with `False`. We observed slightly worse training results without doing so. 35 | 36 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | 3. Clone this repo: 47 | 48 | ~~~ 49 | CenterNet_ROOT=/path/to/clone/CenterNet 50 | git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT 51 | ~~~ 52 | 53 | 54 | 4. Install the requirements: 55 | 56 | ~~~ 57 | pip install -r requirements.txt 58 | ~~~ 59 | 60 | 61 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)). 62 | 63 | ~~~ 64 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 65 | ./make.sh 66 | ~~~ 67 | 6. [Optional, only required for ExtremeNet or multi-scale testing] Compile NMS if you want to use multi-scale testing or test ExtremeNet. 68 | 69 | ~~~ 70 | cd $CenterNet_ROOT/src/lib/external 71 | make 72 | ~~~ 73 | 74 | 7. Download pretrained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in the [Model zoo](MODEL_ZOO.md). 75 | -------------------------------------------------------------------------------- /readme/MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # MODEL ZOO 2 | 3 | ### Common settings and notes 4 | 5 | - The experiments are run with PyTorch 0.4.1, CUDA 9.0, and cuDNN 7.1. 6 | - Training times are measured on our servers with 8 TITAN V GPUs (12 GB Memory). 7 | - Testing times are measured on our local machine with a TITAN Xp GPU. 8 | - The models can be downloaded directly from [Google drive](https://drive.google.com/open?id=1px-Xg7jXSC79QqgsD1AAGJQkuf5m0zh_).
9 | 10 | ## Object Detection 11 | 12 | 13 | ### COCO 14 | 15 | | Model | GPUs |Train time(h)| Test time (ms) | AP | Download | 16 | |--------------------------|------|-------------|----------------|--------------------|-----------| 17 | |[ctdet\_coco\_hg](../experiments/ctdet_coco_hg.sh) | 5 |109 | 71 / 129 / 674 | 40.3 / 42.2 / 45.1 | [model](https://drive.google.com/open?id=1cNyDmyorOduMRsgXoUnuyUiF6tZNFxaG) | 18 | |[ctdet\_coco\_dla\_1x](../experiments/ctdet_coco_dla_1x.sh) | 8 | 57 | 19 / 36 / 248 | 36.3 / 38.2 / 40.7 | [model](https://drive.google.com/open?id=1r89_KNXyDyvUp8NggduG9uKQTMU2DsK_) | 19 | |[ctdet\_coco\_dla\_2x](../experiments/ctdet_coco_dla_2x.sh) | 8 | 92 | 19 / 36 / 248 | 37.4 / 39.2 / 41.7 | [model](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT) | 20 | |[ctdet\_coco\_resdcn101](../experiments/ctdet_coco_resdcn101.sh)| 8 | 65 | 22 / 40 / 259 | 34.6 / 36.2 / 39.3 | [model](https://drive.google.com/open?id=1bTJCbAc1szA9lWU-fvVw52lqR3U2TTry) | 21 | |[ctdet\_coco\_resdcn18](../experiments/ctdet_coco_resdcn18.sh) | 4 | 28 | 7 / 14 / 81 | 28.1 / 30.0 / 33.2 | [model](https://drive.google.com/open?id=1b-_sjq1Pe_dVxt5SeFmoadMfiPTPZqpz) | 22 | |[exdet\_coco\_hg](../experiments/exdet_coco_hg.sh) | 5 |215 | 134 / 246/1340 | 35.8 / 39.8 / 42.4 | [model](https://drive.google.com/open?id=1-5bT5ZF8bXriJ-wAvOjJFrBLvZV2-mlV) | 23 | |[exdet\_coco\_dla](../experiments/exdet_coco_dla.sh) | 8 |133 | 51 / 90 / 481 | 33.0 / 36.5 / 38.5 | [model](https://drive.google.com/open?id=1PFcEqN0KjFuq9XaqzB7TkVD3pvXZx04e) | 24 | 25 | #### Notes 26 | 27 | - All models are trained on COCO train 2017 and evaluated on val 2017. 28 | - We show test time and AP with no augmentation / flip augmentation / multi scale (0.5, 0.75, 1, 1.25, 1.5) augmentation. 29 | - Results on COCO test-dev can be found in the paper or add `--trainval` for `test.py`. 30 | - exdet is our re-implementation of [ExtremeNet](https://github.com/xingyizhou/ExtremeNet). The testing does not include edge aggregation. 31 | - For dla and resnets, `1x` means the training schedule that train 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch (following [SimpleBaseline](https://github.com/Microsoft/human-pose-estimation.pytorch)). `2x` means train 230 epochs with learning rate dropped 10 times at the 180 and 210 epoch. The training schedules are **not** carefully investigated. 32 | - The hourglass trained schedule follows [ExtremeNet](https://github.com/xingyizhou/ExtremeNet): trains 50 epochs (approximately 250000 iterations in batch size 24) and drops learning rate at the 40 epoch. 33 | - Testing time include network forwarding time, decoding time, and nms time (for ExtremeNet). 34 | - We observed up to 0.4 AP performance jitter due to randomness in training. 
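The `1x`/`2x` schedules above are plain step decays (the `--lr_step` flags in the experiment scripts). An illustrative sketch, assuming a ×0.1 drop at each listed epoch as described in the notes (the function name and base value are placeholders):

~~~
def lr_at_epoch(epoch, base_lr=1.25e-4, lr_step=(90, 120)):
    """Step schedule: divide the learning rate by 10 at each epoch in lr_step."""
    return base_lr * (0.1 ** sum(epoch >= s for s in lr_step))

print(lr_at_epoch(60), lr_at_epoch(100), lr_at_epoch(130))
# 1.25e-04, 1.25e-05, 1.25e-06
~~~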
35 | 36 | ### Pascal VOC 37 | 38 | | Model |GPUs| Train time (h)| Test time (ms) | mAP | Download | 39 | |---------------------------------|----|---------------|----------------|------|-----------| 40 | |[ctdet\_pascal\_dla\_384](../experiments/ctdet_pascal_dla_384.sh) | 1 |15 | 20 | 79.3 | [model](https://drive.google.com/open?id=1IC3FZkxAQHm2rxoIGmS4YluYpZxwYkJf) | 41 | |[ctdet\_pascal\_dla\_512](../experiments/ctdet_pascal_dla_512.sh) | 2 |15 | 30 | 80.7 | [model](https://drive.google.com/open?id=1jIfK9EyqzNcupxGsp3YRnEiewrIG4_Ma) | 42 | |[ctdet\_pascal\_resdcn18\_384](../experiments/ctdet_pascal_resdcn18_384.sh) | 1 |3 | 7 | 72.6 | [model](https://drive.google.com/open?id=1Kq27D1uoPZK42j2alDWmCGyqRU2ob1BX) | 43 | |[ctdet\_pascal\_resdcn18\_512](../experiments/ctdet_pascal_resdcn18_512.sh) | 1 |5 | 10 | 75.7 | [model](https://drive.google.com/open?id=1MRUJTTJ4-ZDN0Y-zQOqQBqjrQMcXFzet) | 44 | |[ctdet\_pascal\_resdcn101\_384](../experiments/ctdet_pascal_resdcn101_384.sh)| 2 |7 | 22 | 77.1 | [model](https://drive.google.com/open?id=11YXE04zILuXA5-kaYQEEg0ljNKBe6GPO) | 45 | |[ctdet\_pascal\_resdcn101\_512](../experiments/ctdet_pascal_resdcn101_512.sh)| 4 |7 | 33 | 78.7 | [model](https://drive.google.com/open?id=1xhEf-a_y2Di6YdyPpCIj0-kVFjQvDf9N) | 46 | 47 | #### Notes 48 | - All models are trained on trainval 07+12 and tested on test 2007. 49 | - Flip test is used by default. 50 | - Training schedule: train for 70 epochs with learning rate dropped 10 times at the 45 and 60 epoch. 51 | - We observed up to 1 mAP performance jitter due to randomness in training. 52 | 53 | ## Human pose estimation 54 | 55 | ### COCO 56 | 57 | | Model | GPUs |Train time(h)| Test time (ms) | AP | Download | 58 | |--------------------------|------|-------------|----------------|-------------|-----------| 59 | |[multi\_pose\_hg_1x](../experiments/multi_pose_hg_1x.sh) | 5 |62 | 151 | 58.7 | [model](https://drive.google.com/open?id=1HBB5KRaSj-m-vtpGESm7_3evNP5Y84RS) | 60 | |[multi\_pose\_hg_3x](../experiments/multi_pose_hg_3x.sh) | 5 |188 | 151 | 64.0 | [model](https://drive.google.com/open?id=1n6EvwhTbz7LglVXXlL9irJia7YuakHdB) | 61 | |[multi\_pose\_dla_1x](../experiments/multi_pose_dla_1x.sh) | 8 |30 | 44 | 54.7 | [model](https://drive.google.com/open?id=1VeiRtuXfCbmhQNGV-XWL6elUzpuWN-4K) | 62 | |[multi\_pose\_dla_3x](../experiments/multi_pose_dla_3x.sh) | 8 |70 | 44 | 58.9 | [model](https://drive.google.com/open?id=1PO1Ax_GDtjiemEmDVD7oPWwqQkUu28PI) | 63 | 64 | #### Notes 65 | - All models are trained on keypoint train 2017 images which contains at least one human with keypoint annotations (64115 images). 66 | - The evaluation is done on COCO keypoint val 2017 (5000 images). 67 | - Flip test is used by default. 68 | - The models are fine-tuned from the corresponding center point detection models. 69 | - Dla training schedule: `1x`: train for 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch.`3x`: train for 320 epochs with learning rate dropped 10 times at the 270 and 300 epoch. 70 | - Hourglass training schedule: `1x`: train for 50 epochs with learning rate dropped 10 times at the 40 epoch.`3x`: train for 150 epochs with learning rate dropped 10 times at the 130 epoch. 71 | 72 | ## 3D bounding box detection 73 | 74 | #### Notes 75 | - The 3dop split is from [3DOP](https://papers.nips.cc/paper/5644-3d-object-proposals-for-accurate-object-class-detection) and the suborn split is from [SubCNN](https://github.com/tanshen/SubCNN). 76 | - No augmentation is used in testing. 
77 | - The models are trained for 70 epochs with learning rate dropped at the 45 and 60 epoch. 78 | 79 | ### KITTI 3DOP split 80 | 81 | |Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download | 82 | |------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------| 83 | |[ddd_3dop](../experiments/ddd_3dop.sh)|2 | 7h | 31ms |96.9|87.8|79.2|93.9 |84.3 |75.7 |34.0 |30.5 |26.8 | [model](https://drive.google.com/open?id=1znsM6E-aVTkATreDuUVxoU0ajL1az8rz)| 84 | 85 | ### KITTI SubCNN split 86 | 87 | |Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download | 88 | |------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------| 89 | |[ddd_sub](../experiments/ddd_sub.sh) |2 | 7h | 31ms |89.6|79.8|70.3|85.7 |75.2 |65.9 |34.9 |27.7 |26.4 | [model](https://drive.google.com/open?id=15XuJxTxCBnA8O37M_ghjppnWmVnjC0Hp)| -------------------------------------------------------------------------------- /readme/det1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/det1.png -------------------------------------------------------------------------------- /readme/det2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/det2.png -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/fig2.png -------------------------------------------------------------------------------- /readme/pose1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose1.png -------------------------------------------------------------------------------- /readme/pose2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose2.png -------------------------------------------------------------------------------- /readme/pose3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | scipy 7 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/demo.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import cv2 9 | 10 | import torch 11 | 12 | from opts import opts 13 | from detectors.detector_factory import detector_factory 14 | 15 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 16 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 17 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 18 | 19 | def demo(opt): 20 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 21 | opt.debug = max(opt.debug, 1) 22 | Detector = detector_factory[opt.task] 23 | detector = Detector(opt) 24 | 25 | if opt.demo == 'webcam' or \ 26 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 27 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 28 | detector.pause = False 29 | while True: 30 | success, img = cam.read() 31 | if not success: 32 | print("no more images, close.") 33 | return 34 | if opt.debug < 4: 35 | cv2.imshow('input', img) 36 | ret = detector.run(img) 37 | time_str = '' 38 | for stat in time_stats: 39 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 40 | print(time_str) 41 | if opt.debug < 4 and cv2.waitKey(1) == 27: 42 | return # esc to quit 43 | else: 44 | if os.path.isdir(opt.demo): 45 | image_names = [] 46 | ls = os.listdir(opt.demo) 47 | for file_name in sorted(ls): 48 | ext = file_name[file_name.rfind('.') + 1:].lower() 49 | if ext in image_ext: 50 | image_names.append(os.path.join(opt.demo, file_name)) 51 | else: 52 | image_names = [opt.demo] 53 | 54 | for (image_name) in image_names: 55 | ret = detector.run(image_name) 56 | time_str = '' 57 | for stat in time_stats: 58 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 59 | print(time_str) 60 | if __name__ == '__main__': 61 | opt = opts().init() 62 | demo(opt) 63 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/active.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class ACTIVE(data.Dataset): 15 | """ 16 | The modified single-pose version of COCO human pose estimation dataset, naming `Active` dataset here. The main difference is that we limit the human counts in one single image to be less than or equal to 2. 
17 | The order of joints: 18 | KEYPOINT_DICT = { 19 | 'nose': 0, 20 | 'left_eye': 1, 21 | 'right_eye': 2, 22 | 'left_ear': 3, 23 | 'right_ear': 4, 24 | 'left_shoulder': 5, 25 | 'right_shoulder': 6, 26 | 'left_elbow': 7, 27 | 'right_elbow': 8, 28 | 'left_wrist': 9, 29 | 'right_wrist': 10, 30 | 'left_hip': 11, 31 | 'right_hip': 12, 32 | 'left_knee': 13, 33 | 'right_knee': 14, 34 | 'left_ankle': 15, 35 | 'right_ankle': 16 36 | } 37 | """ 38 | num_classes = 1 39 | num_joints = 17 40 | default_resolution = [192, 192] # mli: for movenet-lightning 41 | mean = np.array([1., 1., 1.], 42 | dtype=np.float32).reshape(1, 1, 3) 43 | std = np.array([1., 1., 1.], 44 | dtype=np.float32).reshape(1, 1, 3) 45 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 46 | [11, 12], [13, 14], [15, 16]] 47 | 48 | def __init__(self, opt, split, sp=False): 49 | super(ACTIVE, self).__init__() 50 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 51 | [4, 6], [3, 5], [5, 6], 52 | [5, 7], [7, 9], [6, 8], [8, 10], 53 | [6, 12], [5, 11], [11, 12], 54 | [12, 14], [14, 16], [11, 13], [13, 15]] 55 | 56 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 57 | self.data_dir = os.path.join(opt.data_dir, opt.dataset) # mli: the dir name is specified by `opt.dataset` 58 | self.img_dir = os.path.join(self.data_dir, '{}'.format(split)) 59 | if split == 'test': 60 | raise ValueError('No supported for the testing dataset.') 61 | else: 62 | self.annot_path = os.path.join( 63 | self.data_dir, 'annotations', 64 | '{}_{}.json').format(opt.dataset, split) 65 | self.max_objs = 2 # mli: only consider the images with less than 2 human objects 66 | self._data_rng = np.random.RandomState(123) 67 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 68 | dtype=np.float32) 69 | self._eig_vec = np.array([ 70 | [-0.58752847, -0.69563484, 0.41340352], 71 | [-0.5832747, 0.00994535, -0.81221408], 72 | [-0.56089297, 0.71832671, 0.41158938] 73 | ], dtype=np.float32) 74 | self.split = split 75 | self.opt = opt 76 | 77 | print('==> initializing {} {} data.'.format(opt.dataset, split)) 78 | self.coco = coco.COCO(self.annot_path) 79 | image_ids = self.coco.getImgIds() 80 | 81 | if split == 'train': 82 | self.images = [] 83 | for img_id in image_ids: 84 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 85 | if len(idxs) > 0: 86 | self.images.append(img_id) 87 | else: 88 | self.images = image_ids 89 | self.num_samples = len(self.images) 90 | print('Loaded {} {} samples'.format(split, self.num_samples)) 91 | 92 | def _to_float(self, x): 93 | return float("{:.2f}".format(x)) 94 | 95 | def bbox_from_kpt(self, kpts): 96 | bbox = np.zeros((4)) 97 | xmin = np.min(kpts[:,0]) 98 | ymin = np.min(kpts[:,1]) 99 | xmax = np.max(kpts[:,0]) 100 | ymax = np.max(kpts[:,1]) 101 | width = xmax - xmin - 1 102 | height = ymax - ymin - 1 103 | 104 | # corrupted bounding box 105 | if width <= 0 or height <= 0: 106 | return bbox 107 | # 20% extend 108 | else: 109 | bbox[0] = (xmin + xmax)/2. - width/2*1.2 110 | bbox[1] = (ymin + ymax)/2. 
- height/2*1.2 111 | bbox[2] = width*1.2 112 | bbox[3] = height*1.2 113 | return bbox 114 | 115 | def convert_eval_format(self, all_dets): 116 | # import pdb; pdb.set_trace() 117 | print() 118 | detections = [] 119 | for image_id in all_dets: 120 | category_id = 1 121 | dets = all_dets[image_id] 122 | bbox = self.bbox_from_kpt(dets) 123 | bbox_out = list(map(self._to_float, bbox)) 124 | score = np.sum(dets[:, 2]) / 4 125 | keypoints = np.concatenate([ 126 | dets[:, [1, 0]], 127 | np.ones((17, 1), dtype=np.float32)], axis=1) 128 | keypoints[1:5] = np.zeros((4, 3)) 129 | keypoints = keypoints.reshape(51).tolist() 130 | keypoints = list(map(self._to_float, keypoints)) 131 | detection = { 132 | "image_id": int(image_id), 133 | "category_id": int(category_id), 134 | "bbox": bbox_out, 135 | "score": float("{:.2f}".format(score)), 136 | "keypoints": keypoints 137 | } 138 | detections.append(detection) 139 | return detections 140 | 141 | def __len__(self): 142 | return self.num_samples 143 | 144 | def save_results(self, results, save_dir): 145 | json.dump(self.convert_eval_format(results), 146 | open('{}/results.json'.format(save_dir), 'w')) 147 | 148 | def run_eval(self, results, save_dir): 149 | self.save_results(results, save_dir) 150 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 151 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 152 | coco_eval.evaluate() 153 | coco_eval.accumulate() 154 | coco_eval.summarize() 155 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class COCOHP(data.Dataset): 15 | """ 16 | The order of joints: 17 | KEYPOINT_DICT = { 18 | 'nose': 0, 19 | 'left_eye': 1, 20 | 'right_eye': 2, 21 | 'left_ear': 3, 22 | 'right_ear': 4, 23 | 'left_shoulder': 5, 24 | 'right_shoulder': 6, 25 | 'left_elbow': 7, 26 | 'right_elbow': 8, 27 | 'left_wrist': 9, 28 | 'right_wrist': 10, 29 | 'left_hip': 11, 30 | 'right_hip': 12, 31 | 'left_knee': 13, 32 | 'right_knee': 14, 33 | 'left_ankle': 15, 34 | 'right_ankle': 16 35 | } 36 | """ 37 | num_classes = 1 38 | num_joints = 17 39 | default_resolution = [512, 512] 40 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 41 | dtype=np.float32).reshape(1, 1, 3) 42 | std = np.array([0.28863828, 0.27408164, 0.27809835], 43 | dtype=np.float32).reshape(1, 1, 3) 44 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 45 | [11, 12], [13, 14], [15, 16]] 46 | 47 | def __init__(self, opt, split, sp=False): 48 | super(COCOHP, self).__init__() 49 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 50 | [4, 6], [3, 5], [5, 6], 51 | [5, 7], [7, 9], [6, 8], [8, 10], 52 | [6, 12], [5, 11], [11, 12], 53 | [12, 14], [14, 16], [11, 13], [13, 15]] 54 | 55 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 56 | self.data_dir = os.path.join(opt.data_dir, 'coco') 57 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 58 | if split == 'test': 59 | self.annot_path = os.path.join( 60 | self.data_dir, 'annotations', 61 | 'image_info_test-dev2017.json').format(split) 62 | else: 63 | self.annot_path = os.path.join( 64 | self.data_dir, 
'annotations', 65 | 'person_keypoints_{}2017.json').format(split) 66 | self.max_objs = 32 67 | self._data_rng = np.random.RandomState(123) 68 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 69 | dtype=np.float32) 70 | self._eig_vec = np.array([ 71 | [-0.58752847, -0.69563484, 0.41340352], 72 | [-0.5832747, 0.00994535, -0.81221408], 73 | [-0.56089297, 0.71832671, 0.41158938] 74 | ], dtype=np.float32) 75 | self.split = split 76 | self.opt = opt 77 | 78 | print('==> initializing coco 2017 {} data.'.format(split)) 79 | self.coco = coco.COCO(self.annot_path) 80 | image_ids = self.coco.getImgIds() 81 | 82 | if split == 'train': 83 | self.images = [] 84 | for img_id in image_ids: 85 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 86 | if len(idxs) > 0: 87 | self.images.append(img_id) 88 | else: 89 | self.images = image_ids 90 | self.num_samples = len(self.images) 91 | print('Loaded {} {} samples'.format(split, self.num_samples)) 92 | 93 | def _to_float(self, x): 94 | return float("{:.2f}".format(x)) 95 | 96 | def convert_eval_format(self, all_bboxes): 97 | # import pdb; pdb.set_trace() 98 | detections = [] 99 | for image_id in all_bboxes: 100 | for cls_ind in all_bboxes[image_id]: 101 | category_id = 1 102 | for dets in all_bboxes[image_id][cls_ind]: 103 | bbox = dets[:4] 104 | bbox[2] -= bbox[0] 105 | bbox[3] -= bbox[1] 106 | score = dets[4] 107 | bbox_out = list(map(self._to_float, bbox)) 108 | keypoints = np.concatenate([ 109 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 110 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 111 | keypoints = list(map(self._to_float, keypoints)) 112 | 113 | detection = { 114 | "image_id": int(image_id), 115 | "category_id": int(category_id), 116 | "bbox": bbox_out, 117 | "score": float("{:.2f}".format(score)), 118 | "keypoints": keypoints 119 | } 120 | detections.append(detection) 121 | return detections 122 | 123 | def __len__(self): 124 | return self.num_samples 125 | 126 | def save_results(self, results, save_dir): 127 | json.dump(self.convert_eval_format(results), 128 | open('{}/results.json'.format(save_dir), 'w')) 129 | 130 | def run_eval(self, results, save_dir): 131 | self.save_results(results, save_dir) 132 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 133 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 134 | coco_eval.evaluate() 135 | coco_eval.accumulate() 136 | coco_eval.summarize() 137 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 138 | coco_eval.evaluate() 139 | coco_eval.accumulate() 140 | coco_eval.summarize() 141 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | from .sample.multi_pose import MultiPoseDataset 7 | from .sample.single_pose import SinglePoseDataset 8 | 9 | from .dataset.coco_hp import COCOHP 10 | from .dataset.active import ACTIVE 11 | 12 | 13 | dataset_factory = { 14 | 'coco_hp': COCOHP, 15 | 'active': ACTIVE, 16 | 'active_coco': ACTIVE, 17 | } 18 | 19 | _sample_factory = { 20 | 'multi_pose': MultiPoseDataset, 21 | 'single_pose': SinglePoseDataset, 22 | } 23 | 24 | 25 | def get_dataset(dataset, task): 26 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 27 | pass 28 | return Dataset 29 | 30 | 
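# --- Usage sketch (illustrative addition, not part of the original file) ----
# `get_dataset` composes a dataset's metadata/evaluation class (e.g. ACTIVE)
# with a task's sampling class (e.g. SinglePoseDataset) via multiple
# inheritance: the resulting class has both the COCO-style evaluation helpers
# and the `__getitem__` that builds training targets. Assumes `src/lib` is on
# the path (see src/_init_paths.py).
if __name__ == '__main__':
    Dataset = get_dataset('active', 'single_pose')
    print([cls.__name__ for cls in Dataset.__mro__][:3])
    # -> ['Dataset', 'ACTIVE', 'SinglePoseDataset']
    # Instantiating needs an opts() namespace and data on disk (readme/DATA.md):
    # dataset = Dataset(opt, 'train')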
-------------------------------------------------------------------------------- /src/lib/datasets/sample/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | 17 | class MultiPoseDataset(data.Dataset): 18 | def _coco_box_to_bbox(self, box): 19 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 20 | dtype=np.float32) 21 | return bbox 22 | 23 | def _get_border(self, border, size): 24 | i = 1 25 | while size - border // i <= border // i: 26 | i *= 2 27 | return border // i 28 | 29 | def __getitem__(self, index): 30 | img_id = self.images[index] 31 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 32 | img_path = os.path.join(self.img_dir, file_name) 33 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 34 | anns = self.coco.loadAnns(ids=ann_ids) 35 | num_objs = min(len(anns), self.max_objs) 36 | 37 | img = cv2.imread(img_path) 38 | 39 | height, width = img.shape[0], img.shape[1] 40 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 41 | s = max(img.shape[0], img.shape[1]) * 1.0 42 | rot = 0 43 | 44 | flipped = False 45 | if self.split == 'train': 46 | if not self.opt.not_rand_crop: 47 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 48 | w_border = self._get_border(128, img.shape[1]) 49 | h_border = self._get_border(128, img.shape[0]) 50 | c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) 51 | c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) 52 | else: 53 | sf = self.opt.scale 54 | cf = self.opt.shift 55 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 56 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 57 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 58 | if np.random.random() < self.opt.aug_rot: 59 | rf = self.opt.rotate 60 | rot = np.clip(np.random.randn()*rf, -rf*2, rf*2) 61 | 62 | if np.random.random() < self.opt.flip: 63 | flipped = True 64 | img = img[:, ::-1, :] 65 | c[0] = width - c[0] - 1 66 | 67 | 68 | trans_input = get_affine_transform( 69 | c, s, rot, [self.opt.input_res, self.opt.input_res]) 70 | inp = cv2.warpAffine(img, trans_input, 71 | (self.opt.input_res, self.opt.input_res), 72 | flags=cv2.INTER_LINEAR) 73 | inp = (inp.astype(np.float32) / 255.) 
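    # Note: `inp` is now the (possibly flipped) image warped by the 2x3 affine
    # built from (center c, scale s, rotation rot) to an input_res x input_res
    # crop and scaled to [0, 1]; color augmentation (train only), mean/std
    # normalization, and an HWC -> CHW transpose follow below.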
74 | if self.split == 'train' and not self.opt.no_color_aug: 75 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 76 | inp = (inp - self.mean) / self.std 77 | inp = inp.transpose(2, 0, 1) 78 | 79 | output_res = self.opt.output_res 80 | num_joints = self.num_joints 81 | trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res]) 82 | trans_output = get_affine_transform(c, s, 0, [output_res, output_res]) 83 | 84 | hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32) 85 | hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32) 86 | dense_kps = np.zeros((num_joints, 2, output_res, output_res), 87 | dtype=np.float32) 88 | dense_kps_mask = np.zeros((num_joints, output_res, output_res), 89 | dtype=np.float32) 90 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 91 | kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32) 92 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 93 | ind = np.zeros((self.max_objs), dtype=np.int64) 94 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 95 | kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8) 96 | hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32) 97 | hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64) 98 | hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64) 99 | 100 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 101 | draw_umich_gaussian 102 | 103 | gt_det = [] 104 | for k in range(num_objs): 105 | ann = anns[k] 106 | bbox = self._coco_box_to_bbox(ann['bbox']) 107 | cls_id = int(ann['category_id']) - 1 108 | pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3) 109 | if flipped: 110 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 111 | pts[:, 0] = width - pts[:, 0] - 1 112 | for e in self.flip_idx: 113 | pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy() 114 | bbox[:2] = affine_transform(bbox[:2], trans_output) 115 | bbox[2:] = affine_transform(bbox[2:], trans_output) 116 | bbox = np.clip(bbox, 0, output_res - 1) 117 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 118 | if (h > 0 and w > 0) or (rot != 0): 119 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 120 | radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius)) 121 | ct = np.array( 122 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 123 | ct_int = ct.astype(np.int32) 124 | wh[k] = 1. * w, 1. 
* h 125 | ind[k] = ct_int[1] * output_res + ct_int[0] 126 | reg[k] = ct - ct_int 127 | reg_mask[k] = 1 128 | num_kpts = pts[:, 2].sum() 129 | if num_kpts == 0: 130 | hm[cls_id, ct_int[1], ct_int[0]] = 0.9999 131 | reg_mask[k] = 0 132 | 133 | hp_radius = gaussian_radius((math.ceil(h), math.ceil(w))) 134 | hp_radius = self.opt.hm_gauss \ 135 | if self.opt.mse_loss else max(0, int(hp_radius)) 136 | for j in range(num_joints): 137 | if pts[j, 2] > 0: 138 | pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot) 139 | if pts[j, 0] >= 0 and pts[j, 0] < output_res and \ 140 | pts[j, 1] >= 0 and pts[j, 1] < output_res: 141 | kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int 142 | kps_mask[k, j * 2: j * 2 + 2] = 1 143 | pt_int = pts[j, :2].astype(np.int32) 144 | hp_offset[k * num_joints + j] = pts[j, :2] - pt_int 145 | hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0] 146 | hp_mask[k * num_joints + j] = 1 147 | if self.opt.dense_hp: 148 | # must be before draw center hm gaussian 149 | draw_dense_reg(dense_kps[j], hm[cls_id], ct_int, 150 | pts[j, :2] - ct_int, radius, is_offset=True) 151 | draw_gaussian(dense_kps_mask[j], ct_int, radius) 152 | draw_gaussian(hm_hp[j], pt_int, hp_radius) 153 | draw_gaussian(hm[cls_id], ct_int, radius) 154 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 155 | ct[0] + w / 2, ct[1] + h / 2, 1] + 156 | pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id]) 157 | if rot != 0: 158 | hm = hm * 0 + 0.9999 159 | reg_mask *= 0 160 | kps_mask *= 0 161 | ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 162 | 'hps': kps, 'hps_mask': kps_mask} 163 | if self.opt.dense_hp: 164 | dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res) 165 | dense_kps_mask = dense_kps_mask.reshape( 166 | num_joints, 1, output_res, output_res) 167 | dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1) 168 | dense_kps_mask = dense_kps_mask.reshape( 169 | num_joints * 2, output_res, output_res) 170 | ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask}) 171 | del ret['hps'], ret['hps_mask'] 172 | if self.opt.reg_offset: 173 | ret.update({'reg': reg}) 174 | if self.opt.hm_hp: 175 | ret.update({'hm_hp': hm_hp}) 176 | if self.opt.reg_hp_offset: 177 | ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask}) 178 | if self.opt.debug > 0 or not self.split == 'train': 179 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 180 | np.zeros((1, 40), dtype=np.float32) 181 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 182 | ret['meta'] = meta 183 | return ret 184 | -------------------------------------------------------------------------------- /src/lib/datasets/sample/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | 17 | 18 | class SinglePoseDataset(data.Dataset): 19 | def _coco_box_to_bbox(self, box): 20 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 21 | dtype=np.float32) 22 | return bbox 23 | 24 | 
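    # _coco_box_to_bbox converts a COCO box [x, y, w, h] into corner form
    # [x1, y1, x2, y2], e.g. [10, 20, 30, 40] -> [10., 20., 40., 60.];
    # the corner form is what the affine-transform and clipping code in
    # __getitem__ below expects.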
def _get_border(self, border, size): 25 | i = 1 26 | while size - border // i <= border // i: 27 | i *= 2 28 | return border // i 29 | 30 | def __getitem__(self, index): 31 | img_id = self.images[index] 32 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 33 | img_path = os.path.join(self.img_dir, file_name) 34 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 35 | anns = self.coco.loadAnns(ids=ann_ids) 36 | num_objs = min(len(anns), self.max_objs) 37 | 38 | img = cv2.imread(img_path) 39 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) 40 | 41 | height, width = img.shape[0], img.shape[1] 42 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 43 | s = max(img.shape[0], img.shape[1]) * 1.0 44 | rot = 0 45 | 46 | flipped = False 47 | if self.split == 'train': 48 | if not self.opt.not_rand_crop: 49 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 50 | w_border = self._get_border(128, img.shape[1]) 51 | h_border = self._get_border(128, img.shape[0]) 52 | c[0] = np.random.randint( 53 | low=w_border, high=img.shape[1] - w_border) 54 | c[1] = np.random.randint( 55 | low=h_border, high=img.shape[0] - h_border) 56 | else: 57 | sf = self.opt.scale 58 | cf = self.opt.shift 59 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 60 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 61 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 62 | if np.random.random() < self.opt.aug_rot: 63 | rf = self.opt.rotate 64 | rot = np.clip(np.random.randn()*rf, -rf*2, rf*2) 65 | 66 | if np.random.random() < self.opt.flip: 67 | flipped = True 68 | img = img[:, ::-1, :] 69 | c[0] = width - c[0] - 1 70 | 71 | trans_input = get_affine_transform( 72 | c, s, rot, [self.opt.input_res, self.opt.input_res]) 73 | inp = cv2.warpAffine(img, trans_input, 74 | (self.opt.input_res, self.opt.input_res), 75 | flags=cv2.INTER_LINEAR) 76 | inp = (inp.astype(np.float32) / 127.5) 77 | if self.split == 'train' and not self.opt.no_color_aug: 78 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 79 | inp = (inp - self.mean) / self.std 80 | inp = inp.transpose(2, 0, 1) 81 | 82 | output_res = self.opt.output_res 83 | num_joints = self.num_joints 84 | trans_output_rot = get_affine_transform( 85 | c, s, rot, [output_res, output_res]) 86 | trans_output = get_affine_transform(c, s, 0, [output_res, output_res]) 87 | 88 | hm = np.zeros((self.num_classes, output_res, 89 | output_res), dtype=np.float32) 90 | hm_hp = np.zeros((num_joints, output_res, output_res), 91 | dtype=np.float32) 92 | kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32) 93 | ind = np.zeros((self.max_objs), dtype=np.int64) 94 | kps_mask = np.zeros( 95 | (self.max_objs, self.num_joints * 2), dtype=np.uint8) 96 | hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32) 97 | hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64) 98 | hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64) 99 | 100 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 101 | draw_umich_gaussian 102 | 103 | gt_det = [] 104 | for k in range(num_objs): 105 | ann = anns[k] 106 | bbox = self._coco_box_to_bbox(ann['bbox']) 107 | cls_id = int(ann['category_id']) - 1 108 | pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3) 109 | if flipped: 110 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 111 | pts[:, 0] = width - pts[:, 0] - 1 112 | for e in self.flip_idx: 113 | pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy() 114 | bbox[:2] = 
affine_transform(bbox[:2], trans_output) 115 | bbox[2:] = affine_transform(bbox[2:], trans_output) 116 | bbox = np.clip(bbox, 0, output_res - 1) 117 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 118 | if (h > 0 and w > 0) or (rot != 0): 119 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 120 | radius = self.opt.hm_gauss if self.opt.mse_loss else max( 121 | 0, int(radius)) 122 | ct = np.array( 123 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 124 | ct_int = ct.astype(np.int32) 125 | ind[k] = ct_int[1] * output_res + ct_int[0] 126 | num_kpts = pts[:, 2].sum() 127 | if num_kpts == 0: 128 | hm[cls_id, ct_int[1], ct_int[0]] = 0.9999 129 | 130 | hp_radius = gaussian_radius((math.ceil(h), math.ceil(w))) 131 | hp_radius = self.opt.hm_gauss \ 132 | if self.opt.mse_loss else max(0, int(hp_radius)) 133 | for j in range(num_joints): 134 | if pts[j, 2] > 0: 135 | pts[j, :2] = affine_transform( 136 | pts[j, :2], trans_output_rot) 137 | if pts[j, 0] >= 0 and pts[j, 0] < output_res and \ 138 | pts[j, 1] >= 0 and pts[j, 1] < output_res: 139 | # TODO: Check the ordering of y,x here. 140 | # kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int 141 | kps[k, j * 2] = pts[j, 1:2] - ct_int[1] 142 | kps[k, j * 2 + 1] = pts[j, 0:1] - ct_int[0] 143 | 144 | kps_mask[k, j * 2: j * 2 + 2] = 1 145 | pt_int = pts[j, :2].astype(np.int32) 146 | # hp_offset[k * num_joints + j] = pts[j, :2] - pt_int 147 | # TODO: Check the ordering of y,x here. 148 | hp_offset[k * num_joints + j][0] = pts[j, 1:2] - pt_int[1] 149 | hp_offset[k * num_joints + j][1] = pts[j, 0:1] - pt_int[0] 150 | hp_ind[k * num_joints + j] = pt_int[1] * \ 151 | output_res + pt_int[0] 152 | hp_mask[k * num_joints + j] = 1 153 | draw_gaussian(hm_hp[j], pt_int, hp_radius) 154 | draw_gaussian(hm[cls_id], ct_int, radius) 155 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 156 | ct[0] + w / 2, ct[1] + h / 2, 1] + 157 | pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id]) 158 | if rot != 0: 159 | hm = hm * 0 + 0.9999 160 | # reg_mask *= 0 161 | kps_mask *= 0 162 | # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 163 | # 'hps': kps, 'hps_mask': kps_mask} 164 | # ret = {'input': inp, 'hm': hm, 'ind': ind, 165 | # 'hps': kps, 'hps_mask': kps_mask} 166 | # if self.opt.hm_hp: 167 | # ret.update({'hm_hp': hm_hp}) 168 | # if self.opt.reg_hp_offset: 169 | # ret.update({'hp_offset': hp_offset, 170 | # 'hp_ind': hp_ind, 'hp_mask': hp_mask}) 171 | ret = {'input': inp, 'hm': hm, 'ind': ind, 172 | 'hps': kps, 'hps_mask': kps_mask, 173 | 'hm_hp': hm_hp, 'hp_offset': hp_offset, 174 | 'hp_ind': hp_ind, 'hp_mask': hp_mask} 175 | if self.opt.debug > 0 or not self.split == 'train': 176 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 177 | np.zeros((1, 40), dtype=np.float32) 178 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 179 | ret['meta'] = meta 180 | return ret 181 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | 16 | class BaseDetector(object): 17 | 
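  # Shared single-image inference flow used by the detectors in this package:
  # run() -> pre_process() (pad to a square, resize to the fixed 256x256 input,
  # normalize with (x / 127.5 - mean) / std) -> process() (forward pass + decode,
  # implemented by subclasses) -> post_process() (map detections back to the
  # original image size). debug() runs at opt.debug >= 2 and show_results()
  # at opt.debug >= 1.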
def __init__(self, opt): 18 | if opt.gpus[0] >= 0: 19 | opt.device = torch.device('cuda') 20 | else: 21 | opt.device = torch.device('cpu') 22 | 23 | print('Creating model...') 24 | self.model = create_model(opt.arch, opt.heads, 25 | opt.head_conv, opt.froze_backbone) 26 | self.model = load_model(self.model, opt.load_model) 27 | self.model = self.model.to(opt.device) 28 | self.model.eval() 29 | 30 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 31 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 32 | self.max_per_image = 100 33 | self.num_classes = opt.num_classes 34 | self.opt = opt 35 | self.pause = True 36 | self.global_num = 0 37 | 38 | def pre_process(self, image, meta=None): 39 | height, width = image.shape[0:2] 40 | 41 | # padding all images to be square. 42 | if height > width: 43 | diff = height - width 44 | image = cv2.copyMakeBorder( 45 | image, 0, 0, int(diff//2), int(diff//2 + diff%2), 46 | cv2.BORDER_CONSTANT, value=(0,0,0)) 47 | elif height < width: 48 | diff = width - height 49 | image = cv2.copyMakeBorder( 50 | image, int(diff//2), int(diff//2+diff%2), 0, 0, 51 | cv2.BORDER_CONSTANT, value=(0,0,0)) 52 | 53 | new_height = 256#192 54 | new_width = 256#192 55 | 56 | inp_height = new_height 57 | inp_width = new_width 58 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 59 | s = np.array([inp_width, inp_height], dtype=np.float32) 60 | 61 | inp_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) 62 | inp_image = cv2.cvtColor(inp_image, cv2.COLOR_BGR2RGB).astype(np.float32) 63 | inp_image = ((inp_image / 127.5 - self.mean) / 64 | self.std).astype(np.float32) 65 | images = inp_image.transpose(2, 0, 1).reshape( 66 | 1, 3, inp_height, inp_width) 67 | images = torch.from_numpy(images) 68 | meta = {'c': c, 's': s, 69 | 'in_height': height, 70 | 'in_width': width, 71 | 'out_height': inp_height // self.opt.down_ratio, 72 | 'out_width': inp_width // self.opt.down_ratio} 73 | return images, meta 74 | 75 | def process(self, images, return_time=False): 76 | raise NotImplementedError 77 | 78 | def post_process(self, dets, meta, scale=1): 79 | raise NotImplementedError 80 | 81 | def merge_outputs(self, detections): 82 | raise NotImplementedError 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | raise NotImplementedError 86 | 87 | def show_results(self, debugger, image, results): 88 | raise NotImplementedError 89 | 90 | def run(self, image_or_path_or_tensor, meta=None): 91 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 92 | merge_time, tot_time = 0, 0 93 | debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3), 94 | theme=self.opt.debugger_theme) 95 | start_time = time.time() 96 | if isinstance(image_or_path_or_tensor, np.ndarray): 97 | image = image_or_path_or_tensor 98 | elif type(image_or_path_or_tensor) == type(''): 99 | image = cv2.imread(image_or_path_or_tensor) 100 | 101 | loaded_time = time.time() 102 | load_time += (loaded_time - start_time) 103 | 104 | # detections = [] 105 | scale_start_time = time.time() 106 | 107 | images, meta = self.pre_process(image, meta) 108 | 109 | images = images.to(self.opt.device) 110 | # torch.cuda.synchronize() 111 | pre_process_time = time.time() 112 | pre_time += pre_process_time - scale_start_time 113 | output, dets, forward_time = self.process(images, return_time=True) 114 | # torch.cuda.synchronize() 115 | net_time += forward_time - pre_process_time 116 | decode_time = time.time() 117 | dec_time += 
decode_time - forward_time 118 | if self.opt.debug >= 2: 119 | self.debug(debugger, images, dets, output) 120 | dets = self.post_process(dets, meta) 121 | # torch.cuda.synchronize() 122 | post_process_time = time.time() 123 | post_time += post_process_time - decode_time 124 | results = dets 125 | 126 | # results = self.merge_outputs(detections) 127 | # torch.cuda.synchronize() 128 | end_time = time.time() 129 | merge_time += end_time - post_process_time 130 | tot_time += end_time - start_time 131 | 132 | if self.opt.debug >= 1: 133 | self.show_results(debugger, image, results, prefix=self.global_num) 134 | self.global_num += 1 135 | 136 | return {'results': results, 'tot': tot_time, 'load': load_time, 137 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 138 | 'post': post_time, 'merge': merge_time} 139 | -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | from .multi_pose import MultiPoseDetector 7 | from .single_pose import SinglePoseDetector 8 | 9 | detector_factory = { 10 | 'multi_pose': MultiPoseDetector, 11 | 'single_pose': SinglePoseDetector 12 | } 13 | -------------------------------------------------------------------------------- /src/lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! 
If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import multi_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(MultiPoseDetector, self).__init__(opt) 27 | self.flip_idx = opt.flip_idx 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | output = self.model(images)[-1] 33 | output['hm'] = output['hm'].sigmoid_() 34 | if self.opt.hm_hp and not self.opt.mse_loss: 35 | output['hm_hp'] = output['hm_hp'].sigmoid_() 36 | 37 | reg = output['reg'] if self.opt.reg_offset else None 38 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 39 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 40 | torch.cuda.synchronize() 41 | forward_time = time.time() 42 | 43 | if self.opt.flip_test: 44 | num_joints = self.opt.heads["hm_hp"] 45 | output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2 46 | output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2 47 | output['hps'] = (output['hps'][0:1] + 48 | flip_lr_off(output['hps'][1:2], self.flip_idx, num_joints)) / 2 49 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 50 | if hm_hp is not None else None 51 | reg = reg[0:1] if reg is not None else None 52 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 53 | 54 | dets = multi_pose_decode( 55 | output['hm'], output['wh'], output['hps'], 56 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 57 | 58 | if return_time: 59 | return output, dets, forward_time 60 | else: 61 | return output, dets 62 | 63 | def post_process(self, dets, meta, scale=1): 64 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 65 | dets = multi_pose_post_process( 66 | dets.copy(), [meta['c']], [meta['s']], 67 | meta['out_height'], meta['out_width']) 68 | for j in range(1, self.num_classes + 1): 69 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39) 70 | # import pdb; pdb.set_trace() 71 | dets[0][j][:, :4] /= scale 72 | dets[0][j][:, 5:] /= scale 73 | return dets[0] 74 | 75 | def merge_outputs(self, detections): 76 | results = {} 77 | results[1] = np.concatenate( 78 | [detection[1] for detection in detections], axis=0).astype(np.float32) 79 | if self.opt.nms or len(self.opt.test_scales) > 1: 80 | soft_nms_39(results[1], Nt=0.5, method=2) 81 | results[1] = results[1].tolist() 82 | return results 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | dets = dets.detach().cpu().numpy().copy() 86 | dets[:, :, :4] *= self.opt.down_ratio 87 | dets[:, :, 5:39] *= self.opt.down_ratio 88 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 89 | img = np.clip((( 90 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 91 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 92 | debugger.add_blend_img(img, pred, 'pred_hm') 93 | if self.opt.hm_hp: 94 | pred = debugger.gen_colormap_hp( 95 | output['hm_hp'][0].detach().cpu().numpy()) 96 | debugger.add_blend_img(img, pred, 'pred_hmhp') 97 | 98 | def show_results(self, debugger, image, results): 99 | debugger.add_img(image, img_id='multi_pose') 100 | for bbox in results[1]: 101 | if bbox[4] > self.opt.vis_thresh: 
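        # Each row of results[1] is [x1, y1, x2, y2, score, 17 x (x, y) keypoints, ...],
        # so index 4 is the detection score and 5:39 are the keypoint coordinates.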
102 | debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose') 103 | debugger.add_coco_hp(bbox[5:39], img_id='multi_pose') 104 | debugger.show_all_imgs(pause=self.pause) 105 | -------------------------------------------------------------------------------- /src/lib/detectors/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import single_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process, single_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | 25 | class SinglePoseDetector(BaseDetector): 26 | def __init__(self, opt): 27 | super(SinglePoseDetector, self).__init__(opt) 28 | self.flip_idx = opt.flip_idx 29 | self.vis_thresh = opt.vis_thresh 30 | 31 | def process(self, images, return_time=False): 32 | with torch.no_grad(): 33 | # torch.cuda.synchronize() 34 | output = self.model(images)[0] 35 | dets = self.model.decode(output) 36 | # torch.cuda.synchronize() 37 | forward_time = time.time() 38 | 39 | if return_time: 40 | return output, dets, forward_time 41 | else: 42 | return output, dets 43 | 44 | def post_process(self, dets, meta): 45 | dets = dets[0, 0, :, :] 46 | dets = dets.cpu().numpy() 47 | dets = single_pose_post_process( 48 | dets.copy(), 49 | meta['in_height'], meta['in_width']) 50 | return dets 51 | 52 | def merge_outputs(self, detections): 53 | results = {} 54 | results[1] = np.concatenate( 55 | [detection[1] for detection in detections], axis=0).astype(np.float32) 56 | if self.opt.nms or len(self.opt.test_scales) > 1: 57 | soft_nms_39(results[1], Nt=0.5, method=2) 58 | results[1] = results[1].tolist() 59 | return results 60 | 61 | def debug(self, debugger, images, dets, output): 62 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 63 | img = np.clip((( 64 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 65 | pred = debugger.gen_colormap(torch.sigmoid(output['hm'][0]).detach().cpu().numpy()) 66 | debugger.add_blend_img(img, pred, 'pred_hm') 67 | pred = debugger.gen_colormap_hp( 68 | torch.sigmoid(output['hm_hp'][0]).detach().cpu().numpy()) 69 | debugger.add_blend_img(img, pred, 'pred_hmhp') 70 | 71 | def show_results(self, debugger, image, results, prefix=''): 72 | debugger.add_img(image, img_id='single_pose') 73 | debugger.add_coco_hp(results, img_id='single_pose', vis_thresh=self.vis_thresh) 74 | if self.opt.debug < 4: 75 | debugger.show_all_imgs(pause=self.pause) 76 | else: 77 | debugger.save_all_imgs(prefix=prefix) 78 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- 
/src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, 
value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, 
outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _transpose_and_gather_feat, _transpose_and_gather_feat_plus 14 | import torch.nn.functional as F 15 | 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1) 20 | neg_inds = gt.lt(1) 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | 42 | def _neg_loss(pred, gt): 43 | ''' Modified focal loss. Exactly the same as CornerNet. 
44 | Runs faster and costs a little bit more memory 45 | Arguments: 46 | pred (batch x c x h x w) 47 | gt_regr (batch x c x h x w) 48 | ''' 49 | pos_inds = gt.eq(1).float() 50 | neg_inds = gt.lt(1).float() 51 | 52 | neg_weights = torch.pow(1 - gt, 4) 53 | 54 | loss = 0 55 | 56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 58 | 59 | num_pos = pos_inds.float().sum() 60 | pos_loss = pos_loss.sum() 61 | neg_loss = neg_loss.sum() 62 | 63 | if num_pos == 0: 64 | loss = loss - neg_loss 65 | else: 66 | loss = loss - (pos_loss + neg_loss) / num_pos 67 | return loss 68 | 69 | def _not_faster_neg_loss(pred, gt): 70 | pos_inds = gt.eq(1).float() 71 | neg_inds = gt.lt(1).float() 72 | num_pos = pos_inds.float().sum() 73 | neg_weights = torch.pow(1 - gt, 4) 74 | 75 | loss = 0 76 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds 77 | weight = neg_weights * neg_inds + pos_inds 78 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight 79 | all_loss = all_loss.sum() 80 | 81 | if num_pos > 0: 82 | all_loss /= num_pos 83 | loss -= all_loss 84 | return loss 85 | 86 | def _slow_reg_loss(regr, gt_regr, mask): 87 | num = mask.float().sum() 88 | mask = mask.unsqueeze(2).expand_as(gt_regr) 89 | 90 | regr = regr[mask] 91 | gt_regr = gt_regr[mask] 92 | 93 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 94 | regr_loss = regr_loss / (num + 1e-4) 95 | return regr_loss 96 | 97 | def _reg_loss(regr, gt_regr, mask): 98 | ''' L1 regression loss 99 | Arguments: 100 | regr (batch x max_objects x dim) 101 | gt_regr (batch x max_objects x dim) 102 | mask (batch x max_objects) 103 | ''' 104 | num = mask.float().sum() 105 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 106 | 107 | regr = regr * mask 108 | gt_regr = gt_regr * mask 109 | 110 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 111 | regr_loss = regr_loss / (num + 1e-4) 112 | return regr_loss 113 | 114 | class FocalLoss(nn.Module): 115 | '''nn.Module warpper for focal loss''' 116 | def __init__(self): 117 | super(FocalLoss, self).__init__() 118 | self.neg_loss = _neg_loss 119 | 120 | def forward(self, out, target): 121 | return self.neg_loss(out, target) 122 | 123 | class RegLoss(nn.Module): 124 | '''Regression loss for an output tensor 125 | Arguments: 126 | output (batch x dim x h x w) 127 | mask (batch x max_objects) 128 | ind (batch x max_objects) 129 | target (batch x max_objects x dim) 130 | ''' 131 | def __init__(self): 132 | super(RegLoss, self).__init__() 133 | 134 | def forward(self, output, mask, ind, target): 135 | pred = _transpose_and_gather_feat(output, ind) 136 | loss = _reg_loss(pred, target, mask) 137 | return loss 138 | 139 | class RegL1Loss(nn.Module): 140 | def __init__(self): 141 | super(RegL1Loss, self).__init__() 142 | 143 | def forward(self, output, mask, ind, target): 144 | num_joints = output.shape[1] // 2 145 | pred = _transpose_and_gather_feat_plus(output, ind, num_joints) 146 | mask = mask.unsqueeze(2).expand_as(pred).float() 147 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 148 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 149 | loss = loss / (mask.sum() + 1e-4) 150 | return loss 151 | 152 | class NormRegL1Loss(nn.Module): 153 | def __init__(self): 154 | super(NormRegL1Loss, self).__init__() 155 | 156 | def forward(self, output, mask, ind, target): 157 | pred = _transpose_and_gather_feat(output, ind) 
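    # pred has shape (batch, max_objects, dim): the regression output gathered at the
    # flattened ground-truth center indices in `ind` (see the RegLoss docstring above).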
158 | mask = mask.unsqueeze(2).expand_as(pred).float() 159 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 160 | pred = pred / (target + 1e-4) 161 | target = target * 0 + 1 162 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 163 | loss = loss / (mask.sum() + 1e-4) 164 | return loss 165 | 166 | class RegWeightedL1Loss(nn.Module): 167 | def __init__(self): 168 | super(RegWeightedL1Loss, self).__init__() 169 | 170 | def forward(self, output, mask, ind, target): 171 | pred = _transpose_and_gather_feat(output, ind) 172 | mask = mask.float() 173 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 174 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 175 | loss = loss / (mask.sum() + 1e-4) 176 | return loss 177 | 178 | class L1Loss(nn.Module): 179 | def __init__(self): 180 | super(L1Loss, self).__init__() 181 | 182 | def forward(self, output, mask, ind, target): 183 | pred = _transpose_and_gather_feat(output, ind) 184 | mask = mask.unsqueeze(2).expand_as(pred).float() 185 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 186 | return loss 187 | 188 | class BinRotLoss(nn.Module): 189 | def __init__(self): 190 | super(BinRotLoss, self).__init__() 191 | 192 | def forward(self, output, mask, ind, rotbin, rotres): 193 | pred = _transpose_and_gather_feat(output, ind) 194 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 195 | return loss 196 | 197 | def compute_res_loss(output, target): 198 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 199 | 200 | # TODO: weight 201 | def compute_bin_loss(output, target, mask): 202 | mask = mask.expand_as(output) 203 | output = output * mask.float() 204 | return F.cross_entropy(output, target, reduction='elementwise_mean') 205 | 206 | def compute_rot_loss(output, target_bin, target_res, mask): 207 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 208 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 209 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 210 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 211 | # mask: (B, 128, 1) 212 | # import pdb; pdb.set_trace() 213 | output = output.view(-1, 8) 214 | target_bin = target_bin.view(-1, 2) 215 | target_res = target_res.view(-1, 2) 216 | mask = mask.view(-1, 1) 217 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 218 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 219 | loss_res = torch.zeros_like(loss_bin1) 220 | if target_bin[:, 0].nonzero().shape[0] > 0: 221 | idx1 = target_bin[:, 0].nonzero()[:, 0] 222 | valid_output1 = torch.index_select(output, 0, idx1.long()) 223 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 224 | loss_sin1 = compute_res_loss( 225 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 226 | loss_cos1 = compute_res_loss( 227 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 228 | loss_res += loss_sin1 + loss_cos1 229 | if target_bin[:, 1].nonzero().shape[0] > 0: 230 | idx2 = target_bin[:, 1].nonzero()[:, 0] 231 | valid_output2 = torch.index_select(output, 0, idx2.long()) 232 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 233 | loss_sin2 = compute_res_loss( 234 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 235 | loss_cos2 = compute_res_loss( 236 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 237 | loss_res += loss_sin2 + loss_cos2 238 | return loss_bin1 + loss_bin2 + loss_res 239 | 
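The heatmap and regression criteria above share a simple contract: the focal losses expect predictions already squashed to (0, 1) and Gaussian-splatted ground truth with exact 1.0 only at object centers, while the regression losses gather predictions at per-object center indices and mask out empty object slots. A minimal sketch of exercising two of them on toy tensors, with `src/lib` on the path (which losses are attached to which head is decided by the trainers in `src/lib/trains`, not shown here; shapes follow the docstrings above):

    import torch
    from models.losses import FocalLoss, RegWeightedL1Loss

    crit_hm = FocalLoss()              # penalty-reduced focal loss for center heatmaps
    crit_kp = RegWeightedL1Loss()      # masked L1 for per-center joint offsets

    # Toy batch: 1 image, 1 class, a 4x4 output grid, 2 object slots, 17 joints.
    pred_hm = torch.rand(1, 1, 4, 4).sigmoid()   # predictions already in (0, 1)
    gt_hm = torch.zeros(1, 1, 4, 4)
    gt_hm[0, 0, 1, 1] = 1.0                      # one object center at (y=1, x=1)

    pred_kps = torch.rand(1, 34, 4, 4)           # 2 * num_joints regression channels
    ind = torch.tensor([[1 * 4 + 1, 0]])         # flattened center index y*W + x per slot
    mask = torch.zeros(1, 2, 34)
    mask[0, 0] = 1                               # only the first slot holds a real object
    target = torch.zeros(1, 2, 34)

    loss = crit_hm(pred_hm, gt_hm) + crit_kp(pred_kps, mask, ind, target)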
-------------------------------------------------------------------------------- /src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from typing import Dict 5 | 6 | import torchvision.models as models 7 | import torch 8 | import torch.nn as nn 9 | import os 10 | 11 | from .networks.movenet import get_pose_net as get_move_net 12 | 13 | _model_factory = { 14 | 'movenet': get_move_net 15 | } 16 | 17 | 18 | def create_model(arch, heads, head_conv, froze_backbone): 19 | arch = arch[:arch.find('_')] if '_' in arch else arch 20 | get_model = _model_factory[arch] 21 | model = get_model(heads=heads, head_conv=head_conv, froze_backbone=froze_backbone, model_type = 'thunder') 22 | return model 23 | 24 | 25 | def load_model(model, model_path, optimizer=None, resume=False, 26 | lr=None, lr_step=None): 27 | start_epoch = 0 28 | checkpoint = torch.load( 29 | model_path, map_location=lambda storage, loc: storage) 30 | # print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 31 | if 'state_dict' in checkpoint.keys(): 32 | state_dict = checkpoint['state_dict'] 33 | else: 34 | state_dict = checkpoint 35 | 36 | model_state_dict = model.state_dict() 37 | 38 | # check loaded parameters and created model parameters 39 | msg = 'If you see this, your model does not fully load the ' + \ 40 | 'pre-trained weight. Please make sure ' + \ 41 | 'you have correctly specified --arch xxx ' + \ 42 | 'or set the correct --num_classes for your own dataset.' 43 | for k in state_dict: 44 | if k in model_state_dict: 45 | if state_dict[k].shape != model_state_dict[k].shape: 46 | print('Skip loading parameter {}, required shape{}, ' 47 | 'loaded shape{}. 
{}'.format( 48 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 49 | state_dict[k] = model_state_dict[k] 50 | else: 51 | print('Drop parameter {}.'.format(k) + msg) 52 | for k in model_state_dict: 53 | if not (k in state_dict): 54 | print('No param {}.'.format(k) + msg) 55 | state_dict[k] = model_state_dict[k] 56 | model.load_state_dict(state_dict, strict=False) 57 | 58 | # resume optimizer parameters 59 | if optimizer is not None and resume: 60 | if 'optimizer' in checkpoint: 61 | optimizer.load_state_dict(checkpoint['optimizer']) 62 | start_epoch = checkpoint['epoch'] 63 | start_lr = lr 64 | for step in lr_step: 65 | if start_epoch >= step: 66 | start_lr *= 0.1 67 | for param_group in optimizer.param_groups: 68 | param_group['lr'] = start_lr 69 | print('Resumed optimizer with start lr', start_lr) 70 | else: 71 | print('No optimizer parameters in checkpoint.') 72 | if optimizer is not None: 73 | return model, optimizer, start_epoch 74 | else: 75 | return model 76 | 77 | 78 | def save_model(path, epoch, model, optimizer=None): 79 | if isinstance(model, torch.nn.DataParallel): 80 | state_dict = model.module.state_dict() 81 | else: 82 | state_dict = model.state_dict() 83 | data = {'epoch': epoch, 84 | 'state_dict': state_dict} 85 | if not (optimizer is None): 86 | data['optimizer'] = optimizer.state_dict() 87 | torch.save(data, path) 88 | -------------------------------------------------------------------------------- /src/lib/models/networks/backbone_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torch 3 | from torch import nn 4 | from .feature_pyramid_network import FeaturePyramidNetwork 5 | 6 | 7 | from torchvision.ops import misc as misc_nn_ops 8 | from torchvision.models._utils import IntermediateLayerGetter 9 | # from torchvision.models import mobilenet 10 | from .mobilenetv2 import mobilenet_v2 11 | 12 | 13 | class BackboneWithFPN(nn.Module): 14 | """ 15 | Adds a FPN on top of a model. 16 | Internally, it uses torchvision.models._utils.IntermediateLayerGetter to 17 | extract a submodel that returns the feature maps specified in return_layers. 18 | The same limitations of IntermediatLayerGetter apply here. 19 | Args: 20 | backbone (nn.Module) 21 | return_layers (Dict[name, new_name]): a dict containing the names 22 | of the modules for which the activations will be returned as 23 | the key of the dict, and the value of the dict is the name 24 | of the returned activation (which the user can specify). 25 | in_channels_list (List[int]): number of channels for each feature map 26 | that is returned, in the order they are present in the OrderedDict 27 | out_channels (int): number of channels in the FPN. 
28 | Attributes: 29 | out_channels (int): the number of channels in the FPN 30 | """ 31 | def __init__(self, backbone, return_layers, in_channels_list, out_channels): 32 | super(BackboneWithFPN, self).__init__() 33 | 34 | 35 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 36 | self.fpn = FeaturePyramidNetwork( 37 | in_channels_list=in_channels_list, 38 | out_channels_list=[24, 32, 64, 64], 39 | fused_channels_list=[24, 24, 32], 40 | ) 41 | self.out_channels = out_channels 42 | 43 | def forward(self, x): 44 | x = self.body(x) 45 | x = self.fpn(x) 46 | return x 47 | 48 | 49 | def mobilenet_backbone( 50 | backbone_name, # discared as we always use mobilenet v2 51 | pretrained, 52 | fpn, 53 | norm_layer=misc_nn_ops.FrozenBatchNorm2d, 54 | trainable_layers=2, 55 | returned_layers=None, 56 | extra_blocks=None, 57 | model_type='lighting' 58 | ): 59 | if model_type == 'lighting': 60 | inverted_residual_setting = [ 61 | # t, c, n, s 62 | [1, 16, 1, 1], 63 | [6, 24, 2, 2], 64 | [6, 32, 3, 2], 65 | [6, 64, 4, 2], 66 | [6, 96, 3, 1], 67 | [6, 160, 3, 2], 68 | [6, 320, 1, 1], 69 | ] 70 | else: 71 | inverted_residual_setting = [ 72 | # t, c, n, s 73 | [1, 32, 1, 1], 74 | [6, 40, 2, 2], 75 | [6, 56, 3, 2], 76 | [6, 112, 4, 2], 77 | [6, 168, 3, 1], 78 | [6, 280, 3, 2], 79 | [6, 560, 1, 1], 80 | ] 81 | 82 | backbone = mobilenet_v2(pretrained=pretrained, norm_layer=norm_layer, inverted_residual_setting = inverted_residual_setting).features 83 | # print("backbone: ", backbone) 84 | 85 | # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. 86 | # The first and last blocks are always included because they are the C0 (conv1) and Cn. 87 | # mli: for mobilenet, the obtained stage_indices = [0, 2, 4, 7, 14, 18] 88 | # stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] 89 | # mli: the following block indices refer to the last layer of each stage (s4, s8, s16, s32) 90 | # **This is wrong** stage_indices = [2, 4, 7, 14] 91 | stage_indices = [3, 6, 10, 18] 92 | num_stages = len(stage_indices) 93 | # print("# stages: ", num_stages) 94 | # print("Stage indicse: ", stage_indices) 95 | 96 | # find the index of the layer from which we wont freeze 97 | assert 0 <= trainable_layers <= num_stages 98 | freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers] 99 | 100 | # mli: make all layers trainable. 
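    # The torchvision recipe would freeze all parameters before `freeze_before`;
    # the loop below is deliberately left commented out, so `trainable_layers`
    # currently has no effect and the whole backbone is trained end to end.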
101 | # for b in backbone[:freeze_before]: 102 | # for parameter in b.parameters(): 103 | # parameter.requires_grad_(False) 104 | 105 | out_channels = 24 106 | if fpn: 107 | # mli: remove the extra_blocks 108 | # if extra_blocks is None: 109 | # extra_blocks = LastLevelMaxPool() 110 | 111 | if returned_layers is None: 112 | returned_layers = list(range(num_stages)) 113 | assert min(returned_layers) >= 0 and max(returned_layers) < num_stages 114 | return_layers = {f'{stage_indices[k]}': str(v) for v, k in enumerate(returned_layers)} 115 | # print("Return layers: ", return_layers) 116 | 117 | in_channels_list = [backbone[stage_indices[i]].out_channels for i in returned_layers] 118 | # print("in_channels_list", in_channels_list) 119 | 120 | return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels) 121 | else: 122 | m = nn.Sequential( 123 | backbone, 124 | # depthwise linear combination of channels to reduce their size 125 | nn.Conv2d(backbone[-1].out_channels, out_channels, 1), 126 | ) 127 | m.out_channels = out_channels 128 | return m 129 | 130 | ''' 131 | # test the functionality 132 | if __name__=='__main__': 133 | """ 134 | Constructs a specified MobileNet v2 backbone with FPN on top. Freezes the specified number of layers in the backbone. 135 | 136 | Examples:: 137 | 138 | >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 139 | >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3) 140 | >>> # get some dummy image 141 | >>> x = torch.rand(1,3,64,64) 142 | >>> # compute the output 143 | >>> output = backbone(x) 144 | >>> print([(k, v.shape) for k, v in output.items()]) 145 | >>> # returns 146 | >>> [('0', torch.Size([1, 256, 16, 16])), 147 | >>> ('1', torch.Size([1, 256, 8, 8])), 148 | >>> ('2', torch.Size([1, 256, 4, 4])), 149 | >>> ('3', torch.Size([1, 256, 2, 2])), 150 | >>> ('pool', torch.Size([1, 256, 1, 1]))] 151 | """ 152 | backbone = mobilenet_backbone('mobilenet_v2', fpn=True, pretrained=False, trainable_layers=3) 153 | x = torch.rand(1,3,192,192) 154 | # compute the output 155 | output = backbone(x) 156 | print('output shape: ', output.shape) 157 | ''' 158 | -------------------------------------------------------------------------------- /src/lib/models/networks/feature_pyramid_network.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Based on torchvision.ops.feature_pyramid_network. 3 | In the original paper, they `fix the feature dimension (numbers of channels, denoted as d) in all the feature maps.` 4 | However, by diving into the Movenet, I found out that the feature dimension is incrementally decreased, from 64 to 32 to 24. So I made the changes correspondingly. 
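Unlike torchvision's FeaturePyramidNetwork, which returns a dict with one map per level, this variant keeps only the top-down path and returns the finest (stride-4) map: each lateral is a 1x1 conv, the deeper map is upsampled by 2 and added, and a depthwise-separable conv reduces the fused result (64 -> 32 -> 24 channels) to the 24-channel feature consumed by the rest of the model.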
5 | ''' 6 | 7 | from collections import OrderedDict 8 | 9 | import torch.nn.functional as F 10 | from torch import nn, Tensor 11 | 12 | from typing import Tuple, List, Dict, Optional 13 | 14 | class SeperableConv(nn.Module): 15 | def __init__( 16 | self, 17 | inp: int, 18 | oup: int, 19 | activation_layer = None 20 | ) -> None: 21 | super(SeperableConv, self).__init__() 22 | 23 | if activation_layer is None: 24 | activation_layer = nn.ReLU 25 | 26 | hidden_dim = int(round(inp)) 27 | 28 | layers: List[nn.Module] = [] 29 | layers.extend([ 30 | # dw 31 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=1, padding=1, groups=hidden_dim, bias=True), 32 | # pw-linear 33 | nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=True), 34 | activation_layer(inplace=True), 35 | ]) 36 | self.conv = nn.Sequential(*layers) 37 | self.out_channels = oup 38 | 39 | def forward(self, x: Tensor) -> Tensor: 40 | return self.conv(x) 41 | 42 | 43 | class FeaturePyramidNetwork(nn.Module): 44 | """ 45 | Module that adds a FPN from on top of a set of feature maps. This is based on 46 | `"Feature Pyramid Network for Object Detection" `_. 47 | 48 | The feature maps are currently supposed to be in increasing depth 49 | order. 50 | 51 | The input to the model is expected to be an OrderedDict[Tensor], containing 52 | the feature maps on top of which the FPN will be added. 53 | 54 | Args: 55 | in_channels_list (list[int]): number of channels for each feature map that 56 | is passed to the module 57 | out_channels (int): number of channels of the FPN representation 58 | extra_blocks (ExtraFPNBlock or None): if provided, extra operations will 59 | be performed. It is expected to take the fpn features, the original 60 | features and the names of the original features as input, and returns 61 | a new list of feature maps and their corresponding names 62 | 63 | Examples:: 64 | 65 | >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) 66 | >>> # get some dummy data 67 | >>> x = OrderedDict() 68 | >>> x['feat0'] = torch.rand(1, 10, 64, 64) 69 | >>> x['feat2'] = torch.rand(1, 20, 16, 16) 70 | >>> x['feat3'] = torch.rand(1, 30, 8, 8) 71 | >>> # compute the FPN on top of x 72 | >>> output = m(x) 73 | >>> print([(k, v.shape) for k, v in output.items()]) 74 | >>> # returns 75 | >>> [('feat0', torch.Size([1, 5, 64, 64])), 76 | >>> ('feat2', torch.Size([1, 5, 16, 16])), 77 | >>> ('feat3', torch.Size([1, 5, 8, 8]))] 78 | 79 | """ 80 | def __init__( 81 | self, 82 | in_channels_list: List[int], # [24, 32, 64, 1280] 83 | out_channels_list: List[int], # [24, 32, 64, 64] 84 | fused_channels_list = List[int], # [24, 24, 32] 85 | ): 86 | super(FeaturePyramidNetwork, self).__init__() 87 | self.inner_blocks = nn.ModuleList() 88 | self.layer_blocks = nn.ModuleList() 89 | assert len(in_channels_list) == len(out_channels_list), 'The lengths of in_channels_list and out_channels_list should be equal.' 
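        # For every level i, a 1x1 "inner" conv maps in_channels_list[i] ->
        # out_channels_list[i]; all levels except the deepest also get a
        # depthwise-separable "layer" conv that shrinks the fused (lateral +
        # upsampled) map to fused_channels_list[i] before it is pushed one level down.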
90 | for i in range(len(in_channels_list)): 91 | in_channels = in_channels_list[i] 92 | out_channels = out_channels_list[i] 93 | if in_channels == 0 or out_channels == 0: 94 | raise ValueError("in_channels=0/out_channels=0 is currently not supported") 95 | inner_block_module = nn.Conv2d(in_channels, out_channels, 1) 96 | self.inner_blocks.append(inner_block_module) 97 | if i != len(in_channels_list) - 1: 98 | fused_channels = fused_channels_list[i] 99 | layer_block_module = SeperableConv(out_channels, fused_channels) 100 | self.layer_blocks.append(layer_block_module) 101 | 102 | # initialize parameters now to avoid modifying the initialization of top_blocks 103 | for m in self.modules(): 104 | if isinstance(m, nn.Conv2d): 105 | nn.init.kaiming_uniform_(m.weight, a=1) 106 | nn.init.constant_(m.bias, 0) 107 | 108 | def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: 109 | """ 110 | This is equivalent to self.inner_blocks[idx](x), 111 | but torchscript doesn't support this yet 112 | """ 113 | num_blocks = len(self.inner_blocks) 114 | if idx < 0: 115 | idx += num_blocks 116 | i = 0 117 | out = x 118 | for module in self.inner_blocks: 119 | if i == idx: 120 | out = module(x) 121 | i += 1 122 | return out 123 | 124 | def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: 125 | """ 126 | This is equivalent to self.layer_blocks[idx](x), 127 | but torchscript doesn't support this yet 128 | """ 129 | num_blocks = len(self.layer_blocks) 130 | if idx < 0: 131 | idx += num_blocks 132 | i = 0 133 | out = x 134 | for module in self.layer_blocks: 135 | if i == idx: 136 | out = module(x) 137 | i += 1 138 | return out 139 | 140 | def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: 141 | """ 142 | Computes the FPN for a set of feature maps. 143 | 144 | Args: 145 | x (OrderedDict[Tensor]): feature maps for each feature level. 146 | 147 | Returns: 148 | results (Tensor): highest maps after FPN layers. 149 | """ 150 | # unpack OrderedDict into two lists for easier handling 151 | names = list(x.keys()) 152 | x = list(x.values()) 153 | 154 | last_inner = self.get_result_from_inner_blocks(x[-1], -1) 155 | 156 | 157 | for idx in range(len(x)-2, -1, -1): 158 | inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) 159 | 160 | # for pytorch inference 161 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="bilinear", align_corners=False) 162 | # for model convertion, please comment the above line and uncomment the following line. 163 | # inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 164 | last_inner = inner_lateral + inner_top_down 165 | last_inner = self.get_result_from_layer_blocks(last_inner, idx) 166 | 167 | return last_inner 168 | -------------------------------------------------------------------------------- /src/lib/models/networks/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch import Tensor 4 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 5 | from typing import Callable, Any, Optional, List 6 | 7 | 8 | __all__ = ['MobileNetV2', 'mobilenet_v2'] 9 | 10 | 11 | model_urls = { 12 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 13 | } 14 | 15 | 16 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 17 | """ 18 | This function is taken from the original tf repo. 
19 | It ensures that all layers have a channel number that is divisible by 8 20 | It can be seen here: 21 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 22 | """ 23 | if min_value is None: 24 | min_value = divisor 25 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 26 | # Make sure that round down does not go down by more than 10%. 27 | if new_v < 0.9 * v: 28 | new_v += divisor 29 | return new_v 30 | 31 | 32 | class ConvBNActivation(nn.Sequential): 33 | def __init__( 34 | self, 35 | in_planes: int, 36 | out_planes: int, 37 | kernel_size: int = 3, 38 | stride: int = 1, 39 | groups: int = 1, 40 | norm_layer: Optional[Callable[..., nn.Module]] = None, 41 | activation_layer: Optional[Callable[..., nn.Module]] = None, 42 | dilation: int = 1, 43 | ) -> None: 44 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 45 | # if norm_layer is None: 46 | # norm_layer = nn.BatchNorm2d 47 | if activation_layer is None: 48 | activation_layer = nn.ReLU6 49 | if stride == 2 and kernel_size == 3: 50 | super().__init__( 51 | nn.ZeroPad2d((0, 1, 0, 1)), 52 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, 0, dilation=dilation, groups=groups, 53 | bias=True), 54 | # norm_layer(out_planes), 55 | activation_layer(inplace=True) 56 | ) 57 | else: 58 | super().__init__( 59 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, dilation=dilation, groups=groups, 60 | bias=True), 61 | # norm_layer(out_planes), 62 | activation_layer(inplace=True) 63 | ) 64 | self.out_channels = out_planes 65 | 66 | 67 | # necessary for backwards compatibility 68 | ConvBNReLU = ConvBNActivation 69 | 70 | 71 | class InvertedResidual(nn.Module): 72 | def __init__( 73 | self, 74 | inp: int, 75 | oup: int, 76 | stride: int, 77 | expand_ratio: int, 78 | norm_layer: Optional[Callable[..., nn.Module]] = None 79 | ) -> None: 80 | super(InvertedResidual, self).__init__() 81 | self.stride = stride 82 | assert stride in [1, 2] 83 | 84 | if norm_layer is None: 85 | norm_layer = nn.BatchNorm2d 86 | 87 | hidden_dim = int(round(inp * expand_ratio)) 88 | self.use_res_connect = self.stride == 1 and inp == oup 89 | 90 | layers: List[nn.Module] = [] 91 | if expand_ratio != 1: 92 | # pw 93 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 94 | 95 | layers.extend([ 96 | # dw 97 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 98 | # pw-linear 99 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=True), 100 | # norm_layer(oup), 101 | ]) 102 | self.conv = nn.Sequential(*layers) 103 | self.out_channels = oup 104 | self._is_cn = stride > 1 105 | 106 | def forward(self, x: Tensor) -> Tensor: 107 | if self.use_res_connect: 108 | return x + self.conv(x) 109 | else: 110 | return self.conv(x) 111 | 112 | 113 | class MobileNetV2(nn.Module): 114 | def __init__( 115 | self, 116 | num_classes: int = 1000, 117 | width_mult: float = 1.0, 118 | inverted_residual_setting: Optional[List[List[int]]] = None, 119 | round_nearest: int = 8, 120 | block: Optional[Callable[..., nn.Module]] = None, 121 | norm_layer: Optional[Callable[..., nn.Module]] = None 122 | ) -> None: 123 | """ 124 | MobileNet V2 main class 125 | 126 | Args: 127 | num_classes (int): Number of classes 128 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 129 | inverted_residual_setting: Network structure 130 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 
131 | Set to 1 to turn off rounding 132 | block: Module specifying inverted residual building block for mobilenet 133 | norm_layer: Module specifying the normalization layer to use 134 | 135 | """ 136 | super(MobileNetV2, self).__init__() 137 | 138 | if block is None: 139 | block = InvertedResidual 140 | 141 | if norm_layer is None: 142 | norm_layer = nn.BatchNorm2d 143 | 144 | input_channel = 32 * 1.75 145 | last_channel = 1280 146 | 147 | if inverted_residual_setting is None: 148 | inverted_residual_setting = [ 149 | # t, c, n, s 150 | [1, 16, 1, 1], 151 | [6, 24, 2, 2], 152 | [6, 32, 3, 2], 153 | [6, 64, 4, 2], 154 | [6, 96, 3, 1], 155 | [6, 160, 3, 2], 156 | [6, 320, 1, 1], 157 | ] 158 | 159 | # only check the first element, assuming user knows t,c,n,s are required 160 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 161 | raise ValueError("inverted_residual_setting should be non-empty " 162 | "or a 4-element list, got {}".format(inverted_residual_setting)) 163 | 164 | # building first layer 165 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 166 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 167 | features: List[nn.Module] = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 168 | # building inverted residual blocks 169 | for t, c, n, s in inverted_residual_setting: 170 | output_channel = _make_divisible(c * width_mult, round_nearest) 171 | for i in range(n): 172 | stride = s if i == 0 else 1 173 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 174 | input_channel = output_channel 175 | # building last several layers 176 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer)) 177 | # make it nn.Sequential 178 | self.features = nn.Sequential(*features) 179 | 180 | # building classifier 181 | self.classifier = nn.Sequential( 182 | nn.Dropout(0.2), 183 | nn.Linear(self.last_channel, num_classes), 184 | ) 185 | 186 | # weight initialization 187 | for m in self.modules(): 188 | if isinstance(m, nn.Conv2d): 189 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 190 | if m.bias is not None: 191 | nn.init.zeros_(m.bias) 192 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 193 | nn.init.ones_(m.weight) 194 | nn.init.zeros_(m.bias) 195 | elif isinstance(m, nn.Linear): 196 | nn.init.normal_(m.weight, 0, 0.01) 197 | nn.init.zeros_(m.bias) 198 | 199 | def _forward_impl(self, x: Tensor) -> Tensor: 200 | # This exists since TorchScript doesn't support inheritance, so the superclass method 201 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 202 | x = self.features(x) 203 | # Cannot use "squeeze" as batch-size can be 1 204 | x = nn.functional.adaptive_avg_pool2d(x, (1, 1)) 205 | x = torch.flatten(x, 1) 206 | x = self.classifier(x) 207 | return x 208 | 209 | def forward(self, x: Tensor) -> Tensor: 210 | return self._forward_impl(x) 211 | 212 | 213 | def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2: 214 | """ 215 | Constructs a MobileNetV2 architecture from 216 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
217 | 218 | Args: 219 | pretrained (bool): If True, returns a model pre-trained on ImageNet 220 | progress (bool): If True, displays a progress bar of the download to stderr 221 | """ 222 | model = MobileNetV2(**kwargs) 223 | if pretrained: 224 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], 225 | progress=progress) 226 | model.load_state_dict(state_dict) 227 | return model 228 | -------------------------------------------------------------------------------- /src/lib/models/networks/movenet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # Modified by Min Li 7 | # ------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import os 14 | import math 15 | import logging 16 | 17 | import cv2 18 | 19 | from matplotlib import pyplot as plt 20 | import numpy as np 21 | import torch 22 | import torch.nn as nn 23 | from .backbone_utils import mobilenet_backbone 24 | import torch.utils.model_zoo as model_zoo 25 | 26 | BN_MOMENTUM = 0.1 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | class MoveNet(nn.Module): 31 | ''' 32 | MoveNet from Goolge. Please refer their blog: https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html 33 | 34 | ''' 35 | def __init__(self, backbone, heads, head_conv, ft_size=48): 36 | super(MoveNet, self).__init__() 37 | self.num_joints = heads["hm_hp"] 38 | self.out_channels = 24 39 | self.backbone = backbone 40 | self.heads = heads 41 | self.ft_size = ft_size 42 | self.weight_to_center = self._generate_center_dist(self.ft_size).unsqueeze(2) 43 | 44 | self.dist_y, self.dist_x = self._generate_dist_map(self.ft_size) 45 | self.index_17 = torch.arange(0, self.num_joints).float() 46 | 47 | for head in self.heads: 48 | classes = self.heads[head] 49 | if head_conv > 0: 50 | fc = nn.Sequential( 51 | nn.Conv2d(self.out_channels, self.out_channels, 3, padding=1, groups=self.out_channels, bias=True), 52 | nn.Conv2d(self.out_channels, head_conv, 1, 1, 0, bias=True), 53 | nn.ReLU(inplace=True), 54 | nn.Conv2d(head_conv, classes, 55 | kernel_size=1, stride=1, 56 | padding=0, bias=True)) 57 | else: 58 | fc = nn.Conv2d(64, classes, 59 | kernel_size=1, stride=1, 60 | padding=0, bias=True) 61 | self.__setattr__(head, fc) 62 | 63 | 64 | def forward(self, x): 65 | # conv forward 66 | # x = x * 0.007843137718737125 - 1.0 67 | # specify the device 68 | device = x.device 69 | self.weight_to_center = self.weight_to_center.to(device) 70 | self.dist_y, self.dist_x = self.dist_y.to(device), self.dist_x.to(device) 71 | 72 | x = self.backbone(x) 73 | ret = {} 74 | for head in self.heads: 75 | ret[head] = self.__getattr__(head)(x) 76 | 77 | return [ret] 78 | 79 | def decode(self, x): 80 | kpt_heatmap, center, kpt_regress, kpt_offset = x['hm_hp'].squeeze(0).permute((1, 2, 0)), x['hm'].squeeze(0).permute((1, 2, 0)), x['hps'].squeeze(0).permute((1, 2, 0)), x['hp_offset'].squeeze(0).permute((1, 2, 0)) 81 | 82 | # pose decode 83 | kpt_heatmap = torch.sigmoid(kpt_heatmap) 84 | center = torch.sigmoid(center) 85 | 86 | ct_ind = self._top_with_center(center) 87 | 88 | kpt_coor = 
self._center_to_kpt(kpt_regress, ct_ind) 89 | 90 | kpt_top_inds = self._kpt_from_heatmap(kpt_heatmap, kpt_coor) 91 | 92 | kpt_with_conf = self._kpt_from_offset(kpt_offset, kpt_top_inds, kpt_heatmap, self.ft_size) 93 | 94 | return kpt_with_conf 95 | 96 | 97 | def _draw(self, ft): 98 | plt.imshow(ft.numpy().reshape(self.ft_size, self.ft_size)) 99 | plt.show() 100 | 101 | def _generate_center_dist(self, ft_size=48, delta=1.8): 102 | weight_to_center = torch.zeros((int(ft_size), int(ft_size))) 103 | y, x = np.ogrid[0:ft_size, 0:ft_size] 104 | center_y, center_x = ft_size / 2.0, ft_size/ 2.0 105 | y = y - center_y 106 | x = x - center_x 107 | weight_to_center = 1 / (np.sqrt(y * y + x * x) + delta) 108 | weight_to_center = torch.from_numpy(weight_to_center) 109 | return weight_to_center 110 | 111 | def _generate_dist_map(self, ft_size=48): 112 | y, x = np.ogrid[0:ft_size, 0:ft_size] 113 | y = torch.from_numpy(np.repeat(y, ft_size, axis=1)).unsqueeze(2).float() 114 | x = torch.from_numpy(np.repeat(x, ft_size, axis=0)).unsqueeze(2).float() 115 | 116 | return y, x 117 | 118 | 119 | def _top_with_center(self, center): 120 | scores = center * self.weight_to_center 121 | 122 | top_ind = torch.argmax(scores.view(1, self.ft_size * self.ft_size, 1), dim=1) 123 | return top_ind 124 | 125 | def _center_to_kpt(self, kpt_regress, ct_ind, ft_size=48): 126 | ct_y = torch.div(ct_ind, ft_size, rounding_mode='floor') 127 | # ct_y = (ct_ind.float() / ft_size).int().float() 128 | ct_x = ct_ind - ct_y * ft_size 129 | 130 | kpt_regress = kpt_regress.view(-1, self.num_joints, 2) 131 | ct_ind = ct_ind.unsqueeze(2).expand(ct_ind.size(0), self.num_joints, 2) 132 | kpt_coor = kpt_regress.gather(0, ct_ind).squeeze(0) 133 | 134 | kpt_coor = kpt_coor + torch.cat((ct_y, ct_x), dim=1) 135 | 136 | return kpt_coor 137 | 138 | def _kpt_from_heatmap(self, kpt_heatmap, kpt_coor): 139 | y = self.dist_y - kpt_coor[:, 0].reshape(1, 1, self.num_joints) 140 | x = self.dist_x - kpt_coor[:, 1].reshape(1, 1, self.num_joints) 141 | dist_weight = torch.sqrt(y * y + x * x) + 1.8 142 | 143 | scores = kpt_heatmap / dist_weight 144 | scores = scores.reshape((1, self.ft_size * self.ft_size, self.num_joints)) 145 | top_inds = torch.argmax(scores, dim=1) 146 | 147 | return top_inds 148 | 149 | def _kpt_from_offset(self, kpt_offset, kpt_top_inds, kpt_heatmap, size=48): 150 | kpts_ys = torch.div(kpt_top_inds, size, rounding_mode='floor') 151 | # kpts_ys = (kpt_top_inds.float() / size).int().float() 152 | kpts_xs = kpt_top_inds - kpts_ys * size 153 | kpt_coordinate = torch.stack((kpts_ys.squeeze(0), kpts_xs.squeeze(0)), dim=1) 154 | 155 | kpt_heatmap = kpt_heatmap.view(-1, self.num_joints) 156 | kpt_conf = kpt_heatmap.gather(0, kpt_top_inds).squeeze(0) 157 | 158 | kpt_offset = kpt_offset.view(-1, self.num_joints, 2) 159 | kpt_top_inds = kpt_top_inds.unsqueeze(2).expand(kpt_top_inds.size(0), self.num_joints, 2) 160 | kpt_offset_yx = kpt_offset.gather(0, kpt_top_inds).squeeze(0) 161 | 162 | kpt_coordinate= (kpt_offset_yx + kpt_coordinate) * (1/size) 163 | kpt_with_conf = torch.cat([kpt_coordinate, kpt_conf.unsqueeze(1)], dim=1).reshape((1, 1, self.num_joints, 3)) 164 | 165 | return kpt_with_conf 166 | 167 | 168 | 169 | 170 | # def get_pose_net(heads, head_conv=96, froze_backbone=True): 171 | # backbone = mobilenet_backbone('mobilenet_v2', pretrained=False, fpn=True) 172 | # if froze_backbone: 173 | # for param in backbone.parameters(): 174 | # param.requires_grad = False 175 | # model = MoveNet(backbone, heads, head_conv=head_conv) 176 | # return model 
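# A minimal usage sketch; the head sizes and the 192x192 "lighting" input below are
# assumptions inferred from the 17-keypoint layout and ft_size=48 used above:
#
#   heads = {'hm': 1, 'hm_hp': 17, 'hps': 34, 'hp_offset': 34}
#   net = get_pose_net(heads, head_conv=96, model_type='lighting')
#   raw = net(torch.randn(1, 3, 192, 192))[0]   # dict with one tensor per head
#   kpt_with_conf = net.decode(raw)             # shape (1, 1, 17, 3): y, x, score in [0, 1]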
177 | 178 | def get_pose_net(heads, head_conv=96, froze_backbone=True, model_type = 'lighting'): 179 | backbone = mobilenet_backbone('mobilenet_v2', pretrained=False, fpn=True, trainable_layers=0, model_type = model_type) 180 | if froze_backbone: 181 | for param in backbone.parameters(): 182 | param.requires_grad = False 183 | if model_type == 'lighting': 184 | ft_size = 48 185 | else: 186 | ft_size = 64 187 | model = MoveNet(backbone, heads, head_conv=head_conv, ft_size = ft_size) 188 | # froze 189 | '''for k,v in model.named_parameters(): 190 | head_name = k.split('.')[0] 191 | if head_name == 'hm' or head_name == 'hps': 192 | v.requires_grad = False''' 193 | 194 | return model -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | def _sigmoid(x): 10 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 11 | return y 12 | 13 | 14 | def _gather_feat(feat, ind, mask=None): 15 | dim = feat.size(2) 16 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 17 | feat = feat.gather(1, ind) 18 | if mask is not None: 19 | mask = mask.unsqueeze(2).expand_as(feat) 20 | feat = feat[mask] 21 | feat = feat.view(-1, dim) 22 | return feat 23 | 24 | 25 | def _gather_feat_plus(feat, ind, num_joints): 26 | # num_objs = ind.size(1) / 17 27 | ind = ind.view(ind.size(0), -1, num_joints) 28 | ind = ind.unsqueeze(3).expand(ind.size(0), ind.size(1), ind.size(2), 2) 29 | feat = feat.gather(1, ind) 30 | return feat 31 | 32 | 33 | def _transpose_and_gather_feat(feat, ind): 34 | feat = 
feat.permute(0, 2, 3, 1).contiguous() 35 | feat = feat.view(feat.size(0), -1, feat.size(3)) 36 | feat = _gather_feat(feat, ind) 37 | return feat 38 | 39 | def _transpose_and_gather_feat_plus(feat, ind, num_joints): 40 | feat = feat.permute(0, 2, 3, 1).contiguous() 41 | feat = feat.view(feat.size(0), -1, num_joints, 2) 42 | feat = _gather_feat_plus(feat, ind, num_joints) 43 | feat = feat.view(feat.size(0), -1, 2) 44 | return feat 45 | 46 | 47 | def flip_tensor(x): 48 | return torch.flip(x, [3]) 49 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 50 | # return torch.from_numpy(tmp).to(x.device) 51 | 52 | 53 | def flip_lr(x, flip_idx): 54 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 55 | shape = tmp.shape 56 | for e in flip_idx: 57 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 58 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 59 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 60 | 61 | 62 | def flip_lr_off(x, flip_idx, num_joints): 63 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 64 | shape = tmp.shape 65 | tmp = tmp.reshape(tmp.shape[0], num_joints, 2, 66 | tmp.shape[2], tmp.shape[3]) 67 | tmp[:, :, 0, :, :] *= -1 68 | for e in flip_idx: 69 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 70 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 71 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 72 | -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | class BaseTrainer(object): 24 | def __init__( 25 | self, opt, model, optimizer=None): 26 | self.opt = opt 27 | self.optimizer = optimizer 28 | self.loss_stats, self.loss = self._get_losses(opt) 29 | self.model_with_loss = ModelWithLoss(model, self.loss) 30 | 31 | def set_device(self, gpus, chunk_sizes, device): 32 | if len(gpus) > 1: 33 | self.model_with_loss = DataParallel( 34 | self.model_with_loss, device_ids=gpus, 35 | chunk_sizes=chunk_sizes).to(device) 36 | else: 37 | self.model_with_loss = self.model_with_loss.to(device) 38 | 39 | for state in self.optimizer.state.values(): 40 | for k, v in state.items(): 41 | if isinstance(v, torch.Tensor): 42 | state[k] = v.to(device=device, non_blocking=True) 43 | 44 | def run_epoch(self, phase, epoch, data_loader): 45 | model_with_loss = self.model_with_loss 46 | if phase == 'train': 47 | model_with_loss.train() 48 | else: 49 | if len(self.opt.gpus) > 1: 50 | model_with_loss = self.model_with_loss.module 51 | model_with_loss.eval() 52 | torch.cuda.empty_cache() 53 | 54 | opt = self.opt 55 | results = {} 56 | data_time, batch_time = AverageMeter(), AverageMeter() 57 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 58 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 59 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 60 | end = time.time() 61 | for 
iter_id, batch in enumerate(data_loader): 62 | if iter_id >= num_iters: 63 | break 64 | data_time.update(time.time() - end) 65 | 66 | for k in batch: 67 | if k != 'meta': 68 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 69 | output, loss, loss_stats = model_with_loss(batch) 70 | loss = loss.mean() 71 | if phase == 'train': 72 | self.optimizer.zero_grad() 73 | loss.backward() 74 | self.optimizer.step() 75 | batch_time.update(time.time() - end) 76 | end = time.time() 77 | 78 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 79 | epoch, iter_id, num_iters, phase=phase, 80 | total=bar.elapsed_td, eta=bar.eta_td) 81 | for l in avg_loss_stats: 82 | avg_loss_stats[l].update( 83 | loss_stats[l].mean().item(), batch['input'].size(0)) 84 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 85 | if not opt.hide_data_time: 86 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 87 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 88 | if opt.print_iter > 0: 89 | if iter_id % opt.print_iter == 0: 90 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 91 | else: 92 | bar.next() 93 | 94 | if opt.debug > 0: 95 | self.debug(batch, output, iter_id) 96 | 97 | if opt.test: 98 | self.save_result(output, batch, results) 99 | del output, loss, loss_stats 100 | 101 | bar.finish() 102 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 103 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 104 | return ret, results 105 | 106 | def debug(self, batch, output, iter_id): 107 | raise NotImplementedError 108 | 109 | def save_result(self, output, batch, results): 110 | raise NotImplementedError 111 | 112 | def _get_losses(self, opt): 113 | raise NotImplementedError 114 | 115 | def val(self, epoch, data_loader): 116 | return self.run_epoch('val', epoch, data_loader) 117 | 118 | def train(self, epoch, data_loader): 119 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /src/lib/trains/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss 9 | from models.decode import multi_pose_decode 10 | from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr 11 | from utils.debugger import Debugger 12 | from utils.post_process import multi_pose_post_process 13 | from utils.oracle_utils import gen_oracle_map 14 | from .base_trainer import BaseTrainer 15 | 16 | class MultiPoseLoss(torch.nn.Module): 17 | def __init__(self, opt): 18 | super(MultiPoseLoss, self).__init__() 19 | self.crit = FocalLoss() 20 | self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_kp = RegWeightedL1Loss() if not opt.dense_hp else \ 22 | torch.nn.L1Loss(reduction='sum') 23 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 24 | RegLoss() if opt.reg_loss == 'sl1' else None 25 | self.opt = opt 26 | 27 | def forward(self, outputs, batch): 28 | opt = self.opt 29 | hm_loss, wh_loss, off_loss = 0, 0, 0 30 | hp_loss, off_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0 31 | for s in range(opt.num_stacks): 32 | output = outputs[s] 33 | output['hm'] = _sigmoid(output['hm']) 34 | if opt.hm_hp and not opt.mse_loss: 35 | output['hm_hp'] = 
_sigmoid(output['hm_hp']) 36 | 37 | if opt.eval_oracle_hmhp: 38 | output['hm_hp'] = batch['hm_hp'] 39 | if opt.eval_oracle_hm: 40 | output['hm'] = batch['hm'] 41 | if opt.eval_oracle_kps: 42 | if opt.dense_hp: 43 | output['hps'] = batch['dense_hps'] 44 | else: 45 | output['hps'] = torch.from_numpy(gen_oracle_map( 46 | batch['hps'].detach().cpu().numpy(), 47 | batch['ind'].detach().cpu().numpy(), 48 | opt.output_res, opt.output_res)).to(opt.device) 49 | if opt.eval_oracle_hp_offset: 50 | output['hp_offset'] = torch.from_numpy(gen_oracle_map( 51 | batch['hp_offset'].detach().cpu().numpy(), 52 | batch['hp_ind'].detach().cpu().numpy(), 53 | opt.output_res, opt.output_res)).to(opt.device) 54 | 55 | 56 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 57 | if opt.dense_hp: 58 | mask_weight = batch['dense_hps_mask'].sum() + 1e-4 59 | hp_loss += (self.crit_kp(output['hps'] * batch['dense_hps_mask'], 60 | batch['dense_hps'] * batch['dense_hps_mask']) / 61 | mask_weight) / opt.num_stacks 62 | else: 63 | hp_loss += self.crit_kp(output['hps'], batch['hps_mask'], 64 | batch['ind'], batch['hps']) / opt.num_stacks 65 | if opt.wh_weight > 0: 66 | wh_loss += self.crit_reg(output['wh'], batch['reg_mask'], 67 | batch['ind'], batch['wh']) / opt.num_stacks 68 | if opt.reg_offset and opt.off_weight > 0: 69 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 70 | batch['ind'], batch['reg']) / opt.num_stacks 71 | if opt.reg_hp_offset and opt.off_weight > 0: 72 | hp_offset_loss += self.crit_reg( 73 | output['hp_offset'], batch['hp_mask'], 74 | batch['hp_ind'], batch['hp_offset']) / opt.num_stacks 75 | if opt.hm_hp and opt.hm_hp_weight > 0: 76 | hm_hp_loss += self.crit_hm_hp( 77 | output['hm_hp'], batch['hm_hp']) / opt.num_stacks 78 | loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \ 79 | opt.off_weight * off_loss + opt.hp_weight * hp_loss + \ 80 | opt.hm_hp_weight * hm_hp_loss + opt.off_weight * hp_offset_loss 81 | 82 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'hp_loss': hp_loss, 83 | 'hm_hp_loss': hm_hp_loss, 'hp_offset_loss': hp_offset_loss, 84 | 'wh_loss': wh_loss, 'off_loss': off_loss} 85 | return loss, loss_stats 86 | 87 | class MultiPoseTrainer(BaseTrainer): 88 | def __init__(self, opt, model, optimizer=None): 89 | super(MultiPoseTrainer, self).__init__(opt, model, optimizer=optimizer) 90 | 91 | def _get_losses(self, opt): 92 | loss_states = ['loss', 'hm_loss', 'hp_loss', 'hm_hp_loss', 93 | 'hp_offset_loss', 'wh_loss', 'off_loss'] 94 | loss = MultiPoseLoss(opt) 95 | return loss_states, loss 96 | 97 | def debug(self, batch, output, iter_id): 98 | opt = self.opt 99 | reg = output['reg'] if opt.reg_offset else None 100 | hm_hp = output['hm_hp'] if opt.hm_hp else None 101 | hp_offset = output['hp_offset'] if opt.reg_hp_offset else None 102 | dets = multi_pose_decode( 103 | output['hm'], output['wh'], output['hps'], 104 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K) 105 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 106 | 107 | dets[:, :, :4] *= opt.input_res / opt.output_res 108 | dets[:, :, 5:39] *= opt.input_res / opt.output_res 109 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 110 | dets_gt[:, :, :4] *= opt.input_res / opt.output_res 111 | dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res 112 | for i in range(1): 113 | debugger = Debugger( 114 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 115 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 116 | img = np.clip((( 
117 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 118 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 119 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 120 | debugger.add_blend_img(img, pred, 'pred_hm') 121 | debugger.add_blend_img(img, gt, 'gt_hm') 122 | 123 | debugger.add_img(img, img_id='out_pred') 124 | for k in range(len(dets[i])): 125 | if dets[i, k, 4] > opt.center_thresh: 126 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 127 | dets[i, k, 4], img_id='out_pred') 128 | debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred') 129 | 130 | debugger.add_img(img, img_id='out_gt') 131 | for k in range(len(dets_gt[i])): 132 | if dets_gt[i, k, 4] > opt.center_thresh: 133 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 134 | dets_gt[i, k, 4], img_id='out_gt') 135 | debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt') 136 | 137 | if opt.hm_hp: 138 | pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy()) 139 | gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy()) 140 | debugger.add_blend_img(img, pred, 'pred_hmhp') 141 | debugger.add_blend_img(img, gt, 'gt_hmhp') 142 | 143 | if opt.debug == 4: 144 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 145 | else: 146 | debugger.show_all_imgs(pause=True) 147 | 148 | def save_result(self, output, batch, results): 149 | reg = output['reg'] if self.opt.reg_offset else None 150 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 151 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 152 | dets = multi_pose_decode( 153 | output['hm'], output['wh'], output['hps'], 154 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 155 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 156 | 157 | dets_out = multi_pose_post_process( 158 | dets.copy(), batch['meta']['c'].cpu().numpy(), 159 | batch['meta']['s'].cpu().numpy(), 160 | output['hm'].shape[2], output['hm'].shape[3]) 161 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 162 | -------------------------------------------------------------------------------- /src/lib/trains/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss 9 | from models.decode import multi_pose_decode 10 | from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr 11 | from utils.debugger import Debugger 12 | from utils.post_process import multi_pose_post_process 13 | from utils.oracle_utils import gen_oracle_map 14 | from .base_trainer import BaseTrainer 15 | 16 | 17 | class SinglePoseLoss(torch.nn.Module): 18 | ''' 19 | Same as MultiPoseLoss. 20 | Modified by Min LI to support (17 * 2) human pose local offset loss setting. 
21 | ''' 22 | 23 | def __init__(self, opt): 24 | super(SinglePoseLoss, self).__init__() 25 | self.crit = FocalLoss() 26 | self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 27 | self.crit_kp = RegWeightedL1Loss() 28 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 29 | RegLoss() if opt.reg_loss == 'sl1' else None 30 | self.opt = opt 31 | 32 | def forward(self, outputs, batch): 33 | opt = self.opt 34 | hm_loss, hp_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0 35 | for s in range(opt.num_stacks): 36 | output = outputs[s] 37 | output['hm'] = _sigmoid(output['hm']) 38 | output['hm_hp'] = _sigmoid(output['hm_hp']) 39 | 40 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 41 | 42 | hp_loss += self.crit_kp(output['hps'], batch['hps_mask'], 43 | batch['ind'], batch['hps']) / opt.num_stacks 44 | hp_offset_loss += self.crit_reg( 45 | output['hp_offset'], batch['hp_mask'], 46 | batch['hp_ind'], batch['hp_offset']) / opt.num_stacks 47 | hm_hp_loss += self.crit_hm_hp( 48 | output['hm_hp'], batch['hm_hp']) / opt.num_stacks 49 | loss = opt.hm_weight * hm_loss + \ 50 | opt.hp_weight * hp_loss + \ 51 | opt.hm_hp_weight * hm_hp_loss + opt.off_weight * hp_offset_loss 52 | 53 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'hp_loss': hp_loss, 54 | 'hm_hp_loss': hm_hp_loss, 'hp_offset_loss': hp_offset_loss} 55 | return loss, loss_stats 56 | 57 | 58 | class SinglePoseTrainer(BaseTrainer): 59 | def __init__(self, opt, model, optimizer=None): 60 | super(SinglePoseTrainer, self).__init__( 61 | opt, model, optimizer=optimizer) 62 | 63 | def _get_losses(self, opt): 64 | loss_states = ['loss', 'hm_loss', 'hp_loss', 'hm_hp_loss', 65 | 'hp_offset_loss'] 66 | loss = SinglePoseLoss(opt) 67 | return loss_states, loss 68 | 69 | def debug(self, batch, output, iter_id): 70 | opt = self.opt 71 | reg = output['reg'] if opt.reg_offset else None 72 | hm_hp = output['hm_hp'] if opt.hm_hp else None 73 | hp_offset = output['hp_offset'] if opt.reg_hp_offset else None 74 | dets = multi_pose_decode( 75 | output['hm'], output['wh'], output['hps'], 76 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K) 77 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 78 | 79 | dets[:, :, :4] *= opt.input_res / opt.output_res 80 | dets[:, :, 5:39] *= opt.input_res / opt.output_res 81 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 82 | dets_gt[:, :, :4] *= opt.input_res / opt.output_res 83 | dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res 84 | for i in range(1): 85 | debugger = Debugger( 86 | dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) 87 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 88 | img = np.clip((( 89 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 90 | pred = debugger.gen_colormap( 91 | output['hm'][i].detach().cpu().numpy()) 92 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 93 | debugger.add_blend_img(img, pred, 'pred_hm') 94 | debugger.add_blend_img(img, gt, 'gt_hm') 95 | 96 | debugger.add_img(img, img_id='out_pred') 97 | for k in range(len(dets[i])): 98 | if dets[i, k, 4] > opt.center_thresh: 99 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 100 | dets[i, k, 4], img_id='out_pred') 101 | debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred') 102 | 103 | debugger.add_img(img, img_id='out_gt') 104 | for k in range(len(dets_gt[i])): 105 | if dets_gt[i, k, 4] > opt.center_thresh: 106 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 107 
| dets_gt[i, k, 4], img_id='out_gt') 108 | debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt') 109 | 110 | if opt.hm_hp: 111 | pred = debugger.gen_colormap_hp( 112 | output['hm_hp'][i].detach().cpu().numpy()) 113 | gt = debugger.gen_colormap_hp( 114 | batch['hm_hp'][i].detach().cpu().numpy()) 115 | debugger.add_blend_img(img, pred, 'pred_hmhp') 116 | debugger.add_blend_img(img, gt, 'gt_hmhp') 117 | 118 | if opt.debug == 4: 119 | debugger.save_all_imgs( 120 | opt.debug_dir, prefix='{}'.format(iter_id)) 121 | else: 122 | debugger.show_all_imgs(pause=True) 123 | 124 | def save_result(self, output, batch, results): 125 | reg = output['reg'] if self.opt.reg_offset else None 126 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 127 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 128 | dets = multi_pose_decode( 129 | output['hm'], output['wh'], output['hps'], 130 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 131 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 132 | 133 | dets_out = multi_pose_post_process( 134 | dets.copy(), batch['meta']['c'].cpu().numpy(), 135 | batch['meta']['s'].cpu().numpy(), 136 | output['hm'].shape[2], output['hm'].shape[3]) 137 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 138 | -------------------------------------------------------------------------------- /src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .multi_pose import MultiPoseTrainer 6 | from .single_pose import SinglePoseTrainer 7 | 8 | train_factory = { 9 | 'multi_pose': MultiPoseTrainer, 10 | 'single_pose': SinglePoseTrainer 11 | } 12 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | def transform_preds(coords, center, scale, output_size): 20 | target_coords = np.zeros(coords.shape) 21 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 22 | for p in range(coords.shape[0]): 23 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 24 | return target_coords 25 | 26 | 27 | def get_affine_transform(center, 28 | scale, 29 | rot, 30 | output_size, 31 | shift=np.array([0, 0], dtype=np.float32), 32 | inv=0): 33 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 34 | scale = np.array([scale, scale], dtype=np.float32) 35 | 36 | scale_tmp = scale 37 | src_w = scale_tmp[0] 38 | dst_w = output_size[0] 39 | dst_h = output_size[1] 40 | 41 | rot_rad = np.pi * rot / 180 42 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 43 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 44 | 45 | src = np.zeros((3, 2), dtype=np.float32) 46 | dst = np.zeros((3, 2), dtype=np.float32) 47 | src[0, :] = center + scale_tmp * shift 48 | src[1, :] = center + src_dir + scale_tmp * shift 49 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 50 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 51 | 52 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 53 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 54 | 55 | if inv: 56 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 57 | else: 58 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 59 | 60 | return trans 61 | 62 | 63 | def affine_transform(pt, t): 64 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 65 | new_pt = np.dot(t, new_pt) 66 | return new_pt[:2] 67 | 68 | 69 | def get_3rd_point(a, b): 70 | direct = a - b 71 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 72 | 73 | 74 | def get_dir(src_point, rot_rad): 75 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 76 | 77 | src_result = [0, 0] 78 | src_result[0] = src_point[0] * cs - src_point[1] * sn 79 | src_result[1] = src_point[0] * sn + src_point[1] * cs 80 | 81 | return src_result 82 | 83 | 84 | def crop(img, center, scale, output_size, rot=0): 85 | trans = get_affine_transform(center, scale, rot, output_size) 86 | 87 | dst_img = cv2.warpAffine(img, 88 | trans, 89 | (int(output_size[0]), int(output_size[1])), 90 | flags=cv2.INTER_LINEAR) 91 | 92 | return dst_img 93 | 94 | 95 | def gaussian_radius(det_size, min_overlap=0.7): 96 | height, width = det_size 97 | 98 | a1 = 1 99 | b1 = (height + width) 100 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 101 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 102 | r1 = (b1 + sq1) / 2 103 | 104 | a2 = 4 105 | b2 = 2 * (height + width) 106 | c2 = (1 - min_overlap) * width * height 107 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 108 | r2 = (b2 + sq2) / 2 109 | 110 | a3 = 4 * min_overlap 111 | b3 = -2 * min_overlap * (height + width) 112 | c3 = (min_overlap - 1) * width * height 113 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 114 | r3 = (b3 + sq3) / 2 115 | return min(r1, r2, r3) 116 | 117 | 118 | def gaussian2D(shape, sigma=1): 119 | m, n = [(ss - 1.) / 2. 
for ss in shape] 120 | y, x = np.ogrid[-m:m+1,-n:n+1] 121 | 122 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 123 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 124 | return h 125 | 126 | def draw_umich_gaussian(heatmap, center, radius, k=1): 127 | diameter = 2 * radius + 1 128 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 129 | 130 | x, y = int(center[0]), int(center[1]) 131 | 132 | height, width = heatmap.shape[0:2] 133 | 134 | left, right = min(x, radius), min(width - x, radius + 1) 135 | top, bottom = min(y, radius), min(height - y, radius + 1) 136 | 137 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 138 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 139 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 140 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 141 | return heatmap 142 | 143 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 144 | diameter = 2 * radius + 1 145 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 146 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 147 | dim = value.shape[0] 148 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 149 | if is_offset and dim == 2: 150 | delta = np.arange(diameter*2+1) - radius 151 | reg[0] = reg[0] - delta.reshape(1, -1) 152 | reg[1] = reg[1] - delta.reshape(-1, 1) 153 | 154 | x, y = int(center[0]), int(center[1]) 155 | 156 | height, width = heatmap.shape[0:2] 157 | 158 | left, right = min(x, radius), min(width - x, radius + 1) 159 | top, bottom = min(y, radius), min(height - y, radius + 1) 160 | 161 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 162 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 163 | masked_gaussian = gaussian[radius - top:radius + bottom, 164 | radius - left:radius + right] 165 | masked_reg = reg[:, radius - top:radius + bottom, 166 | radius - left:radius + right] 167 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 168 | idx = (masked_gaussian >= masked_heatmap).reshape( 169 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 170 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 171 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 172 | return regmap 173 | 174 | 175 | def draw_msra_gaussian(heatmap, center, sigma): 176 | tmp_size = sigma * 3 177 | mu_x = int(center[0] + 0.5) 178 | mu_y = int(center[1] + 0.5) 179 | w, h = heatmap.shape[0], heatmap.shape[1] 180 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 181 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 182 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 183 | return heatmap 184 | size = 2 * tmp_size + 1 185 | x = np.arange(0, size, 1, np.float32) 186 | y = x[:, np.newaxis] 187 | x0 = y0 = size // 2 188 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 189 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 190 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 191 | img_x = max(0, ul[0]), min(br[0], h) 192 | img_y = max(0, ul[1]), min(br[1], w) 193 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 194 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 195 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 196 | return heatmap 197 | 198 | def grayscale(image): 199 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 200 | 201 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 202 | alpha = 
data_rng.normal(scale=alphastd, size=(3, )) 203 | image += np.dot(eigvec, eigval * alpha) 204 | 205 | def blend_(alpha, image1, image2): 206 | image1 *= alpha 207 | image2 *= (1 - alpha) 208 | image1 += image2 209 | 210 | def saturation_(data_rng, image, gs, gs_mean, var): 211 | alpha = 1. + data_rng.uniform(low=-var, high=var) 212 | blend_(alpha, image, gs[:, :, None]) 213 | 214 | def brightness_(data_rng, image, gs, gs_mean, var): 215 | alpha = 1. + data_rng.uniform(low=-var, high=var) 216 | image *= alpha 217 | 218 | def contrast_(data_rng, image, gs, gs_mean, var): 219 | alpha = 1. + data_rng.uniform(low=-var, high=var) 220 | blend_(alpha, image, gs_mean) 221 | 222 | def color_aug(data_rng, image, eig_val, eig_vec): 223 | functions = [brightness_, contrast_, saturation_] 224 | random.shuffle(functions) 225 | 226 | gs = grayscale(image) 227 | gs_mean = gs.mean() 228 | for f in functions: 229 | f(data_rng, image, gs, gs_mean, 0.4) 230 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 231 | -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | 8 | 9 | def get_pred_depth(depth): 10 | return depth 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | 22 | 23 | def ctdet_post_process(dets, c, s, h, w, num_classes): 24 | # dets: batch x max_dets x dim 25 | # return 1-based class det dict 26 | ret = [] 27 | for i in range(dets.shape[0]): 
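    # For each image in the batch: warp the predicted box corners back to the
    # original image frame with the inverse affine transform, then group the
    # [x1, y1, x2, y2, score] rows into a dict keyed by 1-based class id.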
28 | top_preds = {} 29 | dets[i, :, :2] = transform_preds( 30 | dets[i, :, 0:2], c[i], s[i], (w, h)) 31 | dets[i, :, 2:4] = transform_preds( 32 | dets[i, :, 2:4], c[i], s[i], (w, h)) 33 | classes = dets[i, :, -1] 34 | for j in range(num_classes): 35 | inds = (classes == j) 36 | top_preds[j + 1] = np.concatenate([ 37 | dets[i, inds, :4].astype(np.float32), 38 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 39 | ret.append(top_preds) 40 | return ret 41 | 42 | 43 | def multi_pose_post_process(dets, c, s, h, w): 44 | # dets: batch x max_dets x 40 45 | # return list of 39 in image coord 46 | ret = [] 47 | for i in range(dets.shape[0]): 48 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 49 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 50 | top_preds = np.concatenate( 51 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 52 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 53 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 54 | return ret 55 | 56 | def single_pose_post_process(dets, h, w): 57 | # post_process for Movenet especially 58 | # restore is the original size is not square. 59 | longEdge = max(h, w) 60 | dets[:, 0] = dets[:, 0] * longEdge 61 | dets[:, 1] = dets[:, 1] * longEdge 62 | if h > w: 63 | dets[:, 1] = dets[:, 1] - (h - w) // 2 64 | elif w > h: 65 | dets[:, 0] = dets[:, 0] - (w - h) // 2 66 | return dets -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import torch 10 | import torch.utils.data 11 | from opts import opts 12 | from models.model import create_model, load_model, save_model 13 | from models.data_parallel import DataParallel 14 | from logger import Logger 15 | from datasets.dataset_factory import get_dataset 16 | from trains.train_factory import train_factory 17 | 18 | 19 | def main(opt): 20 | torch.manual_seed(opt.seed) 21 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 22 | Dataset = get_dataset(opt.dataset, opt.task) 23 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 24 | print(opt) 25 | 26 | logger = Logger(opt) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 29 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 30 | 31 | print('Creating model...') 32 | model = create_model(opt.arch, opt.heads, 33 | opt.head_conv, opt.froze_backbone) 34 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 35 | start_epoch = 0 36 | if opt.load_model != '': 37 | model, optimizer, start_epoch = load_model( 38 | 
model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) 39 | 40 | 41 | Trainer = train_factory[opt.task] 42 | trainer = Trainer(opt, model, optimizer) 43 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 44 | 45 | print('Setting up data...') 46 | val_loader = torch.utils.data.DataLoader( 47 | Dataset(opt, 'val'), 48 | batch_size=1, 49 | shuffle=False, 50 | num_workers=1, 51 | pin_memory=True 52 | ) 53 | 54 | if opt.test: 55 | _, preds = trainer.val(0, val_loader) 56 | val_loader.dataset.run_eval(preds, opt.save_dir) 57 | return 58 | 59 | train_loader = torch.utils.data.DataLoader( 60 | Dataset(opt, 'train'), 61 | batch_size=opt.batch_size, 62 | shuffle=True, 63 | num_workers=opt.num_workers, 64 | pin_memory=True, 65 | drop_last=True 66 | ) 67 | 68 | print('Starting training...') 69 | best = 1e10 70 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 71 | mark = epoch if opt.save_all else 'last' 72 | log_dict_train, _ = trainer.train(epoch, train_loader) 73 | logger.write('epoch: {} |'.format(epoch)) 74 | for k, v in log_dict_train.items(): 75 | logger.scalar_summary('train_{}'.format(k), v, epoch) 76 | logger.write('{} {:8f} | '.format(k, v)) 77 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 78 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 79 | epoch, model, optimizer) 80 | with torch.no_grad(): 81 | log_dict_val, preds = trainer.val(epoch, val_loader) 82 | for k, v in log_dict_val.items(): 83 | logger.scalar_summary('val_{}'.format(k), v, epoch) 84 | logger.write('{} {:8f} | '.format(k, v)) 85 | if log_dict_val[opt.metric] < best: 86 | best = log_dict_val[opt.metric] 87 | save_model(os.path.join(opt.save_dir, 'model_best.pth'), 88 | epoch, model) 89 | else: 90 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 91 | epoch, model, optimizer) 92 | logger.write('\n') 93 | if epoch in opt.lr_step: 94 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 95 | epoch, model, optimizer) 96 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 97 | print('Drop LR to', lr) 98 | for param_group in optimizer.param_groups: 99 | param_group['lr'] = lr 100 | logger.close() 101 | 102 | 103 | if __name__ == '__main__': 104 | opt = opts().parse() 105 | main(opt) 106 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import json 9 | import cv2 10 | import numpy as np 11 | import time 12 | from progress.bar import Bar 13 | import torch 14 | 15 | # from external.nms import soft_nms 16 | from opts import opts 17 | from logger import Logger 18 | from utils.utils import AverageMeter 19 | from datasets.dataset_factory import dataset_factory 20 | from detectors.detector_factory import detector_factory 21 | 22 | 23 | def test(opt): 24 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 25 | 26 | Dataset = dataset_factory[opt.dataset] 27 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 28 | print(opt) 29 | Logger(opt) 30 | Detector = detector_factory[opt.task] 31 | 32 | split = 'val' if not opt.trainval else 'test' 33 | dataset = Dataset(opt, split) 34 | detector = Detector(opt) 35 | 36 | results = {} 37 | num_iters = len(dataset) 38 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 39 | time_stats = ['tot', 'load', 
'pre', 'net', 'dec', 'post', 'merge'] 40 | avg_time_stats = {t: AverageMeter() for t in time_stats} 41 | for ind in range(num_iters): 42 | img_id = dataset.images[ind] 43 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 44 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 45 | 46 | ret = detector.run(img_path) 47 | 48 | results[img_id] = ret['results'] 49 | 50 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 51 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 52 | for t in avg_time_stats: 53 | avg_time_stats[t].update(ret[t]) 54 | Bar.suffix = Bar.suffix + \ 55 | '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 56 | bar.next() 57 | bar.finish() 58 | dataset.run_eval(results, opt.save_dir) 59 | 60 | 61 | if __name__ == '__main__': 62 | opt = opts().parse() 63 | test(opt) 64 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_active_to_coco.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Split `Active` dataset into training and test sets. 3 | Move this file to `{$movenet}/data/active` and run it. 4 | Author: Min LI 5 | 6 | TODO: Check whether keypoint mapping from MPII to COCO is correct. 7 | ''' 8 | from PIL import Image 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import json 13 | import shutil 14 | import random 15 | 16 | db_type = 'train' # train, test 17 | train_percentage = 0.9 18 | annot_path = "annotations/active.json" 19 | train_save_path = "annotations/active_train.json" 20 | val_save_path = "annotations/active_val.json" 21 | 22 | if not osp.isdir('train'): 23 | os.makedirs('train') 24 | if not osp.isdir('val'): 25 | os.makedirs('val') 26 | 27 | 28 | print("Loading Acitve dataset...") 29 | with open(annot_path) as json_file: 30 | active = json.load(json_file) 31 | ''' 32 | MPII: 0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow, 15 - l wrist 33 | 34 | COCO_PERSON_KEYPOINT_NAMES = [ 35 | 'nose', 0 36 | 'left_eye', 1 37 | 'right_eye', 2 38 | 'left_ear', 3 39 | 'right_ear', 4 40 | 'left_shoulder', 5 41 | 'right_shoulder', 6 42 | 'left_elbow', 7 43 | 'right_elbow', 8 44 | 'left_wrist', 9 45 | 'right_wrist', 10 46 | 'left_hip', 11 47 | 'right_hip', 12 48 | 'left_knee', 13 49 | 'right_knee', 14 50 | 'left_ankle', 15 51 | 'right_ankle' 16 52 | ] 53 | ''' 54 | joint_mapping = {'0': 16, '1': 14, '2': 12, '3': 11, '4': 13, '5': 15, '6': -1, '7': -1, '8': -1, '9': 0, '10': 10, '11': 8, '12': 6, '13': 5, '14': 7, '15': 9} 55 | joint_num = 17 56 | img_num = len(active) 57 | random_index = list(range(img_num)) 58 | random.shuffle(random_index) 59 | train_index = random_index[:int(img_num * train_percentage) + 1] 60 | val_index = random_index[int(img_num * train_percentage) + 1:] 61 | 62 | print("image size: ", img_num) 63 | print("train size: ", int(img_num * train_percentage)) 64 | print("val size: ", img_num -int(img_num * 
train_percentage)) 65 | 66 | aid = 0 67 | coco_train = {'images': [], 'categories': [], 'annotations': []} 68 | 69 | for img_id in train_index: 70 | 71 | filename = 'images/' + str(active[img_id]['image'])#filename 72 | filename_target = 'train/' + str(active[img_id]['image']) 73 | shutil.copy(filename, filename_target) 74 | img = Image.open(osp.join('.', filename)) 75 | w,h = img.size 76 | img_dict = {'id': aid, 'file_name': str(active[img_id]['image']), 'width': w, 'height': h} 77 | coco_train['images'].append(img_dict) 78 | 79 | bbox = np.zeros((4)) # xmin, ymin, w, h 80 | kps = np.zeros((joint_num, 3)) # xcoord, ycoord, vis 81 | ori_kps = [] 82 | 83 | #kps 84 | for jid in range(16): 85 | if (joint_mapping[str(jid)] == -1): continue 86 | kps[joint_mapping[str(jid)]][0] = active[img_id]["joints"][jid][0] 87 | kps[joint_mapping[str(jid)]][1] = active[img_id]["joints"][jid][1] 88 | kps[joint_mapping[str(jid)]][2] = active[img_id]["joint_vis"][jid] + 1 89 | ori_kps.append([active[img_id]["joints"][jid][0],active[img_id]["joints"][jid][1]]) 90 | kps[1:5] = np.zeros((4, 3)) 91 | ori_kps = np.asarray(ori_kps) 92 | 93 | #bbox extract from annotated kps 94 | 95 | xmin = np.min(ori_kps[:,0]) 96 | ymin = np.min(ori_kps[:,1]) 97 | xmax = np.max(ori_kps[:,0]) 98 | ymax = np.max(ori_kps[:,1]) 99 | width = xmax - xmin - 1 100 | height = ymax - ymin - 1 101 | 102 | # corrupted bounding box 103 | if width <= 0 or height <= 0: 104 | continue 105 | # 20% extend 106 | else: 107 | bbox[0] = ((xmin + xmax)/2. - width/2*1.2) if(((xmin + xmax)/2. - width/2*1.2)>0) else 0 108 | bbox[1] = ((ymin + ymax)/2. - height/2*1.2) if(((ymin + ymax)/2. - height/2*1.2)>0) else 0 109 | bbox[2] = width*1.2 if ((bbox[0]+width*1.2)0) else 0 163 | bbox[1] = ((ymin + ymax)/2. - height/2*1.2) if(((ymin + ymax)/2. 
- height/2*1.2)>0) else 0 164 | bbox[2] = width*1.2 if ((bbox[0]+width*1.2) 0: 18 | return False 19 | else: 20 | return True 21 | 22 | 23 | db_type = 'train' # train, test 24 | annot_file = loadmat('mpii_human_pose_v1_u12_1')['RELEASE'] 25 | save_path = '../annotations/' + db_type + '.json' 26 | 27 | joint_num = 16 28 | img_num = len(annot_file['annolist'][0][0][0]) 29 | 30 | aid = 0 31 | coco = {'images': [], 'categories': [], 'annotations': []} 32 | for img_id in range(img_num): 33 | 34 | if ((db_type == 'train' and annot_file['img_train'][0][0][0][img_id] == 1) or (db_type == 'test' and annot_file['img_train'][0][0][0][img_id] == 0)) and \ 35 | check_empty(annot_file['annolist'][0][0][0][img_id],'annorect') == False: #any person is annotated 36 | 37 | filename = 'images/' + str(annot_file['annolist'][0][0][0][img_id]['image'][0][0][0][0]) #filename 38 | img = Image.open(osp.join('..', filename)) 39 | w,h = img.size 40 | img_dict = {'id': img_id, 'file_name': filename, 'width': w, 'height': h} 41 | coco['images'].append(img_dict) 42 | 43 | if db_type == 'test': 44 | continue 45 | 46 | person_num = len(annot_file['annolist'][0][0][0][img_id]['annorect'][0]) #person_num 47 | joint_annotated = np.zeros((person_num,joint_num)) 48 | for pid in range(person_num): 49 | 50 | if check_empty(annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid],'annopoints') == False: #kps is annotated 51 | 52 | bbox = np.zeros((4)) # xmin, ymin, w, h 53 | kps = np.zeros((joint_num,3)) # xcoord, ycoord, vis 54 | 55 | #kps 56 | annot_joint_num = len(annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0]) 57 | for jid in range(annot_joint_num): 58 | annot_jid = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['id'][0][0] 59 | kps[annot_jid][0] = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['x'][0][0] 60 | kps[annot_jid][1] = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['y'][0][0] 61 | kps[annot_jid][2] = 1 62 | 63 | #bbox extract from annotated kps 64 | annot_kps = kps[kps[:,2]==1,:].reshape(-1,3) 65 | xmin = np.min(annot_kps[:,0]) 66 | ymin = np.min(annot_kps[:,1]) 67 | xmax = np.max(annot_kps[:,0]) 68 | ymax = np.max(annot_kps[:,1]) 69 | width = xmax - xmin - 1 70 | height = ymax - ymin - 1 71 | 72 | # corrupted bounding box 73 | if width <= 0 or height <= 0: 74 | continue 75 | # 20% extend 76 | else: 77 | bbox[0] = (xmin + xmax)/2. - width/2*1.2 78 | bbox[1] = (ymin + ymax)/2. 
- height/2*1.2 79 | bbox[2] = width*1.2 80 | bbox[3] = height*1.2 81 | 82 | 83 | person_dict = {'id': aid, 'image_id': img_id, 'category_id': 1, 'area': bbox[2]*bbox[3], 'bbox': bbox.tolist(), 'iscrowd': 0, 'keypoints': kps.reshape(-1).tolist(), 'num_keypoints': int(np.sum(kps[:,2]==1))} 84 | coco['annotations'].append(person_dict) 85 | aid += 1 86 | 87 | category = { 88 | "supercategory": "person", 89 | "id": 1, # to be same as COCO, not using 0 90 | "name": "person", 91 | "skeleton": [[0,1], 92 | [1,2], 93 | [2,6], 94 | [7,12], 95 | [12,11], 96 | [11,10], 97 | [5,4], 98 | [4,3], 99 | [3,6], 100 | [7,13], 101 | [13,14], 102 | [14,15], 103 | [6,7], 104 | [7,8], 105 | [8,9]] , 106 | "keypoints": ["r_ankle", "r_knee","r_hip", 107 | "l_hip", "l_knee", "l_ankle", 108 | "pelvis", "thorax", 109 | "upper_neck", "head_top", 110 | "r_wrist", "r_elbow", "r_shoulder", 111 | "l_shoulder", "l_elbow", "l_wrist"]} 112 | 113 | coco['categories'] = [category] 114 | 115 | with open(save_path, 'w') as f: 116 | json.dump(coco, f) -------------------------------------------------------------------------------- /src/tools/draw.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | angles = [14,11,7,4,9,23,26,22,10,15,14,15,13,10,11,11,6,7,6,4,4,6,13,36,64,75,86,113,130,133,137,140,148,148,133,119,29,11,8,4,3,6,9,23,51,66,100,108,135,144,154,157,158,144,110,71,15,6,7,7,8,10,33,89,103,120,130,153,153,153,152,141,104,54,7,3,-3,-3,-3,-3,-3,-3,-3,-3,0,13,10,6,12,12,12,17,13,14,13,13,11,7,8,9,37,71,84,104,123,130,143,147,149,141,113,75,39,21,14,12,13,10,14,38,79,102,130,143,149,146,144,136,137,137,149,157,154,92,55,20,10,12,6,5,9,23,55,97,109,124,134,140,148,154,158,159,161,141,101,56,29,13,10,9,7,5,5,4,2,1,5,12,48,58,70,83,96,110,133] 3 | x = [] 4 | index = 1 5 | 6 | for i in range(len(angles)): 7 | x.append(index) 8 | index+=10 9 | 10 | plt.plot(x,angles,'s-',color = 'g',label="ATT-RLSTM")# 's-': square marker 11 | plt.xlabel("region length")# x-axis label 12 | plt.ylabel("accuracy")# y-axis label 13 | plt.legend(loc = "best")# legend 14 | plt.show() 15 | 16 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/instances_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12
| 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/person_keypoints_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/filter_hp.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import sys 4 | import shutil 5 | 6 | ''' 7 | Filter the COCO keypoint dataset down to images that contain at most a given number of annotated people (single- or double-person poses). 8 | Usage: 9 | Put this file under the `data/` directory and run it. 10 | By default, it copies the kept images to the `active_coco` directory. 11 | 12 | ''' 13 | 14 | class CocoFilter(): 15 | """ 16 | Filters the COCO dataset 17 | """ 18 | def filter_human_pose(self): 19 | image_infos = self.coco['images'] 20 | annotation_infos = self.coco['annotations'] 21 | 22 | annotation_infos_by_image_id = {} 23 | for annotation_info in annotation_infos: 24 | image_id = annotation_info['image_id'] 25 | if image_id in annotation_infos_by_image_id: 26 | annotation_infos_by_image_id[image_id].append(annotation_info) 27 | else: 28 | annotation_infos_by_image_id[image_id] = [annotation_info] 29 | 30 | image_ids = list(annotation_infos_by_image_id.keys()) 31 | 32 | image_id_to_image_info = {} 33 | for image_info in image_infos: 34 | image_id_to_image_info[image_info['id']] = image_info 35 | 36 | filtered_person_image_ids = list(filter(lambda image_id: len(annotation_infos_by_image_id[image_id]) <= self.counts, image_ids)) 37 | # image_infos 38 | filtered_image_infos = list(map(lambda image_id: image_id_to_image_info[image_id], filtered_person_image_ids)) 39 | self.new_images = filtered_image_infos 40 | print("Filtered image length: ", len(filtered_image_infos)) 41 | print("E.g.,", self.new_images[0]) 42 | # annotation_infos 43 | filtered_annotation_infos = list(filter(lambda annotation_info: annotation_info["image_id"] in filtered_person_image_ids, annotation_infos)) 44 | self.new_annotations = filtered_annotation_infos 45 | print("Filtered annotation length: ", len(filtered_annotation_infos)) 46 | print("E.g.,", self.new_annotations[0]) 47 | 48 | def move_files(self): 49 | print("Start copying data...") 50 | for image_info in self.new_images: 51 | file_name = image_info["file_name"] 52 | file_path = self.in_data_path / Path(file_name) 53 | file_path_target = self.out_data_path / Path(file_name) 54 | shutil.copy(file_path, file_path_target) 55 | print("Completed copying data.") 56 | 57 | 58 | def main(self, args): 59 | # Open json 60 | self.input_json_path = Path('coco', 'annotations', args.input_json) 61 | self.output_json_path = Path('active_coco', 'annotations', args.output_json) 62 | # data dir 63 | self.in_data_path = Path('coco', args.split) 64 | self.out_data_path = Path('active_coco', args.split) 65 | # self.out_data_path.mkdir(parents=True, exist_ok=True) 66 | self.counts = args.counts 67 | 68 | # Verify input path exists 69 | if not self.input_json_path.exists(): 70 | print('Input json path not found.') 71 | print('Quitting early.') 72 | quit() 73 | 74 | # 
Verify output path does not already exist 75 | if self.output_json_path.exists(): 76 | should_continue = input('Output path already exists. Overwrite? (y/n) ').lower() 77 | if should_continue != 'y' and should_continue != 'yes': 78 | print('Quitting early.') 79 | quit() 80 | 81 | # Load the json 82 | print('Loading json file...') 83 | with open(self.input_json_path) as json_file: 84 | self.coco = json.load(json_file) 85 | 86 | # Filter to specific categories 87 | print('Filtering...') 88 | self.filter_human_pose() 89 | 90 | # Build new JSON 91 | new_master_json = { 92 | 'info': self.coco['info'], 93 | 'licenses': self.coco['licenses'], 94 | 'images': self.new_images, 95 | 'annotations': self.new_annotations, 96 | 'categories': self.coco['categories'] 97 | } 98 | 99 | # Write the JSON to a file 100 | print('Saving new json file...') 101 | with open(self.output_json_path, 'w+') as output_file: 102 | json.dump(new_master_json, output_file) 103 | 104 | print('Filtered json saved.') 105 | 106 | self.move_files() 107 | 108 | 109 | if __name__ == "__main__": 110 | import argparse 111 | 112 | parser = argparse.ArgumentParser(description="Filter COCO JSON: " 113 | "Filters a COCO Keypoints JSON file to only include specified maximum human counts. " 114 | "This includes images, and annotations. Does not modify 'info' or 'licenses'.") 115 | 116 | parser.add_argument("-i", "--input_json", dest="input_json", default="person_keypoints_train2017.json", 117 | help="path to a json file in coco format") 118 | parser.add_argument("-o", "--output_json", dest="output_json", default="person_keypoints_train2017.json", 119 | help="path to save the output json") 120 | parser.add_argument("-s", "--split", dest="split", default="train2017", 121 | help="The split of data: train/val") 122 | parser.add_argument("-c", "--counts", dest="counts", type=int, default=2, 123 | help="Maximun human counts in a single image, e.g. 
-c 2 for training data, -c 1 for validation data") 124 | 125 | args = parser.parse_args() 126 | 127 | cf = CocoFilter() 128 | cf.main(args) 129 | -------------------------------------------------------------------------------- /src/tools/get_kitti.sh: -------------------------------------------------------------------------------- 1 | mkdir kitti 2 | cd kitti 3 | wget http://www.cvlibs.net/download.php?file=data_object_image_2.zip 4 | wget http://www.cvlibs.net/download.php?file=data_object_label_2.zip 5 | wget http://www.cvlibs.net/download.php?file=data_object_calib.zip 6 | unzip data_object_image_2.zip 7 | unzip data_object_label_2.zip 8 | unzip data_object_calib.zip 9 | 10 | -------------------------------------------------------------------------------- /src/tools/get_pascal_voc.sh: -------------------------------------------------------------------------------- 1 | mkdir voc 2 | cd voc 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 5 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 6 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar 8 | tar xvf VOCtrainval_06-Nov-2007.tar 9 | tar xvf VOCtest_06-Nov-2007.tar 10 | tar xvf VOCdevkit_08-Jun-2007.tar 11 | tar xvf VOCtrainval_11-May-2012.tar 12 | tar xvf VOCdevkit_18-May-2011.tar 13 | rm VOCtrainval_06-Nov-2007.tar 14 | rm VOCtest_06-Nov-2007.tar 15 | rm VOCdevkit_08-Jun-2007.tar 16 | rm VOCtrainval_11-May-2012.tar 17 | rm VOCdevkit_18-May-2011.tar 18 | mkdir images 19 | cp VOCdevkit/VOC2007/JPEGImages/* images/ 20 | cp VOCdevkit/VOC2012/JPEGImages/* images/ 21 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 22 | unzip PASCAL_VOC.zip 23 | rm PASCAL_VOC.zip 24 | mv PASCAL_VOC annotations/ 25 | cd .. 
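# assumed helper: merge_pascal_json.py (invoked below) is expected to combine the unzipped PASCAL_VOC json files into merged train/val annotation files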
26 | python merge_pascal_json.py 27 | -------------------------------------------------------------------------------- /src/tools/merge_active_coco_json.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import json 6 | 7 | coco_path = "person_keypoints_train2017_filtered.json" 8 | active_path = "active_coco.json" 9 | 10 | save_path = "person_keypoints_train2017_filtered_merged.json" 11 | 12 | print("Loading Acitve and COCO dataset...") 13 | with open(coco_path) as json_file: 14 | coco = json.load(json_file) 15 | with open(active_path) as json_file: 16 | active = json.load(json_file) 17 | 18 | images = [] 19 | for coco_image in coco['images']: 20 | images.append(coco_image) 21 | for active_image in active['images']: 22 | images.append(active_image) 23 | annotations = [] 24 | for coco_annotation in coco['annotations']: 25 | annotations.append(coco_annotation) 26 | for active_annotation in active['annotations']: 27 | annotations.append(active_annotation) 28 | new_master_json = { 29 | 'info': coco['info'], 30 | 'licenses': coco['licenses'], 31 | 'images': images, 32 | 'annotations': annotations, 33 | 'categories': coco['categories'] 34 | } 35 | 36 | img_num = len(images) 37 | print("image size: ", img_num) 38 | annotations_num = len(annotations) 39 | print("annotation size: ", annotations_num) 40 | 41 | with open(save_path, 'w') as f: 42 | json.dump(new_master_json, f) -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections. 
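# Example invocation (paths are illustrative); the flags match the argparse options defined below:
#   python reval.py /path/to/detections.pkl --imdb voc_2007_test --nms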
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/tflite_weight_viewer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_hub as hub 3 | from tensorflow_docs.vis import embed 4 | import numpy as np 5 | import cv2 6 | 7 | # Import matplotlib libraries 8 | from matplotlib import pyplot as plt 9 | from matplotlib.collections import LineCollection 10 | import matplotlib.patches as patches 11 | 12 | # Some modules to display an animation using imageio. 13 | import imageio 14 | 15 | 16 | interpreter = tf.lite.Interpreter(model_path="../../models/lite-model_movenet_singlepose_lightning_3.tflite") 17 | interpreter.allocate_tensors() 18 | 19 | ''' 20 | Check input/output details 21 | ''' 22 | input_details = interpreter.get_input_details() 23 | output_details = interpreter.get_output_details() 24 | 25 | print("== Input details ==") 26 | print("name:", input_details[0]['name']) 27 | print("shape:", input_details[0]['shape']) 28 | print("type:", input_details[0]['dtype']) 29 | print("\n== Output details ==") 30 | print("name:", output_details[0]['name']) 31 | print("shape:", output_details[0]['shape']) 32 | print("type:", output_details[0]['dtype']) 33 | 34 | 35 | ''' 36 | This gives a list of dictionaries. 
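Each dict describes one tensor in the graph and holds at least its 'index', 'name', 'shape', and 'dtype'.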
37 | ''' 38 | tensor_details = interpreter.get_tensor_details() 39 | 40 | for dict in tensor_details: 41 | i = dict['index'] 42 | tensor_name = dict['name'] 43 | shape = dict['shape'] 44 | # scales = dict['quantization_parameters']['scales'] 45 | # zero_points = dict['quantization_parameters']['zero_points'] 46 | # tensor = interpreter.tensor(i)() 47 | 48 | print(i, type, tensor_name, shape)# , scales.shape, zero_points.shape, tensor.shape) 49 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in 
range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | --------------------------------------------------------------------------------
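tflite_weight_viewer.py above only inspects tensor metadata. The sketch below runs one actual inference through the same tf.lite.Interpreter; the 0-255 RGB input convention and the [1, 1, 17, 3] (y, x, score) output layout are assumptions taken from the public MoveNet Lightning model card rather than from this repository, and the image path is illustrative.

import cv2
import numpy as np
import tensorflow as tf

# Same single-pose Lightning model that tflite_weight_viewer.py loads.
interpreter = tf.lite.Interpreter(model_path="../../models/lite-model_movenet_singlepose_lightning_3.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Resize to whatever input size the model declares and match its input dtype.
frame = cv2.imread("example.jpg")  # illustrative path
in_h, in_w = [int(v) for v in input_details[0]['shape'][1:3]]
inp = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (in_w, in_h))
inp = np.expand_dims(inp, axis=0).astype(input_details[0]['dtype'])

interpreter.set_tensor(input_details[0]['index'], inp)
interpreter.invoke()

# Assumed output layout: [1, 1, 17, 3], one (y, x, score) row per COCO keypoint.
keypoints = interpreter.get_tensor(output_details[0]['index'])[0, 0]
for y, x, score in keypoints:
    print('({:.3f}, {:.3f}) score={:.2f}'.format(x, y, score))

The y/x values come back normalized to the network input, so they still need to be rescaled to the original frame size before being drawn on the image.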