├── .gitignore ├── .travis.yml ├── LICENSE ├── NOTICE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments ├── single_pose_movenet_active.sh └── single_pose_movenet_coco.sh ├── images └── NOTICE ├── models ├── .gitignore └── movenet.pth ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── det1.png ├── det2.png ├── fig2.png ├── pose1.png ├── pose2.png └── pose3.png ├── requirements.txt └── src ├── _init_paths.py ├── demo.py ├── lib ├── datasets │ ├── dataset │ │ ├── active.py │ │ └── coco_hp.py │ ├── dataset_factory.py │ └── sample │ │ ├── multi_pose.py │ │ └── single_pose.py ├── detectors │ ├── base_detector.py │ ├── detector_factory.py │ ├── multi_pose.py │ └── single_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── backbone_utils.py │ │ ├── feature_pyramid_network.py │ │ ├── mobilenetv2.py │ │ └── movenet.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── multi_pose.py │ ├── single_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── main.py ├── test.py └── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_active_to_coco.py ├── convert_kitti_to_coco.py ├── convert_mpii_to_coco.py ├── draw.py ├── eval_coco.py ├── eval_coco_hp.py ├── filter_hp.py ├── get_kitti.sh ├── get_pascal_voc.sh ├── merge_active_coco_json.py ├── reval.py ├── tflite_weight_viewer.py └── vis_pred.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.jpg 2 | *.txt 3 | pre/* 4 | legacy/* 5 | src/_site/* 6 | .idea/* 7 | .jekyll-cache 8 | .DS_Store 9 | debug/* 10 | *.DS_Store 11 | *.json 12 | images/* 13 | images_bak/* 14 | *.mat 15 | src/.vscode/* 16 | preds/* 17 | *.h5 18 | *.pth 19 | *.checkpoint 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # dotenv 101 | .env 102 | 103 | # virtualenv 104 | .venv 105 | venv/ 106 | ENV/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | # image file in images/ 122 | images/*.jpg 123 | images/active/*.jpg 124 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | group: travis_latest 2 | dist: xenial # ubuntu-16.04 3 | language: python 4 | cache: pip 5 | python: 6 | - 3.6 7 | - 3.7 8 | install: 9 | - pip install flake8 10 | - pip install -r requirements.txt 11 | before_script: 12 | # stop the build if there are Python syntax errors or undefined names 13 | - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics 14 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 15 | - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 16 | script: 17 | - true # add other tests here 18 | notifications: 19 | on_success: change 20 | on_failure: change # `always` will be the setting once code changes slow down 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Portions of this software are derived from tf-faster-rcnn. 2 | 3 | ============================================================================== 4 | tf-faster-rcnn licence 5 | ============================================================================== 6 | 7 | MIT License 8 | 9 | Copyright (c) 2017 Xinlei Chen 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 28 | 29 | 30 | Portions of this software are derived from human-pose-estimation.pytorch. 31 | 32 | ============================================================================== 33 | human-pose-estimation.pytorch licence 34 | ============================================================================== 35 | MIT License 36 | 37 | Copyright (c) Microsoft Corporation. All rights reserved. 38 | 39 | Permission is hereby granted, free of charge, to any person obtaining a copy 40 | of this software and associated documentation files (the "Software"), to deal 41 | in the Software without restriction, including without limitation the rights 42 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 43 | copies of the Software, and to permit persons to whom the Software is 44 | furnished to do so, subject to the following conditions: 45 | 46 | The above copyright notice and this permission notice shall be included in all 47 | copies or substantial portions of the Software. 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 52 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 54 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 55 | SOFTWARE 56 | 57 | Portions of this software are derived from CornerNet. 58 | 59 | ============================================================================== 60 | CornerNet licence 61 | ============================================================================== 62 | 63 | BSD 3-Clause License 64 | 65 | Copyright (c) 2018, University of Michigan 66 | All rights reserved. 
67 | 68 | Redistribution and use in source and binary forms, with or without 69 | modification, are permitted provided that the following conditions are met: 70 | 71 | * Redistributions of source code must retain the above copyright notice, this 72 | list of conditions and the following disclaimer. 73 | 74 | * Redistributions in binary form must reproduce the above copyright notice, 75 | this list of conditions and the following disclaimer in the documentation 76 | and/or other materials provided with the distribution. 77 | 78 | * Neither the name of the copyright holder nor the names of its 79 | contributors may be used to endorse or promote products derived from 80 | this software without specific prior written permission. 81 | 82 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 83 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 85 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 86 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 88 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 89 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 90 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 91 | 92 | 93 | Portions of this software are derived from DCNv2. 94 | 95 | ============================================================================== 96 | DCNv2 licence 97 | ============================================================================== 98 | 99 | BSD 3-Clause License 100 | 101 | Copyright (c) 2019, Charles Shang 102 | All rights reserved. 103 | 104 | Redistribution and use in source and binary forms, with or without 105 | modification, are permitted provided that the following conditions are met: 106 | 107 | 1. Redistributions of source code must retain the above copyright notice, this 108 | list of conditions and the following disclaimer. 109 | 110 | 2. Redistributions in binary form must reproduce the above copyright notice, 111 | this list of conditions and the following disclaimer in the documentation 112 | and/or other materials provided with the distribution. 113 | 114 | 3. Neither the name of the copyright holder nor the names of its 115 | contributors may be used to endorse or promote products derived from 116 | this software without specific prior written permission. 117 | 118 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 119 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 120 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 121 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 122 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 123 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 124 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 125 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 126 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 127 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
128 | 129 | ============================================================================== 130 | DLA licence 131 | ============================================================================== 132 | 133 | BSD 3-Clause License 134 | 135 | Copyright (c) 2018, Fisher Yu 136 | All rights reserved. 137 | 138 | Redistribution and use in source and binary forms, with or without 139 | modification, are permitted provided that the following conditions are met: 140 | 141 | * Redistributions of source code must retain the above copyright notice, this 142 | list of conditions and the following disclaimer. 143 | 144 | * Redistributions in binary form must reproduce the above copyright notice, 145 | this list of conditions and the following disclaimer in the documentation 146 | and/or other materials provided with the distribution. 147 | 148 | * Neither the name of the copyright holder nor the names of its 149 | contributors may be used to endorse or promote products derived from 150 | this software without specific prior written permission. 151 | 152 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 153 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 154 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 155 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 156 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 157 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 158 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 159 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 160 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 161 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/single_pose_movenet_active.sh: -------------------------------------------------------------------------------- 1 | # finetune scripts for movenet 2 | cd /Users/rachel/PycharmProjects/movenet/src 3 | # python main.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --batch_size 24 --master_batch 4 --lr 5e-4 --gpus 0,1,2,3 --num_epochs 150 --lr_step 30 60 90 --num_workers 16 --load_model ../models/movenet.pth 4 | # test 7e -3 5e-3 1e-3 5e-4 1e-4 5 | # python test.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --resume 6 | # # flip test 7 | # python test.py single_pose --exp_id yoga_movenet --dataset active --arch movenet --resume --flip_test 8 | python demo.py single_pose --dataset active --arch movenet --demo ../images/1111error/ --load_model ../models/movenet_thunder.pth --K 1 --gpus -1 --debug 2 #--vis_thresh 0.0 --not_reg_offset 9 | 10 | cd .. 
11 | -------------------------------------------------------------------------------- /experiments/single_pose_movenet_coco.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | python main.py single_pose --exp_id coac_movenet --dataset active_coco --dataset active_coco --arch movenet --batch_size 24 --master_batch 4 --lr 2.5e-4 --gpus 0,1,2,3 --num_epochs 250 --lr_step 120,150,180,200,230 --num_workers 16 --resume #--load_model ../models/ctdet_movenet.pth 3 | # test 4 | python test.py single_pose --exp_id coac_movenet --dataset active_coco --arch movenet --keep_res --resume 5 | # flip test 6 | python test.py single_pose --exp_id coac_movenet --dataset active_coco --arch movenet --keep_res --resume --flip_test 7 | cd .. -------------------------------------------------------------------------------- /images/NOTICE: -------------------------------------------------------------------------------- 1 | The demo images are licensed as United States government work: 2 | https://www.usa.gov/government-works 3 | 4 | The image files were obtained on Jan 13, 2018 from the following 5 | URLs. 6 | 7 | 16004479832_a748d55f21_k.jpg 8 | https://www.flickr.com/photos/archivesnews/16004479832 9 | 10 | 18124840932_e42b3e377c_k.jpg 11 | https://www.flickr.com/photos/usnavy/18124840932 12 | 13 | 33887522274_eebd074106_k.jpg 14 | https://www.flickr.com/photos/usaid_pakistan/33887522274 15 | 16 | 15673749081_767a7fa63a_k.jpg 17 | https://www.flickr.com/photos/usnavy/15673749081 18 | 19 | 34501842524_3c858b3080_k.jpg 20 | https://www.flickr.com/photos/departmentofenergy/34501842524 21 | 22 | 24274813513_0cfd2ce6d0_k.jpg 23 | https://www.flickr.com/photos/dhsgov/24274813513 24 | 25 | 19064748793_bb942deea1_k.jpg 26 | https://www.flickr.com/photos/statephotos/19064748793 27 | 28 | 33823288584_1d21cf0a26_k.jpg 29 | https://www.flickr.com/photos/cbpphotos/33823288584 30 | 31 | 17790319373_bd19b24cfc_k.jpg 32 | https://www.flickr.com/photos/secdef/17790319373 33 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /models/movenet.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/models/movenet.pth -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 
9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | 26 | - [Optional] If you want to train ExtremeNet, generate extreme point annotation from segmentation: 27 | 28 | ~~~ 29 | cd $CenterNet_ROOT/tools/ 30 | python gen_coco_extreme_points.py 31 | ~~~ 32 | It generates `instances_extreme_train2017.json` and `instances_extreme_val2017.json` in `data/coco/annotations/`. 33 | 34 | ### Pascal VOC 35 | 36 | - Run 37 | 38 | ~~~ 39 | cd $CenterNet_ROOT/tools/ 40 | bash get_pascal_voc.sh 41 | ~~~ 42 | - The above script includes: 43 | - Download, unzip, and move Pascal VOC images from the [VOC website](http://host.robots.ox.ac.uk/pascal/VOC/). 44 | - [Download](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) Pascal VOC annotation in COCO format (from [Detectron](https://github.com/facebookresearch/Detectron/tree/master/detectron/datasets/data)). 45 | - Combine train/val 2007/2012 annotation files into a single json. 46 | 47 | 48 | - Move the created `voc` folder to `data` (or create symlinks) to make the data folder like: 49 | 50 | ~~~ 51 | ${CenterNet_ROOT} 52 | |-- data 53 | `-- |-- voc 54 | `-- |-- annotations 55 | | |-- pascal_trainval0712.json 56 | | |-- pascal_test2017.json 57 | |-- images 58 | | |-- 000001.jpg 59 | | ...... 60 | `-- VOCdevkit 61 | 62 | ~~~ 63 | The `VOCdevkit` folder is needed to run the evaluation script from [faster rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/tools/reval.py). 64 | 65 | ### KITTI 66 | 67 | - Download [images](http://www.cvlibs.net/download.php?file=data_object_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_object_label_2.zip), and [calibrations](http://www.cvlibs.net/download.php?file=data_object_calib.zip) from [KITTI website](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) and unzip. 68 | 69 | - Download the train-val split of [3DOP](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and [SubCNN](https://github.com/tanshen/SubCNN/tree/master/fast-rcnn/data/KITTI) and place the data as below 70 | 71 | ~~~ 72 | ${CenterNet_ROOT} 73 | |-- data 74 | `-- |-- kitti 75 | `-- |-- training 76 | | |-- image_2 77 | | |-- label_2 78 | | |-- calib 79 | |-- ImageSets_3dop 80 | | |-- test.txt 81 | | |-- train.txt 82 | | |-- val.txt 83 | | |-- trainval.txt 84 | `-- ImageSets_subcnn 85 | |-- test.txt 86 | |-- train.txt 87 | |-- val.txt 88 | |-- trainval.txt 89 | ~~~ 90 | 91 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. You can set `DEBUG=True` in `line 5` to visualize the annotation. 
92 | 93 | - Link image folder 94 | 95 | ~~~ 96 | cd ${CenterNet_ROOT}/data/kitti/ 97 | mkdir images 98 | ln -s training/image_2 images/trainval 99 | ~~~ 100 | 101 | - The data structure should look like: 102 | 103 | ~~~ 104 | ${CenterNet_ROOT} 105 | |-- data 106 | `-- |-- kitti 107 | `-- |-- annotations 108 | | |-- kitti_3dop_train.json 109 | | |-- kitti_3dop_val.json 110 | | |-- kitti_subcnn_train.json 111 | | |-- kitti_subcnn_val.json 112 | `-- images 113 | |-- trainval 114 | |-- test 115 | ~~~ 116 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials to develop CenterNet. `src/lib/opts.py` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI format to COCO format. 9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and annotation path. 10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/trains/`, and `src/lib/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list (one entry per stage; a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads` (a minimal sketch appears a little further below). 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. 20 | -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution; without it, the images are resized to `512 x 512`. You can add `--flip_test` or `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip test and multi-scale test; the expected APs are `39.2` and `41.7`, respectively.
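(Referring back to the "New architecture" notes in DEVELOP.md above.) A minimal sketch of a model that follows the `heads` convention. Everything here is illustrative — `ToyNet`, its one-layer backbone, and the example head channels are made up for this sketch and are not the repository's movenet implementation:

~~~
import torch
from torch import nn

class ToyNet(nn.Module):
    """Hypothetical example: build one small head per entry in `heads`."""
    def __init__(self, heads, head_conv=64):
        super(ToyNet, self).__init__()
        self.heads = heads
        # stand-in backbone: a single stride-4 conv producing 64 channels
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=4, padding=1),
            nn.ReLU(inplace=True))
        for name, channels in heads.items():
            head = nn.Sequential(
                nn.Conv2d(64, head_conv, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, channels, kernel_size=1))
            self.__setattr__(name, head)  # register each head as a submodule

    def forward(self, x):
        feat = self.backbone(x)
        out = {name: self.__getattr__(name)(feat) for name in self.heads}
        return [out]  # single stage: a list with one dict, keyed like `heads`

heads = {'hm': 1, 'hps': 34, 'reg': 2}  # e.g. center heatmap, 17x2 joint offsets, center offset
net = ToyNet(heads)
[outputs] = net(torch.zeros(1, 3, 192, 192))
print({name: tuple(t.shape) for name, t in outputs.items()})
~~~

The real networks under `src/lib/models/networks/` are expected to follow the same contract.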
19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for dla 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can change to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable for 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size (see the sketch at the end of this section). 75 | For example, to train COCO object detection with dla on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch evenly splits the total batch size across the GPUs. 83 | `--master_batch` allows using a different batch size for the master GPU, which usually costs more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1omiOUjWCrFbTJREypuZaODu0bOlF_7Fg/view?usp=sharing) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
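For reference, a tiny illustrative sketch of the linear scaling rule mentioned above (the helper name is made up for this example; the base values are the defaults quoted in this section):

~~~
def scaled_lr(batch_size, base_lr=1.25e-4, base_batch_size=32):
    """Linear scaling rule: keep lr / batch_size constant."""
    return base_lr * batch_size / base_batch_size

for bs in (8, 16, 32, 64, 128):
    print(bs, scaled_lr(bs))  # e.g. 16 -> 6.25e-05, 64 -> 2.5e-04
~~~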
90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v0.4.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 0.4.1: 19 | 20 | ~~~ 21 | conda install pytorch=0.4.1 torchvision -c pytorch 22 | ~~~ 23 | 24 | Then disable cuDNN batch normalization (due to [this issue](https://github.com/xingyizhou/pytorch-pose-hg-3d/issues/16)). 25 | 26 | ~~~ 27 | # PYTORCH=/path/to/pytorch # usually ~/anaconda3/envs/CenterNet/lib/python3.6/site-packages/ 28 | # for pytorch v0.4.0 29 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 30 | # for pytorch v0.4.1 31 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 32 | ~~~ 33 | 34 | For other PyTorch versions, you can manually open `torch/nn/functional.py`, find the line with `torch.batch_norm`, and replace `torch.backends.cudnn.enabled` with `False`. We observed slightly worse training results without doing so. 35 | 36 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | 3. Clone this repo: 47 | 48 | ~~~ 49 | CenterNet_ROOT=/path/to/clone/CenterNet 50 | git clone https://github.com/xingyizhou/CenterNet $CenterNet_ROOT 51 | ~~~ 52 | 53 | 54 | 4. Install the requirements: 55 | 56 | ~~~ 57 | pip install -r requirements.txt 58 | ~~~ 59 | 60 | 61 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)). 62 | 63 | ~~~ 64 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 65 | ./make.sh 66 | ~~~ 67 | 6. [Optional, only required for ExtremeNet or multi-scale testing] Compile NMS if you want to use multi-scale testing or test ExtremeNet. 68 | 69 | ~~~ 70 | cd $CenterNet_ROOT/src/lib/external 71 | make 72 | ~~~ 73 | 74 | 7. Download pretrained models for [detection]() or [pose estimation]() and move them to `$CenterNet_ROOT/models/`. More models can be found in the [Model zoo](MODEL_ZOO.md). 75 | -------------------------------------------------------------------------------- /readme/MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # MODEL ZOO 2 | 3 | ### Common settings and notes 4 | 5 | - The experiments are run with PyTorch 0.4.1, CUDA 9.0, and cuDNN 7.1. 6 | - Training times are measured on our servers with 8 TITAN V GPUs (12 GB Memory). 7 | - Testing times are measured on our local machine with a TITAN Xp GPU. 8 | - The models can be downloaded directly from [Google drive](https://drive.google.com/open?id=1px-Xg7jXSC79QqgsD1AAGJQkuf5m0zh_).
9 | 10 | ## Object Detection 11 | 12 | 13 | ### COCO 14 | 15 | | Model | GPUs |Train time(h)| Test time (ms) | AP | Download | 16 | |--------------------------|------|-------------|----------------|--------------------|-----------| 17 | |[ctdet\_coco\_hg](../experiments/ctdet_coco_hg.sh) | 5 |109 | 71 / 129 / 674 | 40.3 / 42.2 / 45.1 | [model](https://drive.google.com/open?id=1cNyDmyorOduMRsgXoUnuyUiF6tZNFxaG) | 18 | |[ctdet\_coco\_dla\_1x](../experiments/ctdet_coco_dla_1x.sh) | 8 | 57 | 19 / 36 / 248 | 36.3 / 38.2 / 40.7 | [model](https://drive.google.com/open?id=1r89_KNXyDyvUp8NggduG9uKQTMU2DsK_) | 19 | |[ctdet\_coco\_dla\_2x](../experiments/ctdet_coco_dla_2x.sh) | 8 | 92 | 19 / 36 / 248 | 37.4 / 39.2 / 41.7 | [model](https://drive.google.com/open?id=1pl_-ael8wERdUREEnaIfqOV_VF2bEVRT) | 20 | |[ctdet\_coco\_resdcn101](../experiments/ctdet_coco_resdcn101.sh)| 8 | 65 | 22 / 40 / 259 | 34.6 / 36.2 / 39.3 | [model](https://drive.google.com/open?id=1bTJCbAc1szA9lWU-fvVw52lqR3U2TTry) | 21 | |[ctdet\_coco\_resdcn18](../experiments/ctdet_coco_resdcn18.sh) | 4 | 28 | 7 / 14 / 81 | 28.1 / 30.0 / 33.2 | [model](https://drive.google.com/open?id=1b-_sjq1Pe_dVxt5SeFmoadMfiPTPZqpz) | 22 | |[exdet\_coco\_hg](../experiments/exdet_coco_hg.sh) | 5 |215 | 134 / 246/1340 | 35.8 / 39.8 / 42.4 | [model](https://drive.google.com/open?id=1-5bT5ZF8bXriJ-wAvOjJFrBLvZV2-mlV) | 23 | |[exdet\_coco\_dla](../experiments/exdet_coco_dla.sh) | 8 |133 | 51 / 90 / 481 | 33.0 / 36.5 / 38.5 | [model](https://drive.google.com/open?id=1PFcEqN0KjFuq9XaqzB7TkVD3pvXZx04e) | 24 | 25 | #### Notes 26 | 27 | - All models are trained on COCO train 2017 and evaluated on val 2017. 28 | - We show test time and AP with no augmentation / flip augmentation / multi scale (0.5, 0.75, 1, 1.25, 1.5) augmentation. 29 | - Results on COCO test-dev can be found in the paper or add `--trainval` for `test.py`. 30 | - exdet is our re-implementation of [ExtremeNet](https://github.com/xingyizhou/ExtremeNet). The testing does not include edge aggregation. 31 | - For dla and resnets, `1x` means the training schedule that train 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch (following [SimpleBaseline](https://github.com/Microsoft/human-pose-estimation.pytorch)). `2x` means train 230 epochs with learning rate dropped 10 times at the 180 and 210 epoch. The training schedules are **not** carefully investigated. 32 | - The hourglass trained schedule follows [ExtremeNet](https://github.com/xingyizhou/ExtremeNet): trains 50 epochs (approximately 250000 iterations in batch size 24) and drops learning rate at the 40 epoch. 33 | - Testing time include network forwarding time, decoding time, and nms time (for ExtremeNet). 34 | - We observed up to 0.4 AP performance jitter due to randomness in training. 
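The `1x`/`2x` schedules above are plain step decays (the `--lr_step` flags in the experiment scripts). An illustrative sketch, assuming a ×0.1 drop at each listed epoch as described in the notes (the function name and base value are placeholders):

~~~
def lr_at_epoch(epoch, base_lr=1.25e-4, lr_step=(90, 120)):
    """Step schedule: divide the learning rate by 10 at each epoch in lr_step."""
    return base_lr * (0.1 ** sum(epoch >= s for s in lr_step))

print(lr_at_epoch(60), lr_at_epoch(100), lr_at_epoch(130))
# 1.25e-04, 1.25e-05, 1.25e-06
~~~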
35 | 36 | ### Pascal VOC 37 | 38 | | Model |GPUs| Train time (h)| Test time (ms) | mAP | Download | 39 | |---------------------------------|----|---------------|----------------|------|-----------| 40 | |[ctdet\_pascal\_dla\_384](../experiments/ctdet_pascal_dla_384.sh) | 1 |15 | 20 | 79.3 | [model](https://drive.google.com/open?id=1IC3FZkxAQHm2rxoIGmS4YluYpZxwYkJf) | 41 | |[ctdet\_pascal\_dla\_512](../experiments/ctdet_pascal_dla_512.sh) | 2 |15 | 30 | 80.7 | [model](https://drive.google.com/open?id=1jIfK9EyqzNcupxGsp3YRnEiewrIG4_Ma) | 42 | |[ctdet\_pascal\_resdcn18\_384](../experiments/ctdet_pascal_resdcn18_384.sh) | 1 |3 | 7 | 72.6 | [model](https://drive.google.com/open?id=1Kq27D1uoPZK42j2alDWmCGyqRU2ob1BX) | 43 | |[ctdet\_pascal\_resdcn18\_512](../experiments/ctdet_pascal_resdcn18_512.sh) | 1 |5 | 10 | 75.7 | [model](https://drive.google.com/open?id=1MRUJTTJ4-ZDN0Y-zQOqQBqjrQMcXFzet) | 44 | |[ctdet\_pascal\_resdcn101\_384](../experiments/ctdet_pascal_resdcn101_384.sh)| 2 |7 | 22 | 77.1 | [model](https://drive.google.com/open?id=11YXE04zILuXA5-kaYQEEg0ljNKBe6GPO) | 45 | |[ctdet\_pascal\_resdcn101\_512](../experiments/ctdet_pascal_resdcn101_512.sh)| 4 |7 | 33 | 78.7 | [model](https://drive.google.com/open?id=1xhEf-a_y2Di6YdyPpCIj0-kVFjQvDf9N) | 46 | 47 | #### Notes 48 | - All models are trained on trainval 07+12 and tested on test 2007. 49 | - Flip test is used by default. 50 | - Training schedule: train for 70 epochs with learning rate dropped 10 times at the 45 and 60 epoch. 51 | - We observed up to 1 mAP performance jitter due to randomness in training. 52 | 53 | ## Human pose estimation 54 | 55 | ### COCO 56 | 57 | | Model | GPUs |Train time(h)| Test time (ms) | AP | Download | 58 | |--------------------------|------|-------------|----------------|-------------|-----------| 59 | |[multi\_pose\_hg_1x](../experiments/multi_pose_hg_1x.sh) | 5 |62 | 151 | 58.7 | [model](https://drive.google.com/open?id=1HBB5KRaSj-m-vtpGESm7_3evNP5Y84RS) | 60 | |[multi\_pose\_hg_3x](../experiments/multi_pose_hg_3x.sh) | 5 |188 | 151 | 64.0 | [model](https://drive.google.com/open?id=1n6EvwhTbz7LglVXXlL9irJia7YuakHdB) | 61 | |[multi\_pose\_dla_1x](../experiments/multi_pose_dla_1x.sh) | 8 |30 | 44 | 54.7 | [model](https://drive.google.com/open?id=1VeiRtuXfCbmhQNGV-XWL6elUzpuWN-4K) | 62 | |[multi\_pose\_dla_3x](../experiments/multi_pose_dla_3x.sh) | 8 |70 | 44 | 58.9 | [model](https://drive.google.com/open?id=1PO1Ax_GDtjiemEmDVD7oPWwqQkUu28PI) | 63 | 64 | #### Notes 65 | - All models are trained on keypoint train 2017 images which contains at least one human with keypoint annotations (64115 images). 66 | - The evaluation is done on COCO keypoint val 2017 (5000 images). 67 | - Flip test is used by default. 68 | - The models are fine-tuned from the corresponding center point detection models. 69 | - Dla training schedule: `1x`: train for 140 epochs with learning rate dropped 10 times at the 90 and 120 epoch.`3x`: train for 320 epochs with learning rate dropped 10 times at the 270 and 300 epoch. 70 | - Hourglass training schedule: `1x`: train for 50 epochs with learning rate dropped 10 times at the 40 epoch.`3x`: train for 150 epochs with learning rate dropped 10 times at the 130 epoch. 71 | 72 | ## 3D bounding box detection 73 | 74 | #### Notes 75 | - The 3dop split is from [3DOP](https://papers.nips.cc/paper/5644-3d-object-proposals-for-accurate-object-class-detection) and the suborn split is from [SubCNN](https://github.com/tanshen/SubCNN). 76 | - No augmentation is used in testing. 
77 | - The models are trained for 70 epochs with learning rate dropped at the 45 and 60 epoch. 78 | 79 | ### KITTI 3DOP split 80 | 81 | |Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download | 82 | |------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------| 83 | |[ddd_3dop](../experiments/ddd_3dop.sh)|2 | 7h | 31ms |96.9|87.8|79.2|93.9 |84.3 |75.7 |34.0 |30.5 |26.8 | [model](https://drive.google.com/open?id=1znsM6E-aVTkATreDuUVxoU0ajL1az8rz)| 84 | 85 | ### KITTI SubCNN split 86 | 87 | |Model |GPUs|Train time|Test time|AP-E|AP-M|AP-H|AOS-E|AOS-M|AOS-H|BEV-E|BEV-M|BEV-H| Download | 88 | |------------|----|----------|---------|----|----|----|-----|-----|-----|-----|-----|-----|----------| 89 | |[ddd_sub](../experiments/ddd_sub.sh) |2 | 7h | 31ms |89.6|79.8|70.3|85.7 |75.2 |65.9 |34.9 |27.7 |26.4 | [model](https://drive.google.com/open?id=15XuJxTxCBnA8O37M_ghjppnWmVnjC0Hp)| -------------------------------------------------------------------------------- /readme/det1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/det1.png -------------------------------------------------------------------------------- /readme/det2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/det2.png -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/fig2.png -------------------------------------------------------------------------------- /readme/pose1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose1.png -------------------------------------------------------------------------------- /readme/pose2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose2.png -------------------------------------------------------------------------------- /readme/pose3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/readme/pose3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | scipy 7 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/demo.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import cv2 9 | 10 | import torch 11 | 12 | from opts import opts 13 | from detectors.detector_factory import detector_factory 14 | 15 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 16 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 17 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 18 | 19 | def demo(opt): 20 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 21 | opt.debug = max(opt.debug, 1) 22 | Detector = detector_factory[opt.task] 23 | detector = Detector(opt) 24 | 25 | if opt.demo == 'webcam' or \ 26 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 27 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 28 | detector.pause = False 29 | while True: 30 | success, img = cam.read() 31 | if not success: 32 | print("no more images, close.") 33 | return 34 | if opt.debug < 4: 35 | cv2.imshow('input', img) 36 | ret = detector.run(img) 37 | time_str = '' 38 | for stat in time_stats: 39 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 40 | print(time_str) 41 | if opt.debug < 4 and cv2.waitKey(1) == 27: 42 | return # esc to quit 43 | else: 44 | if os.path.isdir(opt.demo): 45 | image_names = [] 46 | ls = os.listdir(opt.demo) 47 | for file_name in sorted(ls): 48 | ext = file_name[file_name.rfind('.') + 1:].lower() 49 | if ext in image_ext: 50 | image_names.append(os.path.join(opt.demo, file_name)) 51 | else: 52 | image_names = [opt.demo] 53 | 54 | for (image_name) in image_names: 55 | ret = detector.run(image_name) 56 | time_str = '' 57 | for stat in time_stats: 58 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 59 | print(time_str) 60 | if __name__ == '__main__': 61 | opt = opts().init() 62 | demo(opt) 63 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/active.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class ACTIVE(data.Dataset): 15 | """ 16 | The modified single-pose version of COCO human pose estimation dataset, naming `Active` dataset here. The main difference is that we limit the human counts in one single image to be less than or equal to 2. 
17 | The order of joints: 18 | KEYPOINT_DICT = { 19 | 'nose': 0, 20 | 'left_eye': 1, 21 | 'right_eye': 2, 22 | 'left_ear': 3, 23 | 'right_ear': 4, 24 | 'left_shoulder': 5, 25 | 'right_shoulder': 6, 26 | 'left_elbow': 7, 27 | 'right_elbow': 8, 28 | 'left_wrist': 9, 29 | 'right_wrist': 10, 30 | 'left_hip': 11, 31 | 'right_hip': 12, 32 | 'left_knee': 13, 33 | 'right_knee': 14, 34 | 'left_ankle': 15, 35 | 'right_ankle': 16 36 | } 37 | """ 38 | num_classes = 1 39 | num_joints = 17 40 | default_resolution = [192, 192] # mli: for movenet-lightning 41 | mean = np.array([1., 1., 1.], 42 | dtype=np.float32).reshape(1, 1, 3) 43 | std = np.array([1., 1., 1.], 44 | dtype=np.float32).reshape(1, 1, 3) 45 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 46 | [11, 12], [13, 14], [15, 16]] 47 | 48 | def __init__(self, opt, split, sp=False): 49 | super(ACTIVE, self).__init__() 50 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 51 | [4, 6], [3, 5], [5, 6], 52 | [5, 7], [7, 9], [6, 8], [8, 10], 53 | [6, 12], [5, 11], [11, 12], 54 | [12, 14], [14, 16], [11, 13], [13, 15]] 55 | 56 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 57 | self.data_dir = os.path.join(opt.data_dir, opt.dataset) # mli: the dir name is specified by `opt.dataset` 58 | self.img_dir = os.path.join(self.data_dir, '{}'.format(split)) 59 | if split == 'test': 60 | raise ValueError('No supported for the testing dataset.') 61 | else: 62 | self.annot_path = os.path.join( 63 | self.data_dir, 'annotations', 64 | '{}_{}.json').format(opt.dataset, split) 65 | self.max_objs = 2 # mli: only consider the images with less than 2 human objects 66 | self._data_rng = np.random.RandomState(123) 67 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 68 | dtype=np.float32) 69 | self._eig_vec = np.array([ 70 | [-0.58752847, -0.69563484, 0.41340352], 71 | [-0.5832747, 0.00994535, -0.81221408], 72 | [-0.56089297, 0.71832671, 0.41158938] 73 | ], dtype=np.float32) 74 | self.split = split 75 | self.opt = opt 76 | 77 | print('==> initializing {} {} data.'.format(opt.dataset, split)) 78 | self.coco = coco.COCO(self.annot_path) 79 | image_ids = self.coco.getImgIds() 80 | 81 | if split == 'train': 82 | self.images = [] 83 | for img_id in image_ids: 84 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 85 | if len(idxs) > 0: 86 | self.images.append(img_id) 87 | else: 88 | self.images = image_ids 89 | self.num_samples = len(self.images) 90 | print('Loaded {} {} samples'.format(split, self.num_samples)) 91 | 92 | def _to_float(self, x): 93 | return float("{:.2f}".format(x)) 94 | 95 | def bbox_from_kpt(self, kpts): 96 | bbox = np.zeros((4)) 97 | xmin = np.min(kpts[:,0]) 98 | ymin = np.min(kpts[:,1]) 99 | xmax = np.max(kpts[:,0]) 100 | ymax = np.max(kpts[:,1]) 101 | width = xmax - xmin - 1 102 | height = ymax - ymin - 1 103 | 104 | # corrupted bounding box 105 | if width <= 0 or height <= 0: 106 | return bbox 107 | # 20% extend 108 | else: 109 | bbox[0] = (xmin + xmax)/2. - width/2*1.2 110 | bbox[1] = (ymin + ymax)/2. 
- height/2*1.2 111 | bbox[2] = width*1.2 112 | bbox[3] = height*1.2 113 | return bbox 114 | 115 | def convert_eval_format(self, all_dets): 116 | # import pdb; pdb.set_trace() 117 | print() 118 | detections = [] 119 | for image_id in all_dets: 120 | category_id = 1 121 | dets = all_dets[image_id] 122 | bbox = self.bbox_from_kpt(dets) 123 | bbox_out = list(map(self._to_float, bbox)) 124 | score = np.sum(dets[:, 2]) / 4 125 | keypoints = np.concatenate([ 126 | dets[:, [1, 0]], 127 | np.ones((17, 1), dtype=np.float32)], axis=1) 128 | keypoints[1:5] = np.zeros((4, 3)) 129 | keypoints = keypoints.reshape(51).tolist() 130 | keypoints = list(map(self._to_float, keypoints)) 131 | detection = { 132 | "image_id": int(image_id), 133 | "category_id": int(category_id), 134 | "bbox": bbox_out, 135 | "score": float("{:.2f}".format(score)), 136 | "keypoints": keypoints 137 | } 138 | detections.append(detection) 139 | return detections 140 | 141 | def __len__(self): 142 | return self.num_samples 143 | 144 | def save_results(self, results, save_dir): 145 | json.dump(self.convert_eval_format(results), 146 | open('{}/results.json'.format(save_dir), 'w')) 147 | 148 | def run_eval(self, results, save_dir): 149 | self.save_results(results, save_dir) 150 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 151 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 152 | coco_eval.evaluate() 153 | coco_eval.accumulate() 154 | coco_eval.summarize() 155 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class COCOHP(data.Dataset): 15 | """ 16 | The order of joints: 17 | KEYPOINT_DICT = { 18 | 'nose': 0, 19 | 'left_eye': 1, 20 | 'right_eye': 2, 21 | 'left_ear': 3, 22 | 'right_ear': 4, 23 | 'left_shoulder': 5, 24 | 'right_shoulder': 6, 25 | 'left_elbow': 7, 26 | 'right_elbow': 8, 27 | 'left_wrist': 9, 28 | 'right_wrist': 10, 29 | 'left_hip': 11, 30 | 'right_hip': 12, 31 | 'left_knee': 13, 32 | 'right_knee': 14, 33 | 'left_ankle': 15, 34 | 'right_ankle': 16 35 | } 36 | """ 37 | num_classes = 1 38 | num_joints = 17 39 | default_resolution = [512, 512] 40 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 41 | dtype=np.float32).reshape(1, 1, 3) 42 | std = np.array([0.28863828, 0.27408164, 0.27809835], 43 | dtype=np.float32).reshape(1, 1, 3) 44 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 45 | [11, 12], [13, 14], [15, 16]] 46 | 47 | def __init__(self, opt, split, sp=False): 48 | super(COCOHP, self).__init__() 49 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 50 | [4, 6], [3, 5], [5, 6], 51 | [5, 7], [7, 9], [6, 8], [8, 10], 52 | [6, 12], [5, 11], [11, 12], 53 | [12, 14], [14, 16], [11, 13], [13, 15]] 54 | 55 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 56 | self.data_dir = os.path.join(opt.data_dir, 'coco') 57 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 58 | if split == 'test': 59 | self.annot_path = os.path.join( 60 | self.data_dir, 'annotations', 61 | 'image_info_test-dev2017.json').format(split) 62 | else: 63 | self.annot_path = os.path.join( 64 | self.data_dir, 
'annotations', 65 | 'person_keypoints_{}2017.json').format(split) 66 | self.max_objs = 32 67 | self._data_rng = np.random.RandomState(123) 68 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 69 | dtype=np.float32) 70 | self._eig_vec = np.array([ 71 | [-0.58752847, -0.69563484, 0.41340352], 72 | [-0.5832747, 0.00994535, -0.81221408], 73 | [-0.56089297, 0.71832671, 0.41158938] 74 | ], dtype=np.float32) 75 | self.split = split 76 | self.opt = opt 77 | 78 | print('==> initializing coco 2017 {} data.'.format(split)) 79 | self.coco = coco.COCO(self.annot_path) 80 | image_ids = self.coco.getImgIds() 81 | 82 | if split == 'train': 83 | self.images = [] 84 | for img_id in image_ids: 85 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 86 | if len(idxs) > 0: 87 | self.images.append(img_id) 88 | else: 89 | self.images = image_ids 90 | self.num_samples = len(self.images) 91 | print('Loaded {} {} samples'.format(split, self.num_samples)) 92 | 93 | def _to_float(self, x): 94 | return float("{:.2f}".format(x)) 95 | 96 | def convert_eval_format(self, all_bboxes): 97 | # import pdb; pdb.set_trace() 98 | detections = [] 99 | for image_id in all_bboxes: 100 | for cls_ind in all_bboxes[image_id]: 101 | category_id = 1 102 | for dets in all_bboxes[image_id][cls_ind]: 103 | bbox = dets[:4] 104 | bbox[2] -= bbox[0] 105 | bbox[3] -= bbox[1] 106 | score = dets[4] 107 | bbox_out = list(map(self._to_float, bbox)) 108 | keypoints = np.concatenate([ 109 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 110 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 111 | keypoints = list(map(self._to_float, keypoints)) 112 | 113 | detection = { 114 | "image_id": int(image_id), 115 | "category_id": int(category_id), 116 | "bbox": bbox_out, 117 | "score": float("{:.2f}".format(score)), 118 | "keypoints": keypoints 119 | } 120 | detections.append(detection) 121 | return detections 122 | 123 | def __len__(self): 124 | return self.num_samples 125 | 126 | def save_results(self, results, save_dir): 127 | json.dump(self.convert_eval_format(results), 128 | open('{}/results.json'.format(save_dir), 'w')) 129 | 130 | def run_eval(self, results, save_dir): 131 | self.save_results(results, save_dir) 132 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 133 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 134 | coco_eval.evaluate() 135 | coco_eval.accumulate() 136 | coco_eval.summarize() 137 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 138 | coco_eval.evaluate() 139 | coco_eval.accumulate() 140 | coco_eval.summarize() 141 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | from .sample.multi_pose import MultiPoseDataset 7 | from .sample.single_pose import SinglePoseDataset 8 | 9 | from .dataset.coco_hp import COCOHP 10 | from .dataset.active import ACTIVE 11 | 12 | 13 | dataset_factory = { 14 | 'coco_hp': COCOHP, 15 | 'active': ACTIVE, 16 | 'active_coco': ACTIVE, 17 | } 18 | 19 | _sample_factory = { 20 | 'multi_pose': MultiPoseDataset, 21 | 'single_pose': SinglePoseDataset, 22 | } 23 | 24 | 25 | def get_dataset(dataset, task): 26 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 27 | pass 28 | return Dataset 29 | 30 | 
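# --- Usage sketch (illustrative addition, not part of the original file) ----
# `get_dataset` composes a dataset's metadata/evaluation class (e.g. ACTIVE)
# with a task's sampling class (e.g. SinglePoseDataset) via multiple
# inheritance: the resulting class has both the COCO-style evaluation helpers
# and the `__getitem__` that builds training targets. Assumes `src/lib` is on
# the path (see src/_init_paths.py).
if __name__ == '__main__':
    Dataset = get_dataset('active', 'single_pose')
    print([cls.__name__ for cls in Dataset.__mro__][:3])
    # -> ['Dataset', 'ACTIVE', 'SinglePoseDataset']
    # Instantiating needs an opts() namespace and data on disk (readme/DATA.md):
    # dataset = Dataset(opt, 'train')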
-------------------------------------------------------------------------------- /src/lib/datasets/sample/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | 17 | class MultiPoseDataset(data.Dataset): 18 | def _coco_box_to_bbox(self, box): 19 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 20 | dtype=np.float32) 21 | return bbox 22 | 23 | def _get_border(self, border, size): 24 | i = 1 25 | while size - border // i <= border // i: 26 | i *= 2 27 | return border // i 28 | 29 | def __getitem__(self, index): 30 | img_id = self.images[index] 31 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 32 | img_path = os.path.join(self.img_dir, file_name) 33 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 34 | anns = self.coco.loadAnns(ids=ann_ids) 35 | num_objs = min(len(anns), self.max_objs) 36 | 37 | img = cv2.imread(img_path) 38 | 39 | height, width = img.shape[0], img.shape[1] 40 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 41 | s = max(img.shape[0], img.shape[1]) * 1.0 42 | rot = 0 43 | 44 | flipped = False 45 | if self.split == 'train': 46 | if not self.opt.not_rand_crop: 47 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 48 | w_border = self._get_border(128, img.shape[1]) 49 | h_border = self._get_border(128, img.shape[0]) 50 | c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) 51 | c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) 52 | else: 53 | sf = self.opt.scale 54 | cf = self.opt.shift 55 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 56 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 57 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 58 | if np.random.random() < self.opt.aug_rot: 59 | rf = self.opt.rotate 60 | rot = np.clip(np.random.randn()*rf, -rf*2, rf*2) 61 | 62 | if np.random.random() < self.opt.flip: 63 | flipped = True 64 | img = img[:, ::-1, :] 65 | c[0] = width - c[0] - 1 66 | 67 | 68 | trans_input = get_affine_transform( 69 | c, s, rot, [self.opt.input_res, self.opt.input_res]) 70 | inp = cv2.warpAffine(img, trans_input, 71 | (self.opt.input_res, self.opt.input_res), 72 | flags=cv2.INTER_LINEAR) 73 | inp = (inp.astype(np.float32) / 255.) 
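    # Note: `inp` is now the (possibly flipped) image warped by the 2x3 affine
    # built from (center c, scale s, rotation rot) to an input_res x input_res
    # crop and scaled to [0, 1]; color augmentation (train only), mean/std
    # normalization, and an HWC -> CHW transpose follow below.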
74 | if self.split == 'train' and not self.opt.no_color_aug: 75 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 76 | inp = (inp - self.mean) / self.std 77 | inp = inp.transpose(2, 0, 1) 78 | 79 | output_res = self.opt.output_res 80 | num_joints = self.num_joints 81 | trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res]) 82 | trans_output = get_affine_transform(c, s, 0, [output_res, output_res]) 83 | 84 | hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32) 85 | hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32) 86 | dense_kps = np.zeros((num_joints, 2, output_res, output_res), 87 | dtype=np.float32) 88 | dense_kps_mask = np.zeros((num_joints, output_res, output_res), 89 | dtype=np.float32) 90 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 91 | kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32) 92 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 93 | ind = np.zeros((self.max_objs), dtype=np.int64) 94 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 95 | kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8) 96 | hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32) 97 | hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64) 98 | hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64) 99 | 100 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 101 | draw_umich_gaussian 102 | 103 | gt_det = [] 104 | for k in range(num_objs): 105 | ann = anns[k] 106 | bbox = self._coco_box_to_bbox(ann['bbox']) 107 | cls_id = int(ann['category_id']) - 1 108 | pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3) 109 | if flipped: 110 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 111 | pts[:, 0] = width - pts[:, 0] - 1 112 | for e in self.flip_idx: 113 | pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy() 114 | bbox[:2] = affine_transform(bbox[:2], trans_output) 115 | bbox[2:] = affine_transform(bbox[2:], trans_output) 116 | bbox = np.clip(bbox, 0, output_res - 1) 117 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 118 | if (h > 0 and w > 0) or (rot != 0): 119 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 120 | radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius)) 121 | ct = np.array( 122 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 123 | ct_int = ct.astype(np.int32) 124 | wh[k] = 1. * w, 1. 
* h 125 | ind[k] = ct_int[1] * output_res + ct_int[0] 126 | reg[k] = ct - ct_int 127 | reg_mask[k] = 1 128 | num_kpts = pts[:, 2].sum() 129 | if num_kpts == 0: 130 | hm[cls_id, ct_int[1], ct_int[0]] = 0.9999 131 | reg_mask[k] = 0 132 | 133 | hp_radius = gaussian_radius((math.ceil(h), math.ceil(w))) 134 | hp_radius = self.opt.hm_gauss \ 135 | if self.opt.mse_loss else max(0, int(hp_radius)) 136 | for j in range(num_joints): 137 | if pts[j, 2] > 0: 138 | pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot) 139 | if pts[j, 0] >= 0 and pts[j, 0] < output_res and \ 140 | pts[j, 1] >= 0 and pts[j, 1] < output_res: 141 | kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int 142 | kps_mask[k, j * 2: j * 2 + 2] = 1 143 | pt_int = pts[j, :2].astype(np.int32) 144 | hp_offset[k * num_joints + j] = pts[j, :2] - pt_int 145 | hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0] 146 | hp_mask[k * num_joints + j] = 1 147 | if self.opt.dense_hp: 148 | # must be before draw center hm gaussian 149 | draw_dense_reg(dense_kps[j], hm[cls_id], ct_int, 150 | pts[j, :2] - ct_int, radius, is_offset=True) 151 | draw_gaussian(dense_kps_mask[j], ct_int, radius) 152 | draw_gaussian(hm_hp[j], pt_int, hp_radius) 153 | draw_gaussian(hm[cls_id], ct_int, radius) 154 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 155 | ct[0] + w / 2, ct[1] + h / 2, 1] + 156 | pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id]) 157 | if rot != 0: 158 | hm = hm * 0 + 0.9999 159 | reg_mask *= 0 160 | kps_mask *= 0 161 | ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 162 | 'hps': kps, 'hps_mask': kps_mask} 163 | if self.opt.dense_hp: 164 | dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res) 165 | dense_kps_mask = dense_kps_mask.reshape( 166 | num_joints, 1, output_res, output_res) 167 | dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1) 168 | dense_kps_mask = dense_kps_mask.reshape( 169 | num_joints * 2, output_res, output_res) 170 | ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask}) 171 | del ret['hps'], ret['hps_mask'] 172 | if self.opt.reg_offset: 173 | ret.update({'reg': reg}) 174 | if self.opt.hm_hp: 175 | ret.update({'hm_hp': hm_hp}) 176 | if self.opt.reg_hp_offset: 177 | ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask}) 178 | if self.opt.debug > 0 or not self.split == 'train': 179 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 180 | np.zeros((1, 40), dtype=np.float32) 181 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 182 | ret['meta'] = meta 183 | return ret 184 | -------------------------------------------------------------------------------- /src/lib/datasets/sample/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | 17 | 18 | class SinglePoseDataset(data.Dataset): 19 | def _coco_box_to_bbox(self, box): 20 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 21 | dtype=np.float32) 22 | return bbox 23 | 24 | 
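    # _coco_box_to_bbox converts a COCO box [x, y, w, h] into corner form
    # [x1, y1, x2, y2], e.g. [10, 20, 30, 40] -> [10., 20., 40., 60.];
    # the corner form is what the affine-transform and clipping code in
    # __getitem__ below expects.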
def _get_border(self, border, size): 25 | i = 1 26 | while size - border // i <= border // i: 27 | i *= 2 28 | return border // i 29 | 30 | def __getitem__(self, index): 31 | img_id = self.images[index] 32 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 33 | img_path = os.path.join(self.img_dir, file_name) 34 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 35 | anns = self.coco.loadAnns(ids=ann_ids) 36 | num_objs = min(len(anns), self.max_objs) 37 | 38 | img = cv2.imread(img_path) 39 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) 40 | 41 | height, width = img.shape[0], img.shape[1] 42 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 43 | s = max(img.shape[0], img.shape[1]) * 1.0 44 | rot = 0 45 | 46 | flipped = False 47 | if self.split == 'train': 48 | if not self.opt.not_rand_crop: 49 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 50 | w_border = self._get_border(128, img.shape[1]) 51 | h_border = self._get_border(128, img.shape[0]) 52 | c[0] = np.random.randint( 53 | low=w_border, high=img.shape[1] - w_border) 54 | c[1] = np.random.randint( 55 | low=h_border, high=img.shape[0] - h_border) 56 | else: 57 | sf = self.opt.scale 58 | cf = self.opt.shift 59 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 60 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 61 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 62 | if np.random.random() < self.opt.aug_rot: 63 | rf = self.opt.rotate 64 | rot = np.clip(np.random.randn()*rf, -rf*2, rf*2) 65 | 66 | if np.random.random() < self.opt.flip: 67 | flipped = True 68 | img = img[:, ::-1, :] 69 | c[0] = width - c[0] - 1 70 | 71 | trans_input = get_affine_transform( 72 | c, s, rot, [self.opt.input_res, self.opt.input_res]) 73 | inp = cv2.warpAffine(img, trans_input, 74 | (self.opt.input_res, self.opt.input_res), 75 | flags=cv2.INTER_LINEAR) 76 | inp = (inp.astype(np.float32) / 127.5) 77 | if self.split == 'train' and not self.opt.no_color_aug: 78 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 79 | inp = (inp - self.mean) / self.std 80 | inp = inp.transpose(2, 0, 1) 81 | 82 | output_res = self.opt.output_res 83 | num_joints = self.num_joints 84 | trans_output_rot = get_affine_transform( 85 | c, s, rot, [output_res, output_res]) 86 | trans_output = get_affine_transform(c, s, 0, [output_res, output_res]) 87 | 88 | hm = np.zeros((self.num_classes, output_res, 89 | output_res), dtype=np.float32) 90 | hm_hp = np.zeros((num_joints, output_res, output_res), 91 | dtype=np.float32) 92 | kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32) 93 | ind = np.zeros((self.max_objs), dtype=np.int64) 94 | kps_mask = np.zeros( 95 | (self.max_objs, self.num_joints * 2), dtype=np.uint8) 96 | hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32) 97 | hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64) 98 | hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64) 99 | 100 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 101 | draw_umich_gaussian 102 | 103 | gt_det = [] 104 | for k in range(num_objs): 105 | ann = anns[k] 106 | bbox = self._coco_box_to_bbox(ann['bbox']) 107 | cls_id = int(ann['category_id']) - 1 108 | pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3) 109 | if flipped: 110 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 111 | pts[:, 0] = width - pts[:, 0] - 1 112 | for e in self.flip_idx: 113 | pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy() 114 | bbox[:2] = 
affine_transform(bbox[:2], trans_output) 115 | bbox[2:] = affine_transform(bbox[2:], trans_output) 116 | bbox = np.clip(bbox, 0, output_res - 1) 117 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 118 | if (h > 0 and w > 0) or (rot != 0): 119 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 120 | radius = self.opt.hm_gauss if self.opt.mse_loss else max( 121 | 0, int(radius)) 122 | ct = np.array( 123 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 124 | ct_int = ct.astype(np.int32) 125 | ind[k] = ct_int[1] * output_res + ct_int[0] 126 | num_kpts = pts[:, 2].sum() 127 | if num_kpts == 0: 128 | hm[cls_id, ct_int[1], ct_int[0]] = 0.9999 129 | 130 | hp_radius = gaussian_radius((math.ceil(h), math.ceil(w))) 131 | hp_radius = self.opt.hm_gauss \ 132 | if self.opt.mse_loss else max(0, int(hp_radius)) 133 | for j in range(num_joints): 134 | if pts[j, 2] > 0: 135 | pts[j, :2] = affine_transform( 136 | pts[j, :2], trans_output_rot) 137 | if pts[j, 0] >= 0 and pts[j, 0] < output_res and \ 138 | pts[j, 1] >= 0 and pts[j, 1] < output_res: 139 | # TODO: Check the ordering of y,x here. 140 | # kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int 141 | kps[k, j * 2] = pts[j, 1:2] - ct_int[1] 142 | kps[k, j * 2 + 1] = pts[j, 0:1] - ct_int[0] 143 | 144 | kps_mask[k, j * 2: j * 2 + 2] = 1 145 | pt_int = pts[j, :2].astype(np.int32) 146 | # hp_offset[k * num_joints + j] = pts[j, :2] - pt_int 147 | # TODO: Check the ordering of y,x here. 148 | hp_offset[k * num_joints + j][0] = pts[j, 1:2] - pt_int[1] 149 | hp_offset[k * num_joints + j][1] = pts[j, 0:1] - pt_int[0] 150 | hp_ind[k * num_joints + j] = pt_int[1] * \ 151 | output_res + pt_int[0] 152 | hp_mask[k * num_joints + j] = 1 153 | draw_gaussian(hm_hp[j], pt_int, hp_radius) 154 | draw_gaussian(hm[cls_id], ct_int, radius) 155 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 156 | ct[0] + w / 2, ct[1] + h / 2, 1] + 157 | pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id]) 158 | if rot != 0: 159 | hm = hm * 0 + 0.9999 160 | # reg_mask *= 0 161 | kps_mask *= 0 162 | # ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 163 | # 'hps': kps, 'hps_mask': kps_mask} 164 | # ret = {'input': inp, 'hm': hm, 'ind': ind, 165 | # 'hps': kps, 'hps_mask': kps_mask} 166 | # if self.opt.hm_hp: 167 | # ret.update({'hm_hp': hm_hp}) 168 | # if self.opt.reg_hp_offset: 169 | # ret.update({'hp_offset': hp_offset, 170 | # 'hp_ind': hp_ind, 'hp_mask': hp_mask}) 171 | ret = {'input': inp, 'hm': hm, 'ind': ind, 172 | 'hps': kps, 'hps_mask': kps_mask, 173 | 'hm_hp': hm_hp, 'hp_offset': hp_offset, 174 | 'hp_ind': hp_ind, 'hp_mask': hp_mask} 175 | if self.opt.debug > 0 or not self.split == 'train': 176 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 177 | np.zeros((1, 40), dtype=np.float32) 178 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 179 | ret['meta'] = meta 180 | return ret 181 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | 16 | class BaseDetector(object): 17 | 
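  # Shared single-image inference flow used by the detectors in this package:
  # run() -> pre_process() (pad to a square, resize to the fixed 256x256 input,
  # normalize with (x / 127.5 - mean) / std) -> process() (forward pass + decode,
  # implemented by subclasses) -> post_process() (map detections back to the
  # original image size). debug() runs at opt.debug >= 2 and show_results()
  # at opt.debug >= 1.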
def __init__(self, opt): 18 | if opt.gpus[0] >= 0: 19 | opt.device = torch.device('cuda') 20 | else: 21 | opt.device = torch.device('cpu') 22 | 23 | print('Creating model...') 24 | self.model = create_model(opt.arch, opt.heads, 25 | opt.head_conv, opt.froze_backbone) 26 | self.model = load_model(self.model, opt.load_model) 27 | self.model = self.model.to(opt.device) 28 | self.model.eval() 29 | 30 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 31 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 32 | self.max_per_image = 100 33 | self.num_classes = opt.num_classes 34 | self.opt = opt 35 | self.pause = True 36 | self.global_num = 0 37 | 38 | def pre_process(self, image, meta=None): 39 | height, width = image.shape[0:2] 40 | 41 | # padding all images to be square. 42 | if height > width: 43 | diff = height - width 44 | image = cv2.copyMakeBorder( 45 | image, 0, 0, int(diff//2), int(diff//2 + diff%2), 46 | cv2.BORDER_CONSTANT, value=(0,0,0)) 47 | elif height < width: 48 | diff = width - height 49 | image = cv2.copyMakeBorder( 50 | image, int(diff//2), int(diff//2+diff%2), 0, 0, 51 | cv2.BORDER_CONSTANT, value=(0,0,0)) 52 | 53 | new_height = 256#192 54 | new_width = 256#192 55 | 56 | inp_height = new_height 57 | inp_width = new_width 58 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 59 | s = np.array([inp_width, inp_height], dtype=np.float32) 60 | 61 | inp_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR) 62 | inp_image = cv2.cvtColor(inp_image, cv2.COLOR_BGR2RGB).astype(np.float32) 63 | inp_image = ((inp_image / 127.5 - self.mean) / 64 | self.std).astype(np.float32) 65 | images = inp_image.transpose(2, 0, 1).reshape( 66 | 1, 3, inp_height, inp_width) 67 | images = torch.from_numpy(images) 68 | meta = {'c': c, 's': s, 69 | 'in_height': height, 70 | 'in_width': width, 71 | 'out_height': inp_height // self.opt.down_ratio, 72 | 'out_width': inp_width // self.opt.down_ratio} 73 | return images, meta 74 | 75 | def process(self, images, return_time=False): 76 | raise NotImplementedError 77 | 78 | def post_process(self, dets, meta, scale=1): 79 | raise NotImplementedError 80 | 81 | def merge_outputs(self, detections): 82 | raise NotImplementedError 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | raise NotImplementedError 86 | 87 | def show_results(self, debugger, image, results): 88 | raise NotImplementedError 89 | 90 | def run(self, image_or_path_or_tensor, meta=None): 91 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 92 | merge_time, tot_time = 0, 0 93 | debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3), 94 | theme=self.opt.debugger_theme) 95 | start_time = time.time() 96 | if isinstance(image_or_path_or_tensor, np.ndarray): 97 | image = image_or_path_or_tensor 98 | elif type(image_or_path_or_tensor) == type(''): 99 | image = cv2.imread(image_or_path_or_tensor) 100 | 101 | loaded_time = time.time() 102 | load_time += (loaded_time - start_time) 103 | 104 | # detections = [] 105 | scale_start_time = time.time() 106 | 107 | images, meta = self.pre_process(image, meta) 108 | 109 | images = images.to(self.opt.device) 110 | # torch.cuda.synchronize() 111 | pre_process_time = time.time() 112 | pre_time += pre_process_time - scale_start_time 113 | output, dets, forward_time = self.process(images, return_time=True) 114 | # torch.cuda.synchronize() 115 | net_time += forward_time - pre_process_time 116 | decode_time = time.time() 117 | dec_time += 
decode_time - forward_time 118 | if self.opt.debug >= 2: 119 | self.debug(debugger, images, dets, output) 120 | dets = self.post_process(dets, meta) 121 | # torch.cuda.synchronize() 122 | post_process_time = time.time() 123 | post_time += post_process_time - decode_time 124 | results = dets 125 | 126 | # results = self.merge_outputs(detections) 127 | # torch.cuda.synchronize() 128 | end_time = time.time() 129 | merge_time += end_time - post_process_time 130 | tot_time += end_time - start_time 131 | 132 | if self.opt.debug >= 1: 133 | self.show_results(debugger, image, results, prefix=self.global_num) 134 | self.global_num += 1 135 | 136 | return {'results': results, 'tot': tot_time, 'load': load_time, 137 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 138 | 'post': post_time, 'merge': merge_time} 139 | -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | 6 | from .multi_pose import MultiPoseDetector 7 | from .single_pose import SinglePoseDetector 8 | 9 | detector_factory = { 10 | 'multi_pose': MultiPoseDetector, 11 | 'single_pose': SinglePoseDetector 12 | } 13 | -------------------------------------------------------------------------------- /src/lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! 
If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import multi_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, opt): 26 | super(MultiPoseDetector, self).__init__(opt) 27 | self.flip_idx = opt.flip_idx 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | output = self.model(images)[-1] 33 | output['hm'] = output['hm'].sigmoid_() 34 | if self.opt.hm_hp and not self.opt.mse_loss: 35 | output['hm_hp'] = output['hm_hp'].sigmoid_() 36 | 37 | reg = output['reg'] if self.opt.reg_offset else None 38 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 39 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 40 | torch.cuda.synchronize() 41 | forward_time = time.time() 42 | 43 | if self.opt.flip_test: 44 | num_joints = self.opt.heads["hm_hp"] 45 | output['hm'] = (output['hm'][0:1] + flip_tensor(output['hm'][1:2])) / 2 46 | output['wh'] = (output['wh'][0:1] + flip_tensor(output['wh'][1:2])) / 2 47 | output['hps'] = (output['hps'][0:1] + 48 | flip_lr_off(output['hps'][1:2], self.flip_idx, num_joints)) / 2 49 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 50 | if hm_hp is not None else None 51 | reg = reg[0:1] if reg is not None else None 52 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 53 | 54 | dets = multi_pose_decode( 55 | output['hm'], output['wh'], output['hps'], 56 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 57 | 58 | if return_time: 59 | return output, dets, forward_time 60 | else: 61 | return output, dets 62 | 63 | def post_process(self, dets, meta, scale=1): 64 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 65 | dets = multi_pose_post_process( 66 | dets.copy(), [meta['c']], [meta['s']], 67 | meta['out_height'], meta['out_width']) 68 | for j in range(1, self.num_classes + 1): 69 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 39) 70 | # import pdb; pdb.set_trace() 71 | dets[0][j][:, :4] /= scale 72 | dets[0][j][:, 5:] /= scale 73 | return dets[0] 74 | 75 | def merge_outputs(self, detections): 76 | results = {} 77 | results[1] = np.concatenate( 78 | [detection[1] for detection in detections], axis=0).astype(np.float32) 79 | if self.opt.nms or len(self.opt.test_scales) > 1: 80 | soft_nms_39(results[1], Nt=0.5, method=2) 81 | results[1] = results[1].tolist() 82 | return results 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | dets = dets.detach().cpu().numpy().copy() 86 | dets[:, :, :4] *= self.opt.down_ratio 87 | dets[:, :, 5:39] *= self.opt.down_ratio 88 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 89 | img = np.clip((( 90 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 91 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 92 | debugger.add_blend_img(img, pred, 'pred_hm') 93 | if self.opt.hm_hp: 94 | pred = debugger.gen_colormap_hp( 95 | output['hm_hp'][0].detach().cpu().numpy()) 96 | debugger.add_blend_img(img, pred, 'pred_hmhp') 97 | 98 | def show_results(self, debugger, image, results): 99 | debugger.add_img(image, img_id='multi_pose') 100 | for bbox in results[1]: 101 | if bbox[4] > self.opt.vis_thresh: 
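        # Each row of results[1] is [x1, y1, x2, y2, score, 17 x (x, y) keypoints, ...],
        # so index 4 is the detection score and 5:39 are the keypoint coordinates.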
102 | debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose') 103 | debugger.add_coco_hp(bbox[5:39], img_id='multi_pose') 104 | debugger.show_all_imgs(pause=self.pause) 105 | -------------------------------------------------------------------------------- /src/lib/detectors/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | try: 12 | from external.nms import soft_nms_39 13 | except: 14 | print('NMS not imported! If you need it,' 15 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 16 | from models.decode import single_pose_decode 17 | from models.utils import flip_tensor, flip_lr_off, flip_lr 18 | from utils.image import get_affine_transform 19 | from utils.post_process import multi_pose_post_process, single_pose_post_process 20 | from utils.debugger import Debugger 21 | 22 | from .base_detector import BaseDetector 23 | 24 | 25 | class SinglePoseDetector(BaseDetector): 26 | def __init__(self, opt): 27 | super(SinglePoseDetector, self).__init__(opt) 28 | self.flip_idx = opt.flip_idx 29 | self.vis_thresh = opt.vis_thresh 30 | 31 | def process(self, images, return_time=False): 32 | with torch.no_grad(): 33 | # torch.cuda.synchronize() 34 | output = self.model(images)[0] 35 | dets = self.model.decode(output) 36 | # torch.cuda.synchronize() 37 | forward_time = time.time() 38 | 39 | if return_time: 40 | return output, dets, forward_time 41 | else: 42 | return output, dets 43 | 44 | def post_process(self, dets, meta): 45 | dets = dets[0, 0, :, :] 46 | dets = dets.cpu().numpy() 47 | dets = single_pose_post_process( 48 | dets.copy(), 49 | meta['in_height'], meta['in_width']) 50 | return dets 51 | 52 | def merge_outputs(self, detections): 53 | results = {} 54 | results[1] = np.concatenate( 55 | [detection[1] for detection in detections], axis=0).astype(np.float32) 56 | if self.opt.nms or len(self.opt.test_scales) > 1: 57 | soft_nms_39(results[1], Nt=0.5, method=2) 58 | results[1] = results[1].tolist() 59 | return results 60 | 61 | def debug(self, debugger, images, dets, output): 62 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 63 | img = np.clip((( 64 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 65 | pred = debugger.gen_colormap(torch.sigmoid(output['hm'][0]).detach().cpu().numpy()) 66 | debugger.add_blend_img(img, pred, 'pred_hm') 67 | pred = debugger.gen_colormap_hp( 68 | torch.sigmoid(output['hm_hp'][0]).detach().cpu().numpy()) 69 | debugger.add_blend_img(img, pred, 'pred_hmhp') 70 | 71 | def show_results(self, debugger, image, results, prefix=''): 72 | debugger.add_img(image, img_id='single_pose') 73 | debugger.add_coco_hp(results, img_id='single_pose', vis_thresh=self.vis_thresh) 74 | if self.opt.debug < 4: 75 | debugger.show_all_imgs(pause=self.pause) 76 | else: 77 | debugger.save_all_imgs(prefix=prefix) 78 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- 
/src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, 
value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, 
outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _transpose_and_gather_feat, _transpose_and_gather_feat_plus 14 | import torch.nn.functional as F 15 | 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1) 20 | neg_inds = gt.lt(1) 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | 42 | def _neg_loss(pred, gt): 43 | ''' Modified focal loss. Exactly the same as CornerNet. 
44 | Runs faster and costs a little bit more memory 45 | Arguments: 46 | pred (batch x c x h x w) 47 | gt_regr (batch x c x h x w) 48 | ''' 49 | pos_inds = gt.eq(1).float() 50 | neg_inds = gt.lt(1).float() 51 | 52 | neg_weights = torch.pow(1 - gt, 4) 53 | 54 | loss = 0 55 | 56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 58 | 59 | num_pos = pos_inds.float().sum() 60 | pos_loss = pos_loss.sum() 61 | neg_loss = neg_loss.sum() 62 | 63 | if num_pos == 0: 64 | loss = loss - neg_loss 65 | else: 66 | loss = loss - (pos_loss + neg_loss) / num_pos 67 | return loss 68 | 69 | def _not_faster_neg_loss(pred, gt): 70 | pos_inds = gt.eq(1).float() 71 | neg_inds = gt.lt(1).float() 72 | num_pos = pos_inds.float().sum() 73 | neg_weights = torch.pow(1 - gt, 4) 74 | 75 | loss = 0 76 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds 77 | weight = neg_weights * neg_inds + pos_inds 78 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight 79 | all_loss = all_loss.sum() 80 | 81 | if num_pos > 0: 82 | all_loss /= num_pos 83 | loss -= all_loss 84 | return loss 85 | 86 | def _slow_reg_loss(regr, gt_regr, mask): 87 | num = mask.float().sum() 88 | mask = mask.unsqueeze(2).expand_as(gt_regr) 89 | 90 | regr = regr[mask] 91 | gt_regr = gt_regr[mask] 92 | 93 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 94 | regr_loss = regr_loss / (num + 1e-4) 95 | return regr_loss 96 | 97 | def _reg_loss(regr, gt_regr, mask): 98 | ''' L1 regression loss 99 | Arguments: 100 | regr (batch x max_objects x dim) 101 | gt_regr (batch x max_objects x dim) 102 | mask (batch x max_objects) 103 | ''' 104 | num = mask.float().sum() 105 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 106 | 107 | regr = regr * mask 108 | gt_regr = gt_regr * mask 109 | 110 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 111 | regr_loss = regr_loss / (num + 1e-4) 112 | return regr_loss 113 | 114 | class FocalLoss(nn.Module): 115 | '''nn.Module warpper for focal loss''' 116 | def __init__(self): 117 | super(FocalLoss, self).__init__() 118 | self.neg_loss = _neg_loss 119 | 120 | def forward(self, out, target): 121 | return self.neg_loss(out, target) 122 | 123 | class RegLoss(nn.Module): 124 | '''Regression loss for an output tensor 125 | Arguments: 126 | output (batch x dim x h x w) 127 | mask (batch x max_objects) 128 | ind (batch x max_objects) 129 | target (batch x max_objects x dim) 130 | ''' 131 | def __init__(self): 132 | super(RegLoss, self).__init__() 133 | 134 | def forward(self, output, mask, ind, target): 135 | pred = _transpose_and_gather_feat(output, ind) 136 | loss = _reg_loss(pred, target, mask) 137 | return loss 138 | 139 | class RegL1Loss(nn.Module): 140 | def __init__(self): 141 | super(RegL1Loss, self).__init__() 142 | 143 | def forward(self, output, mask, ind, target): 144 | num_joints = output.shape[1] // 2 145 | pred = _transpose_and_gather_feat_plus(output, ind, num_joints) 146 | mask = mask.unsqueeze(2).expand_as(pred).float() 147 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 148 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 149 | loss = loss / (mask.sum() + 1e-4) 150 | return loss 151 | 152 | class NormRegL1Loss(nn.Module): 153 | def __init__(self): 154 | super(NormRegL1Loss, self).__init__() 155 | 156 | def forward(self, output, mask, ind, target): 157 | pred = _transpose_and_gather_feat(output, ind) 
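    # pred has shape (batch, max_objects, dim): the regression output gathered at the
    # flattened ground-truth center indices in `ind` (see the RegLoss docstring above).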
158 | mask = mask.unsqueeze(2).expand_as(pred).float() 159 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 160 | pred = pred / (target + 1e-4) 161 | target = target * 0 + 1 162 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 163 | loss = loss / (mask.sum() + 1e-4) 164 | return loss 165 | 166 | class RegWeightedL1Loss(nn.Module): 167 | def __init__(self): 168 | super(RegWeightedL1Loss, self).__init__() 169 | 170 | def forward(self, output, mask, ind, target): 171 | pred = _transpose_and_gather_feat(output, ind) 172 | mask = mask.float() 173 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 174 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 175 | loss = loss / (mask.sum() + 1e-4) 176 | return loss 177 | 178 | class L1Loss(nn.Module): 179 | def __init__(self): 180 | super(L1Loss, self).__init__() 181 | 182 | def forward(self, output, mask, ind, target): 183 | pred = _transpose_and_gather_feat(output, ind) 184 | mask = mask.unsqueeze(2).expand_as(pred).float() 185 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 186 | return loss 187 | 188 | class BinRotLoss(nn.Module): 189 | def __init__(self): 190 | super(BinRotLoss, self).__init__() 191 | 192 | def forward(self, output, mask, ind, rotbin, rotres): 193 | pred = _transpose_and_gather_feat(output, ind) 194 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 195 | return loss 196 | 197 | def compute_res_loss(output, target): 198 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 199 | 200 | # TODO: weight 201 | def compute_bin_loss(output, target, mask): 202 | mask = mask.expand_as(output) 203 | output = output * mask.float() 204 | return F.cross_entropy(output, target, reduction='elementwise_mean') 205 | 206 | def compute_rot_loss(output, target_bin, target_res, mask): 207 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 208 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 209 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 210 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 211 | # mask: (B, 128, 1) 212 | # import pdb; pdb.set_trace() 213 | output = output.view(-1, 8) 214 | target_bin = target_bin.view(-1, 2) 215 | target_res = target_res.view(-1, 2) 216 | mask = mask.view(-1, 1) 217 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 218 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 219 | loss_res = torch.zeros_like(loss_bin1) 220 | if target_bin[:, 0].nonzero().shape[0] > 0: 221 | idx1 = target_bin[:, 0].nonzero()[:, 0] 222 | valid_output1 = torch.index_select(output, 0, idx1.long()) 223 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 224 | loss_sin1 = compute_res_loss( 225 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 226 | loss_cos1 = compute_res_loss( 227 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 228 | loss_res += loss_sin1 + loss_cos1 229 | if target_bin[:, 1].nonzero().shape[0] > 0: 230 | idx2 = target_bin[:, 1].nonzero()[:, 0] 231 | valid_output2 = torch.index_select(output, 0, idx2.long()) 232 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 233 | loss_sin2 = compute_res_loss( 234 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 235 | loss_cos2 = compute_res_loss( 236 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 237 | loss_res += loss_sin2 + loss_cos2 238 | return loss_bin1 + loss_bin2 + loss_res 239 | 
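The heatmap and regression criteria above share a simple contract: the focal losses expect predictions already squashed to (0, 1) and Gaussian-splatted ground truth with exact 1.0 only at object centers, while the regression losses gather predictions at per-object center indices and mask out empty object slots. A minimal sketch of exercising two of them on toy tensors, with `src/lib` on the path (which losses are attached to which head is decided by the trainers in `src/lib/trains`, not shown here; shapes follow the docstrings above):

    import torch
    from models.losses import FocalLoss, RegWeightedL1Loss

    crit_hm = FocalLoss()              # penalty-reduced focal loss for center heatmaps
    crit_kp = RegWeightedL1Loss()      # masked L1 for per-center joint offsets

    # Toy batch: 1 image, 1 class, a 4x4 output grid, 2 object slots, 17 joints.
    pred_hm = torch.rand(1, 1, 4, 4).sigmoid()   # predictions already in (0, 1)
    gt_hm = torch.zeros(1, 1, 4, 4)
    gt_hm[0, 0, 1, 1] = 1.0                      # one object center at (y=1, x=1)

    pred_kps = torch.rand(1, 34, 4, 4)           # 2 * num_joints regression channels
    ind = torch.tensor([[1 * 4 + 1, 0]])         # flattened center index y*W + x per slot
    mask = torch.zeros(1, 2, 34)
    mask[0, 0] = 1                               # only the first slot holds a real object
    target = torch.zeros(1, 2, 34)

    loss = crit_hm(pred_hm, gt_hm) + crit_kp(pred_kps, mask, ind, target)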
-------------------------------------------------------------------------------- /src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from typing import Dict 5 | 6 | import torchvision.models as models 7 | import torch 8 | import torch.nn as nn 9 | import os 10 | 11 | from .networks.movenet import get_pose_net as get_move_net 12 | 13 | _model_factory = { 14 | 'movenet': get_move_net 15 | } 16 | 17 | 18 | def create_model(arch, heads, head_conv, froze_backbone): 19 | arch = arch[:arch.find('_')] if '_' in arch else arch 20 | get_model = _model_factory[arch] 21 | model = get_model(heads=heads, head_conv=head_conv, froze_backbone=froze_backbone, model_type = 'thunder') 22 | return model 23 | 24 | 25 | def load_model(model, model_path, optimizer=None, resume=False, 26 | lr=None, lr_step=None): 27 | start_epoch = 0 28 | checkpoint = torch.load( 29 | model_path, map_location=lambda storage, loc: storage) 30 | # print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 31 | if 'state_dict' in checkpoint.keys(): 32 | state_dict = checkpoint['state_dict'] 33 | else: 34 | state_dict = checkpoint 35 | 36 | model_state_dict = model.state_dict() 37 | 38 | # check loaded parameters and created model parameters 39 | msg = 'If you see this, your model does not fully load the ' + \ 40 | 'pre-trained weight. Please make sure ' + \ 41 | 'you have correctly specified --arch xxx ' + \ 42 | 'or set the correct --num_classes for your own dataset.' 43 | for k in state_dict: 44 | if k in model_state_dict: 45 | if state_dict[k].shape != model_state_dict[k].shape: 46 | print('Skip loading parameter {}, required shape{}, ' 47 | 'loaded shape{}. 
{}'.format( 48 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 49 | state_dict[k] = model_state_dict[k] 50 | else: 51 | print('Drop parameter {}.'.format(k) + msg) 52 | for k in model_state_dict: 53 | if not (k in state_dict): 54 | print('No param {}.'.format(k) + msg) 55 | state_dict[k] = model_state_dict[k] 56 | model.load_state_dict(state_dict, strict=False) 57 | 58 | # resume optimizer parameters 59 | if optimizer is not None and resume: 60 | if 'optimizer' in checkpoint: 61 | optimizer.load_state_dict(checkpoint['optimizer']) 62 | start_epoch = checkpoint['epoch'] 63 | start_lr = lr 64 | for step in lr_step: 65 | if start_epoch >= step: 66 | start_lr *= 0.1 67 | for param_group in optimizer.param_groups: 68 | param_group['lr'] = start_lr 69 | print('Resumed optimizer with start lr', start_lr) 70 | else: 71 | print('No optimizer parameters in checkpoint.') 72 | if optimizer is not None: 73 | return model, optimizer, start_epoch 74 | else: 75 | return model 76 | 77 | 78 | def save_model(path, epoch, model, optimizer=None): 79 | if isinstance(model, torch.nn.DataParallel): 80 | state_dict = model.module.state_dict() 81 | else: 82 | state_dict = model.state_dict() 83 | data = {'epoch': epoch, 84 | 'state_dict': state_dict} 85 | if not (optimizer is None): 86 | data['optimizer'] = optimizer.state_dict() 87 | torch.save(data, path) 88 | -------------------------------------------------------------------------------- /src/lib/models/networks/backbone_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torch 3 | from torch import nn 4 | from .feature_pyramid_network import FeaturePyramidNetwork 5 | 6 | 7 | from torchvision.ops import misc as misc_nn_ops 8 | from torchvision.models._utils import IntermediateLayerGetter 9 | # from torchvision.models import mobilenet 10 | from .mobilenetv2 import mobilenet_v2 11 | 12 | 13 | class BackboneWithFPN(nn.Module): 14 | """ 15 | Adds a FPN on top of a model. 16 | Internally, it uses torchvision.models._utils.IntermediateLayerGetter to 17 | extract a submodel that returns the feature maps specified in return_layers. 18 | The same limitations of IntermediatLayerGetter apply here. 19 | Args: 20 | backbone (nn.Module) 21 | return_layers (Dict[name, new_name]): a dict containing the names 22 | of the modules for which the activations will be returned as 23 | the key of the dict, and the value of the dict is the name 24 | of the returned activation (which the user can specify). 25 | in_channels_list (List[int]): number of channels for each feature map 26 | that is returned, in the order they are present in the OrderedDict 27 | out_channels (int): number of channels in the FPN. 
28 | Attributes: 29 | out_channels (int): the number of channels in the FPN 30 | """ 31 | def __init__(self, backbone, return_layers, in_channels_list, out_channels): 32 | super(BackboneWithFPN, self).__init__() 33 | 34 | 35 | self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) 36 | self.fpn = FeaturePyramidNetwork( 37 | in_channels_list=in_channels_list, 38 | out_channels_list=[24, 32, 64, 64], 39 | fused_channels_list=[24, 24, 32], 40 | ) 41 | self.out_channels = out_channels 42 | 43 | def forward(self, x): 44 | x = self.body(x) 45 | x = self.fpn(x) 46 | return x 47 | 48 | 49 | def mobilenet_backbone( 50 | backbone_name, # discared as we always use mobilenet v2 51 | pretrained, 52 | fpn, 53 | norm_layer=misc_nn_ops.FrozenBatchNorm2d, 54 | trainable_layers=2, 55 | returned_layers=None, 56 | extra_blocks=None, 57 | model_type='lighting' 58 | ): 59 | if model_type == 'lighting': 60 | inverted_residual_setting = [ 61 | # t, c, n, s 62 | [1, 16, 1, 1], 63 | [6, 24, 2, 2], 64 | [6, 32, 3, 2], 65 | [6, 64, 4, 2], 66 | [6, 96, 3, 1], 67 | [6, 160, 3, 2], 68 | [6, 320, 1, 1], 69 | ] 70 | else: 71 | inverted_residual_setting = [ 72 | # t, c, n, s 73 | [1, 32, 1, 1], 74 | [6, 40, 2, 2], 75 | [6, 56, 3, 2], 76 | [6, 112, 4, 2], 77 | [6, 168, 3, 1], 78 | [6, 280, 3, 2], 79 | [6, 560, 1, 1], 80 | ] 81 | 82 | backbone = mobilenet_v2(pretrained=pretrained, norm_layer=norm_layer, inverted_residual_setting = inverted_residual_setting).features 83 | # print("backbone: ", backbone) 84 | 85 | # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. 86 | # The first and last blocks are always included because they are the C0 (conv1) and Cn. 87 | # mli: for mobilenet, the obtained stage_indices = [0, 2, 4, 7, 14, 18] 88 | # stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] 89 | # mli: the following block indices refer to the last layer of each stage (s4, s8, s16, s32) 90 | # **This is wrong** stage_indices = [2, 4, 7, 14] 91 | stage_indices = [3, 6, 10, 18] 92 | num_stages = len(stage_indices) 93 | # print("# stages: ", num_stages) 94 | # print("Stage indicse: ", stage_indices) 95 | 96 | # find the index of the layer from which we wont freeze 97 | assert 0 <= trainable_layers <= num_stages 98 | freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers] 99 | 100 | # mli: make all layers trainable. 
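    # The torchvision recipe would freeze all parameters before `freeze_before`;
    # the loop below is deliberately left commented out, so `trainable_layers`
    # currently has no effect and the whole backbone is trained end to end.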
101 | # for b in backbone[:freeze_before]: 102 | # for parameter in b.parameters(): 103 | # parameter.requires_grad_(False) 104 | 105 | out_channels = 24 106 | if fpn: 107 | # mli: remove the extra_blocks 108 | # if extra_blocks is None: 109 | # extra_blocks = LastLevelMaxPool() 110 | 111 | if returned_layers is None: 112 | returned_layers = list(range(num_stages)) 113 | assert min(returned_layers) >= 0 and max(returned_layers) < num_stages 114 | return_layers = {f'{stage_indices[k]}': str(v) for v, k in enumerate(returned_layers)} 115 | # print("Return layers: ", return_layers) 116 | 117 | in_channels_list = [backbone[stage_indices[i]].out_channels for i in returned_layers] 118 | # print("in_channels_list", in_channels_list) 119 | 120 | return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels) 121 | else: 122 | m = nn.Sequential( 123 | backbone, 124 | # depthwise linear combination of channels to reduce their size 125 | nn.Conv2d(backbone[-1].out_channels, out_channels, 1), 126 | ) 127 | m.out_channels = out_channels 128 | return m 129 | 130 | ''' 131 | # test the functionality 132 | if __name__=='__main__': 133 | """ 134 | Constructs a specified MobileNet v2 backbone with FPN on top. Freezes the specified number of layers in the backbone. 135 | 136 | Examples:: 137 | 138 | >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone 139 | >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3) 140 | >>> # get some dummy image 141 | >>> x = torch.rand(1,3,64,64) 142 | >>> # compute the output 143 | >>> output = backbone(x) 144 | >>> print([(k, v.shape) for k, v in output.items()]) 145 | >>> # returns 146 | >>> [('0', torch.Size([1, 256, 16, 16])), 147 | >>> ('1', torch.Size([1, 256, 8, 8])), 148 | >>> ('2', torch.Size([1, 256, 4, 4])), 149 | >>> ('3', torch.Size([1, 256, 2, 2])), 150 | >>> ('pool', torch.Size([1, 256, 1, 1]))] 151 | """ 152 | backbone = mobilenet_backbone('mobilenet_v2', fpn=True, pretrained=False, trainable_layers=3) 153 | x = torch.rand(1,3,192,192) 154 | # compute the output 155 | output = backbone(x) 156 | print('output shape: ', output.shape) 157 | ''' 158 | -------------------------------------------------------------------------------- /src/lib/models/networks/feature_pyramid_network.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Based on torchvision.ops.feature_pyramid_network. 3 | In the original paper, they `fix the feature dimension (numbers of channels, denoted as d) in all the feature maps.` 4 | However, by diving into the Movenet, I found out that the feature dimension is incrementally decreased, from 64 to 32 to 24. So I made the changes correspondingly. 
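Unlike torchvision's FeaturePyramidNetwork, which returns a dict with one map per level, this variant keeps only the top-down path and returns the finest (stride-4) map: each lateral is a 1x1 conv, the deeper map is upsampled by 2 and added, and a depthwise-separable conv reduces the fused result (64 -> 32 -> 24 channels) to the 24-channel feature consumed by the rest of the model.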
5 | ''' 6 | 7 | from collections import OrderedDict 8 | 9 | import torch.nn.functional as F 10 | from torch import nn, Tensor 11 | 12 | from typing import Tuple, List, Dict, Optional 13 | 14 | class SeperableConv(nn.Module): 15 | def __init__( 16 | self, 17 | inp: int, 18 | oup: int, 19 | activation_layer = None 20 | ) -> None: 21 | super(SeperableConv, self).__init__() 22 | 23 | if activation_layer is None: 24 | activation_layer = nn.ReLU 25 | 26 | hidden_dim = int(round(inp)) 27 | 28 | layers: List[nn.Module] = [] 29 | layers.extend([ 30 | # dw 31 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=1, padding=1, groups=hidden_dim, bias=True), 32 | # pw-linear 33 | nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=True), 34 | activation_layer(inplace=True), 35 | ]) 36 | self.conv = nn.Sequential(*layers) 37 | self.out_channels = oup 38 | 39 | def forward(self, x: Tensor) -> Tensor: 40 | return self.conv(x) 41 | 42 | 43 | class FeaturePyramidNetwork(nn.Module): 44 | """ 45 | Module that adds a FPN from on top of a set of feature maps. This is based on 46 | `"Feature Pyramid Network for Object Detection" `_. 47 | 48 | The feature maps are currently supposed to be in increasing depth 49 | order. 50 | 51 | The input to the model is expected to be an OrderedDict[Tensor], containing 52 | the feature maps on top of which the FPN will be added. 53 | 54 | Args: 55 | in_channels_list (list[int]): number of channels for each feature map that 56 | is passed to the module 57 | out_channels (int): number of channels of the FPN representation 58 | extra_blocks (ExtraFPNBlock or None): if provided, extra operations will 59 | be performed. It is expected to take the fpn features, the original 60 | features and the names of the original features as input, and returns 61 | a new list of feature maps and their corresponding names 62 | 63 | Examples:: 64 | 65 | >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) 66 | >>> # get some dummy data 67 | >>> x = OrderedDict() 68 | >>> x['feat0'] = torch.rand(1, 10, 64, 64) 69 | >>> x['feat2'] = torch.rand(1, 20, 16, 16) 70 | >>> x['feat3'] = torch.rand(1, 30, 8, 8) 71 | >>> # compute the FPN on top of x 72 | >>> output = m(x) 73 | >>> print([(k, v.shape) for k, v in output.items()]) 74 | >>> # returns 75 | >>> [('feat0', torch.Size([1, 5, 64, 64])), 76 | >>> ('feat2', torch.Size([1, 5, 16, 16])), 77 | >>> ('feat3', torch.Size([1, 5, 8, 8]))] 78 | 79 | """ 80 | def __init__( 81 | self, 82 | in_channels_list: List[int], # [24, 32, 64, 1280] 83 | out_channels_list: List[int], # [24, 32, 64, 64] 84 | fused_channels_list = List[int], # [24, 24, 32] 85 | ): 86 | super(FeaturePyramidNetwork, self).__init__() 87 | self.inner_blocks = nn.ModuleList() 88 | self.layer_blocks = nn.ModuleList() 89 | assert len(in_channels_list) == len(out_channels_list), 'The lengths of in_channels_list and out_channels_list should be equal.' 
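        # For every level i, a 1x1 "inner" conv maps in_channels_list[i] ->
        # out_channels_list[i]; all levels except the deepest also get a
        # depthwise-separable "layer" conv that shrinks the fused (lateral +
        # upsampled) map to fused_channels_list[i] before it is pushed one level down.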
90 | for i in range(len(in_channels_list)): 91 | in_channels = in_channels_list[i] 92 | out_channels = out_channels_list[i] 93 | if in_channels == 0 or out_channels == 0: 94 | raise ValueError("in_channels=0/out_channels=0 is currently not supported") 95 | inner_block_module = nn.Conv2d(in_channels, out_channels, 1) 96 | self.inner_blocks.append(inner_block_module) 97 | if i != len(in_channels_list) - 1: 98 | fused_channels = fused_channels_list[i] 99 | layer_block_module = SeperableConv(out_channels, fused_channels) 100 | self.layer_blocks.append(layer_block_module) 101 | 102 | # initialize parameters now to avoid modifying the initialization of top_blocks 103 | for m in self.modules(): 104 | if isinstance(m, nn.Conv2d): 105 | nn.init.kaiming_uniform_(m.weight, a=1) 106 | nn.init.constant_(m.bias, 0) 107 | 108 | def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: 109 | """ 110 | This is equivalent to self.inner_blocks[idx](x), 111 | but torchscript doesn't support this yet 112 | """ 113 | num_blocks = len(self.inner_blocks) 114 | if idx < 0: 115 | idx += num_blocks 116 | i = 0 117 | out = x 118 | for module in self.inner_blocks: 119 | if i == idx: 120 | out = module(x) 121 | i += 1 122 | return out 123 | 124 | def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: 125 | """ 126 | This is equivalent to self.layer_blocks[idx](x), 127 | but torchscript doesn't support this yet 128 | """ 129 | num_blocks = len(self.layer_blocks) 130 | if idx < 0: 131 | idx += num_blocks 132 | i = 0 133 | out = x 134 | for module in self.layer_blocks: 135 | if i == idx: 136 | out = module(x) 137 | i += 1 138 | return out 139 | 140 | def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: 141 | """ 142 | Computes the FPN for a set of feature maps. 143 | 144 | Args: 145 | x (OrderedDict[Tensor]): feature maps for each feature level. 146 | 147 | Returns: 148 | results (Tensor): highest maps after FPN layers. 149 | """ 150 | # unpack OrderedDict into two lists for easier handling 151 | names = list(x.keys()) 152 | x = list(x.values()) 153 | 154 | last_inner = self.get_result_from_inner_blocks(x[-1], -1) 155 | 156 | 157 | for idx in range(len(x)-2, -1, -1): 158 | inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) 159 | 160 | # for pytorch inference 161 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="bilinear", align_corners=False) 162 | # for model convertion, please comment the above line and uncomment the following line. 163 | # inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 164 | last_inner = inner_lateral + inner_top_down 165 | last_inner = self.get_result_from_layer_blocks(last_inner, idx) 166 | 167 | return last_inner 168 | -------------------------------------------------------------------------------- /src/lib/models/networks/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch import Tensor 4 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 5 | from typing import Callable, Any, Optional, List 6 | 7 | 8 | __all__ = ['MobileNetV2', 'mobilenet_v2'] 9 | 10 | 11 | model_urls = { 12 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 13 | } 14 | 15 | 16 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 17 | """ 18 | This function is taken from the original tf repo. 
19 | It ensures that all layers have a channel number that is divisible by 8 20 | It can be seen here: 21 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 22 | """ 23 | if min_value is None: 24 | min_value = divisor 25 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 26 | # Make sure that round down does not go down by more than 10%. 27 | if new_v < 0.9 * v: 28 | new_v += divisor 29 | return new_v 30 | 31 | 32 | class ConvBNActivation(nn.Sequential): 33 | def __init__( 34 | self, 35 | in_planes: int, 36 | out_planes: int, 37 | kernel_size: int = 3, 38 | stride: int = 1, 39 | groups: int = 1, 40 | norm_layer: Optional[Callable[..., nn.Module]] = None, 41 | activation_layer: Optional[Callable[..., nn.Module]] = None, 42 | dilation: int = 1, 43 | ) -> None: 44 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 45 | # if norm_layer is None: 46 | # norm_layer = nn.BatchNorm2d 47 | if activation_layer is None: 48 | activation_layer = nn.ReLU6 49 | if stride == 2 and kernel_size == 3: 50 | super().__init__( 51 | nn.ZeroPad2d((0, 1, 0, 1)), 52 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, 0, dilation=dilation, groups=groups, 53 | bias=True), 54 | # norm_layer(out_planes), 55 | activation_layer(inplace=True) 56 | ) 57 | else: 58 | super().__init__( 59 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, dilation=dilation, groups=groups, 60 | bias=True), 61 | # norm_layer(out_planes), 62 | activation_layer(inplace=True) 63 | ) 64 | self.out_channels = out_planes 65 | 66 | 67 | # necessary for backwards compatibility 68 | ConvBNReLU = ConvBNActivation 69 | 70 | 71 | class InvertedResidual(nn.Module): 72 | def __init__( 73 | self, 74 | inp: int, 75 | oup: int, 76 | stride: int, 77 | expand_ratio: int, 78 | norm_layer: Optional[Callable[..., nn.Module]] = None 79 | ) -> None: 80 | super(InvertedResidual, self).__init__() 81 | self.stride = stride 82 | assert stride in [1, 2] 83 | 84 | if norm_layer is None: 85 | norm_layer = nn.BatchNorm2d 86 | 87 | hidden_dim = int(round(inp * expand_ratio)) 88 | self.use_res_connect = self.stride == 1 and inp == oup 89 | 90 | layers: List[nn.Module] = [] 91 | if expand_ratio != 1: 92 | # pw 93 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 94 | 95 | layers.extend([ 96 | # dw 97 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 98 | # pw-linear 99 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=True), 100 | # norm_layer(oup), 101 | ]) 102 | self.conv = nn.Sequential(*layers) 103 | self.out_channels = oup 104 | self._is_cn = stride > 1 105 | 106 | def forward(self, x: Tensor) -> Tensor: 107 | if self.use_res_connect: 108 | return x + self.conv(x) 109 | else: 110 | return self.conv(x) 111 | 112 | 113 | class MobileNetV2(nn.Module): 114 | def __init__( 115 | self, 116 | num_classes: int = 1000, 117 | width_mult: float = 1.0, 118 | inverted_residual_setting: Optional[List[List[int]]] = None, 119 | round_nearest: int = 8, 120 | block: Optional[Callable[..., nn.Module]] = None, 121 | norm_layer: Optional[Callable[..., nn.Module]] = None 122 | ) -> None: 123 | """ 124 | MobileNet V2 main class 125 | 126 | Args: 127 | num_classes (int): Number of classes 128 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 129 | inverted_residual_setting: Network structure 130 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 
131 | Set to 1 to turn off rounding 132 | block: Module specifying inverted residual building block for mobilenet 133 | norm_layer: Module specifying the normalization layer to use 134 | 135 | """ 136 | super(MobileNetV2, self).__init__() 137 | 138 | if block is None: 139 | block = InvertedResidual 140 | 141 | if norm_layer is None: 142 | norm_layer = nn.BatchNorm2d 143 | 144 | input_channel = 32 * 1.75 145 | last_channel = 1280 146 | 147 | if inverted_residual_setting is None: 148 | inverted_residual_setting = [ 149 | # t, c, n, s 150 | [1, 16, 1, 1], 151 | [6, 24, 2, 2], 152 | [6, 32, 3, 2], 153 | [6, 64, 4, 2], 154 | [6, 96, 3, 1], 155 | [6, 160, 3, 2], 156 | [6, 320, 1, 1], 157 | ] 158 | 159 | # only check the first element, assuming user knows t,c,n,s are required 160 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 161 | raise ValueError("inverted_residual_setting should be non-empty " 162 | "or a 4-element list, got {}".format(inverted_residual_setting)) 163 | 164 | # building first layer 165 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 166 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 167 | features: List[nn.Module] = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 168 | # building inverted residual blocks 169 | for t, c, n, s in inverted_residual_setting: 170 | output_channel = _make_divisible(c * width_mult, round_nearest) 171 | for i in range(n): 172 | stride = s if i == 0 else 1 173 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 174 | input_channel = output_channel 175 | # building last several layers 176 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer)) 177 | # make it nn.Sequential 178 | self.features = nn.Sequential(*features) 179 | 180 | # building classifier 181 | self.classifier = nn.Sequential( 182 | nn.Dropout(0.2), 183 | nn.Linear(self.last_channel, num_classes), 184 | ) 185 | 186 | # weight initialization 187 | for m in self.modules(): 188 | if isinstance(m, nn.Conv2d): 189 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 190 | if m.bias is not None: 191 | nn.init.zeros_(m.bias) 192 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 193 | nn.init.ones_(m.weight) 194 | nn.init.zeros_(m.bias) 195 | elif isinstance(m, nn.Linear): 196 | nn.init.normal_(m.weight, 0, 0.01) 197 | nn.init.zeros_(m.bias) 198 | 199 | def _forward_impl(self, x: Tensor) -> Tensor: 200 | # This exists since TorchScript doesn't support inheritance, so the superclass method 201 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 202 | x = self.features(x) 203 | # Cannot use "squeeze" as batch-size can be 1 204 | x = nn.functional.adaptive_avg_pool2d(x, (1, 1)) 205 | x = torch.flatten(x, 1) 206 | x = self.classifier(x) 207 | return x 208 | 209 | def forward(self, x: Tensor) -> Tensor: 210 | return self._forward_impl(x) 211 | 212 | 213 | def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2: 214 | """ 215 | Constructs a MobileNetV2 architecture from 216 | `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. 
217 | 218 | Args: 219 | pretrained (bool): If True, returns a model pre-trained on ImageNet 220 | progress (bool): If True, displays a progress bar of the download to stderr 221 | """ 222 | model = MobileNetV2(**kwargs) 223 | if pretrained: 224 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], 225 | progress=progress) 226 | model.load_state_dict(state_dict) 227 | return model 228 | -------------------------------------------------------------------------------- /src/lib/models/networks/movenet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # Modified by Min Li 7 | # ------------------------------------------------------------------------------ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import os 14 | import math 15 | import logging 16 | 17 | import cv2 18 | 19 | from matplotlib import pyplot as plt 20 | import numpy as np 21 | import torch 22 | import torch.nn as nn 23 | from .backbone_utils import mobilenet_backbone 24 | import torch.utils.model_zoo as model_zoo 25 | 26 | BN_MOMENTUM = 0.1 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | class MoveNet(nn.Module): 31 | ''' 32 | MoveNet from Goolge. Please refer their blog: https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html 33 | 34 | ''' 35 | def __init__(self, backbone, heads, head_conv, ft_size=48): 36 | super(MoveNet, self).__init__() 37 | self.num_joints = heads["hm_hp"] 38 | self.out_channels = 24 39 | self.backbone = backbone 40 | self.heads = heads 41 | self.ft_size = ft_size 42 | self.weight_to_center = self._generate_center_dist(self.ft_size).unsqueeze(2) 43 | 44 | self.dist_y, self.dist_x = self._generate_dist_map(self.ft_size) 45 | self.index_17 = torch.arange(0, self.num_joints).float() 46 | 47 | for head in self.heads: 48 | classes = self.heads[head] 49 | if head_conv > 0: 50 | fc = nn.Sequential( 51 | nn.Conv2d(self.out_channels, self.out_channels, 3, padding=1, groups=self.out_channels, bias=True), 52 | nn.Conv2d(self.out_channels, head_conv, 1, 1, 0, bias=True), 53 | nn.ReLU(inplace=True), 54 | nn.Conv2d(head_conv, classes, 55 | kernel_size=1, stride=1, 56 | padding=0, bias=True)) 57 | else: 58 | fc = nn.Conv2d(64, classes, 59 | kernel_size=1, stride=1, 60 | padding=0, bias=True) 61 | self.__setattr__(head, fc) 62 | 63 | 64 | def forward(self, x): 65 | # conv forward 66 | # x = x * 0.007843137718737125 - 1.0 67 | # specify the device 68 | device = x.device 69 | self.weight_to_center = self.weight_to_center.to(device) 70 | self.dist_y, self.dist_x = self.dist_y.to(device), self.dist_x.to(device) 71 | 72 | x = self.backbone(x) 73 | ret = {} 74 | for head in self.heads: 75 | ret[head] = self.__getattr__(head)(x) 76 | 77 | return [ret] 78 | 79 | def decode(self, x): 80 | kpt_heatmap, center, kpt_regress, kpt_offset = x['hm_hp'].squeeze(0).permute((1, 2, 0)), x['hm'].squeeze(0).permute((1, 2, 0)), x['hps'].squeeze(0).permute((1, 2, 0)), x['hp_offset'].squeeze(0).permute((1, 2, 0)) 81 | 82 | # pose decode 83 | kpt_heatmap = torch.sigmoid(kpt_heatmap) 84 | center = torch.sigmoid(center) 85 | 86 | ct_ind = self._top_with_center(center) 87 | 88 | kpt_coor = 
self._center_to_kpt(kpt_regress, ct_ind) 89 | 90 | kpt_top_inds = self._kpt_from_heatmap(kpt_heatmap, kpt_coor) 91 | 92 | kpt_with_conf = self._kpt_from_offset(kpt_offset, kpt_top_inds, kpt_heatmap, self.ft_size) 93 | 94 | return kpt_with_conf 95 | 96 | 97 | def _draw(self, ft): 98 | plt.imshow(ft.numpy().reshape(self.ft_size, self.ft_size)) 99 | plt.show() 100 | 101 | def _generate_center_dist(self, ft_size=48, delta=1.8): 102 | weight_to_center = torch.zeros((int(ft_size), int(ft_size))) 103 | y, x = np.ogrid[0:ft_size, 0:ft_size] 104 | center_y, center_x = ft_size / 2.0, ft_size/ 2.0 105 | y = y - center_y 106 | x = x - center_x 107 | weight_to_center = 1 / (np.sqrt(y * y + x * x) + delta) 108 | weight_to_center = torch.from_numpy(weight_to_center) 109 | return weight_to_center 110 | 111 | def _generate_dist_map(self, ft_size=48): 112 | y, x = np.ogrid[0:ft_size, 0:ft_size] 113 | y = torch.from_numpy(np.repeat(y, ft_size, axis=1)).unsqueeze(2).float() 114 | x = torch.from_numpy(np.repeat(x, ft_size, axis=0)).unsqueeze(2).float() 115 | 116 | return y, x 117 | 118 | 119 | def _top_with_center(self, center): 120 | scores = center * self.weight_to_center 121 | 122 | top_ind = torch.argmax(scores.view(1, self.ft_size * self.ft_size, 1), dim=1) 123 | return top_ind 124 | 125 | def _center_to_kpt(self, kpt_regress, ct_ind, ft_size=48): 126 | ct_y = torch.div(ct_ind, ft_size, rounding_mode='floor') 127 | # ct_y = (ct_ind.float() / ft_size).int().float() 128 | ct_x = ct_ind - ct_y * ft_size 129 | 130 | kpt_regress = kpt_regress.view(-1, self.num_joints, 2) 131 | ct_ind = ct_ind.unsqueeze(2).expand(ct_ind.size(0), self.num_joints, 2) 132 | kpt_coor = kpt_regress.gather(0, ct_ind).squeeze(0) 133 | 134 | kpt_coor = kpt_coor + torch.cat((ct_y, ct_x), dim=1) 135 | 136 | return kpt_coor 137 | 138 | def _kpt_from_heatmap(self, kpt_heatmap, kpt_coor): 139 | y = self.dist_y - kpt_coor[:, 0].reshape(1, 1, self.num_joints) 140 | x = self.dist_x - kpt_coor[:, 1].reshape(1, 1, self.num_joints) 141 | dist_weight = torch.sqrt(y * y + x * x) + 1.8 142 | 143 | scores = kpt_heatmap / dist_weight 144 | scores = scores.reshape((1, self.ft_size * self.ft_size, self.num_joints)) 145 | top_inds = torch.argmax(scores, dim=1) 146 | 147 | return top_inds 148 | 149 | def _kpt_from_offset(self, kpt_offset, kpt_top_inds, kpt_heatmap, size=48): 150 | kpts_ys = torch.div(kpt_top_inds, size, rounding_mode='floor') 151 | # kpts_ys = (kpt_top_inds.float() / size).int().float() 152 | kpts_xs = kpt_top_inds - kpts_ys * size 153 | kpt_coordinate = torch.stack((kpts_ys.squeeze(0), kpts_xs.squeeze(0)), dim=1) 154 | 155 | kpt_heatmap = kpt_heatmap.view(-1, self.num_joints) 156 | kpt_conf = kpt_heatmap.gather(0, kpt_top_inds).squeeze(0) 157 | 158 | kpt_offset = kpt_offset.view(-1, self.num_joints, 2) 159 | kpt_top_inds = kpt_top_inds.unsqueeze(2).expand(kpt_top_inds.size(0), self.num_joints, 2) 160 | kpt_offset_yx = kpt_offset.gather(0, kpt_top_inds).squeeze(0) 161 | 162 | kpt_coordinate= (kpt_offset_yx + kpt_coordinate) * (1/size) 163 | kpt_with_conf = torch.cat([kpt_coordinate, kpt_conf.unsqueeze(1)], dim=1).reshape((1, 1, self.num_joints, 3)) 164 | 165 | return kpt_with_conf 166 | 167 | 168 | 169 | 170 | # def get_pose_net(heads, head_conv=96, froze_backbone=True): 171 | # backbone = mobilenet_backbone('mobilenet_v2', pretrained=False, fpn=True) 172 | # if froze_backbone: 173 | # for param in backbone.parameters(): 174 | # param.requires_grad = False 175 | # model = MoveNet(backbone, heads, head_conv=head_conv) 176 | # return model 
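# A minimal usage sketch; the head sizes and the 192x192 "lighting" input below are
# assumptions inferred from the 17-keypoint layout and ft_size=48 used above:
#
#   heads = {'hm': 1, 'hm_hp': 17, 'hps': 34, 'hp_offset': 34}
#   net = get_pose_net(heads, head_conv=96, model_type='lighting')
#   raw = net(torch.randn(1, 3, 192, 192))[0]   # dict with one tensor per head
#   kpt_with_conf = net.decode(raw)             # shape (1, 1, 17, 3): y, x, score in [0, 1]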
177 | 178 | def get_pose_net(heads, head_conv=96, froze_backbone=True, model_type = 'lighting'): 179 | backbone = mobilenet_backbone('mobilenet_v2', pretrained=False, fpn=True, trainable_layers=0, model_type = model_type) 180 | if froze_backbone: 181 | for param in backbone.parameters(): 182 | param.requires_grad = False 183 | if model_type == 'lighting': 184 | ft_size = 48 185 | else: 186 | ft_size = 64 187 | model = MoveNet(backbone, heads, head_conv=head_conv, ft_size = ft_size) 188 | # froze 189 | '''for k,v in model.named_parameters(): 190 | head_name = k.split('.')[0] 191 | if head_name == 'hm' or head_name == 'hps': 192 | v.requires_grad = False''' 193 | 194 | return model -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | def _sigmoid(x): 10 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 11 | return y 12 | 13 | 14 | def _gather_feat(feat, ind, mask=None): 15 | dim = feat.size(2) 16 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 17 | feat = feat.gather(1, ind) 18 | if mask is not None: 19 | mask = mask.unsqueeze(2).expand_as(feat) 20 | feat = feat[mask] 21 | feat = feat.view(-1, dim) 22 | return feat 23 | 24 | 25 | def _gather_feat_plus(feat, ind, num_joints): 26 | # num_objs = ind.size(1) / 17 27 | ind = ind.view(ind.size(0), -1, num_joints) 28 | ind = ind.unsqueeze(3).expand(ind.size(0), ind.size(1), ind.size(2), 2) 29 | feat = feat.gather(1, ind) 30 | return feat 31 | 32 | 33 | def _transpose_and_gather_feat(feat, ind): 34 | feat = 
feat.permute(0, 2, 3, 1).contiguous() 35 | feat = feat.view(feat.size(0), -1, feat.size(3)) 36 | feat = _gather_feat(feat, ind) 37 | return feat 38 | 39 | def _transpose_and_gather_feat_plus(feat, ind, num_joints): 40 | feat = feat.permute(0, 2, 3, 1).contiguous() 41 | feat = feat.view(feat.size(0), -1, num_joints, 2) 42 | feat = _gather_feat_plus(feat, ind, num_joints) 43 | feat = feat.view(feat.size(0), -1, 2) 44 | return feat 45 | 46 | 47 | def flip_tensor(x): 48 | return torch.flip(x, [3]) 49 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 50 | # return torch.from_numpy(tmp).to(x.device) 51 | 52 | 53 | def flip_lr(x, flip_idx): 54 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 55 | shape = tmp.shape 56 | for e in flip_idx: 57 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 58 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 59 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 60 | 61 | 62 | def flip_lr_off(x, flip_idx, num_joints): 63 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 64 | shape = tmp.shape 65 | tmp = tmp.reshape(tmp.shape[0], num_joints, 2, 66 | tmp.shape[2], tmp.shape[3]) 67 | tmp[:, :, 0, :, :] *= -1 68 | for e in flip_idx: 69 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 70 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 71 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 72 | -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | class BaseTrainer(object): 24 | def __init__( 25 | self, opt, model, optimizer=None): 26 | self.opt = opt 27 | self.optimizer = optimizer 28 | self.loss_stats, self.loss = self._get_losses(opt) 29 | self.model_with_loss = ModelWithLoss(model, self.loss) 30 | 31 | def set_device(self, gpus, chunk_sizes, device): 32 | if len(gpus) > 1: 33 | self.model_with_loss = DataParallel( 34 | self.model_with_loss, device_ids=gpus, 35 | chunk_sizes=chunk_sizes).to(device) 36 | else: 37 | self.model_with_loss = self.model_with_loss.to(device) 38 | 39 | for state in self.optimizer.state.values(): 40 | for k, v in state.items(): 41 | if isinstance(v, torch.Tensor): 42 | state[k] = v.to(device=device, non_blocking=True) 43 | 44 | def run_epoch(self, phase, epoch, data_loader): 45 | model_with_loss = self.model_with_loss 46 | if phase == 'train': 47 | model_with_loss.train() 48 | else: 49 | if len(self.opt.gpus) > 1: 50 | model_with_loss = self.model_with_loss.module 51 | model_with_loss.eval() 52 | torch.cuda.empty_cache() 53 | 54 | opt = self.opt 55 | results = {} 56 | data_time, batch_time = AverageMeter(), AverageMeter() 57 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 58 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 59 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 60 | end = time.time() 61 | for 
iter_id, batch in enumerate(data_loader): 62 | if iter_id >= num_iters: 63 | break 64 | data_time.update(time.time() - end) 65 | 66 | for k in batch: 67 | if k != 'meta': 68 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 69 | output, loss, loss_stats = model_with_loss(batch) 70 | loss = loss.mean() 71 | if phase == 'train': 72 | self.optimizer.zero_grad() 73 | loss.backward() 74 | self.optimizer.step() 75 | batch_time.update(time.time() - end) 76 | end = time.time() 77 | 78 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 79 | epoch, iter_id, num_iters, phase=phase, 80 | total=bar.elapsed_td, eta=bar.eta_td) 81 | for l in avg_loss_stats: 82 | avg_loss_stats[l].update( 83 | loss_stats[l].mean().item(), batch['input'].size(0)) 84 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 85 | if not opt.hide_data_time: 86 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 87 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 88 | if opt.print_iter > 0: 89 | if iter_id % opt.print_iter == 0: 90 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 91 | else: 92 | bar.next() 93 | 94 | if opt.debug > 0: 95 | self.debug(batch, output, iter_id) 96 | 97 | if opt.test: 98 | self.save_result(output, batch, results) 99 | del output, loss, loss_stats 100 | 101 | bar.finish() 102 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 103 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 104 | return ret, results 105 | 106 | def debug(self, batch, output, iter_id): 107 | raise NotImplementedError 108 | 109 | def save_result(self, output, batch, results): 110 | raise NotImplementedError 111 | 112 | def _get_losses(self, opt): 113 | raise NotImplementedError 114 | 115 | def val(self, epoch, data_loader): 116 | return self.run_epoch('val', epoch, data_loader) 117 | 118 | def train(self, epoch, data_loader): 119 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /src/lib/trains/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss 9 | from models.decode import multi_pose_decode 10 | from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr 11 | from utils.debugger import Debugger 12 | from utils.post_process import multi_pose_post_process 13 | from utils.oracle_utils import gen_oracle_map 14 | from .base_trainer import BaseTrainer 15 | 16 | class MultiPoseLoss(torch.nn.Module): 17 | def __init__(self, opt): 18 | super(MultiPoseLoss, self).__init__() 19 | self.crit = FocalLoss() 20 | self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 21 | self.crit_kp = RegWeightedL1Loss() if not opt.dense_hp else \ 22 | torch.nn.L1Loss(reduction='sum') 23 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 24 | RegLoss() if opt.reg_loss == 'sl1' else None 25 | self.opt = opt 26 | 27 | def forward(self, outputs, batch): 28 | opt = self.opt 29 | hm_loss, wh_loss, off_loss = 0, 0, 0 30 | hp_loss, off_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0 31 | for s in range(opt.num_stacks): 32 | output = outputs[s] 33 | output['hm'] = _sigmoid(output['hm']) 34 | if opt.hm_hp and not opt.mse_loss: 35 | output['hm_hp'] = 
_sigmoid(output['hm_hp']) 36 | 37 | if opt.eval_oracle_hmhp: 38 | output['hm_hp'] = batch['hm_hp'] 39 | if opt.eval_oracle_hm: 40 | output['hm'] = batch['hm'] 41 | if opt.eval_oracle_kps: 42 | if opt.dense_hp: 43 | output['hps'] = batch['dense_hps'] 44 | else: 45 | output['hps'] = torch.from_numpy(gen_oracle_map( 46 | batch['hps'].detach().cpu().numpy(), 47 | batch['ind'].detach().cpu().numpy(), 48 | opt.output_res, opt.output_res)).to(opt.device) 49 | if opt.eval_oracle_hp_offset: 50 | output['hp_offset'] = torch.from_numpy(gen_oracle_map( 51 | batch['hp_offset'].detach().cpu().numpy(), 52 | batch['hp_ind'].detach().cpu().numpy(), 53 | opt.output_res, opt.output_res)).to(opt.device) 54 | 55 | 56 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 57 | if opt.dense_hp: 58 | mask_weight = batch['dense_hps_mask'].sum() + 1e-4 59 | hp_loss += (self.crit_kp(output['hps'] * batch['dense_hps_mask'], 60 | batch['dense_hps'] * batch['dense_hps_mask']) / 61 | mask_weight) / opt.num_stacks 62 | else: 63 | hp_loss += self.crit_kp(output['hps'], batch['hps_mask'], 64 | batch['ind'], batch['hps']) / opt.num_stacks 65 | if opt.wh_weight > 0: 66 | wh_loss += self.crit_reg(output['wh'], batch['reg_mask'], 67 | batch['ind'], batch['wh']) / opt.num_stacks 68 | if opt.reg_offset and opt.off_weight > 0: 69 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 70 | batch['ind'], batch['reg']) / opt.num_stacks 71 | if opt.reg_hp_offset and opt.off_weight > 0: 72 | hp_offset_loss += self.crit_reg( 73 | output['hp_offset'], batch['hp_mask'], 74 | batch['hp_ind'], batch['hp_offset']) / opt.num_stacks 75 | if opt.hm_hp and opt.hm_hp_weight > 0: 76 | hm_hp_loss += self.crit_hm_hp( 77 | output['hm_hp'], batch['hm_hp']) / opt.num_stacks 78 | loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \ 79 | opt.off_weight * off_loss + opt.hp_weight * hp_loss + \ 80 | opt.hm_hp_weight * hm_hp_loss + opt.off_weight * hp_offset_loss 81 | 82 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'hp_loss': hp_loss, 83 | 'hm_hp_loss': hm_hp_loss, 'hp_offset_loss': hp_offset_loss, 84 | 'wh_loss': wh_loss, 'off_loss': off_loss} 85 | return loss, loss_stats 86 | 87 | class MultiPoseTrainer(BaseTrainer): 88 | def __init__(self, opt, model, optimizer=None): 89 | super(MultiPoseTrainer, self).__init__(opt, model, optimizer=optimizer) 90 | 91 | def _get_losses(self, opt): 92 | loss_states = ['loss', 'hm_loss', 'hp_loss', 'hm_hp_loss', 93 | 'hp_offset_loss', 'wh_loss', 'off_loss'] 94 | loss = MultiPoseLoss(opt) 95 | return loss_states, loss 96 | 97 | def debug(self, batch, output, iter_id): 98 | opt = self.opt 99 | reg = output['reg'] if opt.reg_offset else None 100 | hm_hp = output['hm_hp'] if opt.hm_hp else None 101 | hp_offset = output['hp_offset'] if opt.reg_hp_offset else None 102 | dets = multi_pose_decode( 103 | output['hm'], output['wh'], output['hps'], 104 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K) 105 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 106 | 107 | dets[:, :, :4] *= opt.input_res / opt.output_res 108 | dets[:, :, 5:39] *= opt.input_res / opt.output_res 109 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 110 | dets_gt[:, :, :4] *= opt.input_res / opt.output_res 111 | dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res 112 | for i in range(1): 113 | debugger = Debugger( 114 | dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme) 115 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 116 | img = np.clip((( 
117 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 118 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 119 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 120 | debugger.add_blend_img(img, pred, 'pred_hm') 121 | debugger.add_blend_img(img, gt, 'gt_hm') 122 | 123 | debugger.add_img(img, img_id='out_pred') 124 | for k in range(len(dets[i])): 125 | if dets[i, k, 4] > opt.center_thresh: 126 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 127 | dets[i, k, 4], img_id='out_pred') 128 | debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred') 129 | 130 | debugger.add_img(img, img_id='out_gt') 131 | for k in range(len(dets_gt[i])): 132 | if dets_gt[i, k, 4] > opt.center_thresh: 133 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 134 | dets_gt[i, k, 4], img_id='out_gt') 135 | debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt') 136 | 137 | if opt.hm_hp: 138 | pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy()) 139 | gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy()) 140 | debugger.add_blend_img(img, pred, 'pred_hmhp') 141 | debugger.add_blend_img(img, gt, 'gt_hmhp') 142 | 143 | if opt.debug == 4: 144 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 145 | else: 146 | debugger.show_all_imgs(pause=True) 147 | 148 | def save_result(self, output, batch, results): 149 | reg = output['reg'] if self.opt.reg_offset else None 150 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 151 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 152 | dets = multi_pose_decode( 153 | output['hm'], output['wh'], output['hps'], 154 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 155 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 156 | 157 | dets_out = multi_pose_post_process( 158 | dets.copy(), batch['meta']['c'].cpu().numpy(), 159 | batch['meta']['s'].cpu().numpy(), 160 | output['hm'].shape[2], output['hm'].shape[3]) 161 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 162 | -------------------------------------------------------------------------------- /src/lib/trains/single_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss 9 | from models.decode import multi_pose_decode 10 | from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr 11 | from utils.debugger import Debugger 12 | from utils.post_process import multi_pose_post_process 13 | from utils.oracle_utils import gen_oracle_map 14 | from .base_trainer import BaseTrainer 15 | 16 | 17 | class SinglePoseLoss(torch.nn.Module): 18 | ''' 19 | Same as MultiPoseLoss. 20 | Modified by Min LI to support (17 * 2) human pose local offset loss setting. 
21 | ''' 22 | 23 | def __init__(self, opt): 24 | super(SinglePoseLoss, self).__init__() 25 | self.crit = FocalLoss() 26 | self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 27 | self.crit_kp = RegWeightedL1Loss() 28 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 29 | RegLoss() if opt.reg_loss == 'sl1' else None 30 | self.opt = opt 31 | 32 | def forward(self, outputs, batch): 33 | opt = self.opt 34 | hm_loss, hp_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0 35 | for s in range(opt.num_stacks): 36 | output = outputs[s] 37 | output['hm'] = _sigmoid(output['hm']) 38 | output['hm_hp'] = _sigmoid(output['hm_hp']) 39 | 40 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 41 | 42 | hp_loss += self.crit_kp(output['hps'], batch['hps_mask'], 43 | batch['ind'], batch['hps']) / opt.num_stacks 44 | hp_offset_loss += self.crit_reg( 45 | output['hp_offset'], batch['hp_mask'], 46 | batch['hp_ind'], batch['hp_offset']) / opt.num_stacks 47 | hm_hp_loss += self.crit_hm_hp( 48 | output['hm_hp'], batch['hm_hp']) / opt.num_stacks 49 | loss = opt.hm_weight * hm_loss + \ 50 | opt.hp_weight * hp_loss + \ 51 | opt.hm_hp_weight * hm_hp_loss + opt.off_weight * hp_offset_loss 52 | 53 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'hp_loss': hp_loss, 54 | 'hm_hp_loss': hm_hp_loss, 'hp_offset_loss': hp_offset_loss} 55 | return loss, loss_stats 56 | 57 | 58 | class SinglePoseTrainer(BaseTrainer): 59 | def __init__(self, opt, model, optimizer=None): 60 | super(SinglePoseTrainer, self).__init__( 61 | opt, model, optimizer=optimizer) 62 | 63 | def _get_losses(self, opt): 64 | loss_states = ['loss', 'hm_loss', 'hp_loss', 'hm_hp_loss', 65 | 'hp_offset_loss'] 66 | loss = SinglePoseLoss(opt) 67 | return loss_states, loss 68 | 69 | def debug(self, batch, output, iter_id): 70 | opt = self.opt 71 | reg = output['reg'] if opt.reg_offset else None 72 | hm_hp = output['hm_hp'] if opt.hm_hp else None 73 | hp_offset = output['hp_offset'] if opt.reg_hp_offset else None 74 | dets = multi_pose_decode( 75 | output['hm'], output['wh'], output['hps'], 76 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K) 77 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 78 | 79 | dets[:, :, :4] *= opt.input_res / opt.output_res 80 | dets[:, :, 5:39] *= opt.input_res / opt.output_res 81 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 82 | dets_gt[:, :, :4] *= opt.input_res / opt.output_res 83 | dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res 84 | for i in range(1): 85 | debugger = Debugger( 86 | dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) 87 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 88 | img = np.clip((( 89 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 90 | pred = debugger.gen_colormap( 91 | output['hm'][i].detach().cpu().numpy()) 92 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 93 | debugger.add_blend_img(img, pred, 'pred_hm') 94 | debugger.add_blend_img(img, gt, 'gt_hm') 95 | 96 | debugger.add_img(img, img_id='out_pred') 97 | for k in range(len(dets[i])): 98 | if dets[i, k, 4] > opt.center_thresh: 99 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 100 | dets[i, k, 4], img_id='out_pred') 101 | debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred') 102 | 103 | debugger.add_img(img, img_id='out_gt') 104 | for k in range(len(dets_gt[i])): 105 | if dets_gt[i, k, 4] > opt.center_thresh: 106 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 107 
| dets_gt[i, k, 4], img_id='out_gt') 108 | debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt') 109 | 110 | if opt.hm_hp: 111 | pred = debugger.gen_colormap_hp( 112 | output['hm_hp'][i].detach().cpu().numpy()) 113 | gt = debugger.gen_colormap_hp( 114 | batch['hm_hp'][i].detach().cpu().numpy()) 115 | debugger.add_blend_img(img, pred, 'pred_hmhp') 116 | debugger.add_blend_img(img, gt, 'gt_hmhp') 117 | 118 | if opt.debug == 4: 119 | debugger.save_all_imgs( 120 | opt.debug_dir, prefix='{}'.format(iter_id)) 121 | else: 122 | debugger.show_all_imgs(pause=True) 123 | 124 | def save_result(self, output, batch, results): 125 | reg = output['reg'] if self.opt.reg_offset else None 126 | hm_hp = output['hm_hp'] if self.opt.hm_hp else None 127 | hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None 128 | dets = multi_pose_decode( 129 | output['hm'], output['wh'], output['hps'], 130 | reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K) 131 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 132 | 133 | dets_out = multi_pose_post_process( 134 | dets.copy(), batch['meta']['c'].cpu().numpy(), 135 | batch['meta']['s'].cpu().numpy(), 136 | output['hm'].shape[2], output['hm'].shape[3]) 137 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 138 | -------------------------------------------------------------------------------- /src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .multi_pose import MultiPoseTrainer 6 | from .single_pose import SinglePoseTrainer 7 | 8 | train_factory = { 9 | 'multi_pose': MultiPoseTrainer, 10 | 'single_pose': SinglePoseTrainer 11 | } 12 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lee-man/movenet/1b3b9db5c0eaf98f22806bcd2eacf2aba50e7ec1/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | def transform_preds(coords, center, scale, output_size): 20 | target_coords = np.zeros(coords.shape) 21 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 22 | for p in range(coords.shape[0]): 23 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 24 | return target_coords 25 | 26 | 27 | def get_affine_transform(center, 28 | scale, 29 | rot, 30 | output_size, 31 | shift=np.array([0, 0], dtype=np.float32), 32 | inv=0): 33 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 34 | scale = np.array([scale, scale], dtype=np.float32) 35 | 36 | scale_tmp = scale 37 | src_w = scale_tmp[0] 38 | dst_w = output_size[0] 39 | dst_h = output_size[1] 40 | 41 | rot_rad = np.pi * rot / 180 42 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 43 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 44 | 45 | src = np.zeros((3, 2), dtype=np.float32) 46 | dst = np.zeros((3, 2), dtype=np.float32) 47 | src[0, :] = center + scale_tmp * shift 48 | src[1, :] = center + src_dir + scale_tmp * shift 49 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 50 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 51 | 52 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 53 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 54 | 55 | if inv: 56 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 57 | else: 58 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 59 | 60 | return trans 61 | 62 | 63 | def affine_transform(pt, t): 64 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 65 | new_pt = np.dot(t, new_pt) 66 | return new_pt[:2] 67 | 68 | 69 | def get_3rd_point(a, b): 70 | direct = a - b 71 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 72 | 73 | 74 | def get_dir(src_point, rot_rad): 75 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 76 | 77 | src_result = [0, 0] 78 | src_result[0] = src_point[0] * cs - src_point[1] * sn 79 | src_result[1] = src_point[0] * sn + src_point[1] * cs 80 | 81 | return src_result 82 | 83 | 84 | def crop(img, center, scale, output_size, rot=0): 85 | trans = get_affine_transform(center, scale, rot, output_size) 86 | 87 | dst_img = cv2.warpAffine(img, 88 | trans, 89 | (int(output_size[0]), int(output_size[1])), 90 | flags=cv2.INTER_LINEAR) 91 | 92 | return dst_img 93 | 94 | 95 | def gaussian_radius(det_size, min_overlap=0.7): 96 | height, width = det_size 97 | 98 | a1 = 1 99 | b1 = (height + width) 100 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 101 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 102 | r1 = (b1 + sq1) / 2 103 | 104 | a2 = 4 105 | b2 = 2 * (height + width) 106 | c2 = (1 - min_overlap) * width * height 107 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 108 | r2 = (b2 + sq2) / 2 109 | 110 | a3 = 4 * min_overlap 111 | b3 = -2 * min_overlap * (height + width) 112 | c3 = (min_overlap - 1) * width * height 113 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 114 | r3 = (b3 + sq3) / 2 115 | return min(r1, r2, r3) 116 | 117 | 118 | def gaussian2D(shape, sigma=1): 119 | m, n = [(ss - 1.) / 2. 
for ss in shape] 120 | y, x = np.ogrid[-m:m+1,-n:n+1] 121 | 122 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 123 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 124 | return h 125 | 126 | def draw_umich_gaussian(heatmap, center, radius, k=1): 127 | diameter = 2 * radius + 1 128 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 129 | 130 | x, y = int(center[0]), int(center[1]) 131 | 132 | height, width = heatmap.shape[0:2] 133 | 134 | left, right = min(x, radius), min(width - x, radius + 1) 135 | top, bottom = min(y, radius), min(height - y, radius + 1) 136 | 137 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 138 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 139 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 140 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 141 | return heatmap 142 | 143 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 144 | diameter = 2 * radius + 1 145 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 146 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 147 | dim = value.shape[0] 148 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 149 | if is_offset and dim == 2: 150 | delta = np.arange(diameter*2+1) - radius 151 | reg[0] = reg[0] - delta.reshape(1, -1) 152 | reg[1] = reg[1] - delta.reshape(-1, 1) 153 | 154 | x, y = int(center[0]), int(center[1]) 155 | 156 | height, width = heatmap.shape[0:2] 157 | 158 | left, right = min(x, radius), min(width - x, radius + 1) 159 | top, bottom = min(y, radius), min(height - y, radius + 1) 160 | 161 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 162 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 163 | masked_gaussian = gaussian[radius - top:radius + bottom, 164 | radius - left:radius + right] 165 | masked_reg = reg[:, radius - top:radius + bottom, 166 | radius - left:radius + right] 167 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 168 | idx = (masked_gaussian >= masked_heatmap).reshape( 169 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 170 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 171 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 172 | return regmap 173 | 174 | 175 | def draw_msra_gaussian(heatmap, center, sigma): 176 | tmp_size = sigma * 3 177 | mu_x = int(center[0] + 0.5) 178 | mu_y = int(center[1] + 0.5) 179 | w, h = heatmap.shape[0], heatmap.shape[1] 180 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 181 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 182 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 183 | return heatmap 184 | size = 2 * tmp_size + 1 185 | x = np.arange(0, size, 1, np.float32) 186 | y = x[:, np.newaxis] 187 | x0 = y0 = size // 2 188 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 189 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 190 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 191 | img_x = max(0, ul[0]), min(br[0], h) 192 | img_y = max(0, ul[1]), min(br[1], w) 193 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 194 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 195 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 196 | return heatmap 197 | 198 | def grayscale(image): 199 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 200 | 201 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 202 | alpha = 
data_rng.normal(scale=alphastd, size=(3, )) 203 | image += np.dot(eigvec, eigval * alpha) 204 | 205 | def blend_(alpha, image1, image2): 206 | image1 *= alpha 207 | image2 *= (1 - alpha) 208 | image1 += image2 209 | 210 | def saturation_(data_rng, image, gs, gs_mean, var): 211 | alpha = 1. + data_rng.uniform(low=-var, high=var) 212 | blend_(alpha, image, gs[:, :, None]) 213 | 214 | def brightness_(data_rng, image, gs, gs_mean, var): 215 | alpha = 1. + data_rng.uniform(low=-var, high=var) 216 | image *= alpha 217 | 218 | def contrast_(data_rng, image, gs, gs_mean, var): 219 | alpha = 1. + data_rng.uniform(low=-var, high=var) 220 | blend_(alpha, image, gs_mean) 221 | 222 | def color_aug(data_rng, image, eig_val, eig_vec): 223 | functions = [brightness_, contrast_, saturation_] 224 | random.shuffle(functions) 225 | 226 | gs = grayscale(image) 227 | gs_mean = gs.mean() 228 | for f in functions: 229 | f(data_rng, image, gs, gs_mean, 0.4) 230 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 231 | -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | 8 | 9 | def get_pred_depth(depth): 10 | return depth 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | 22 | 23 | def ctdet_post_process(dets, c, s, h, w, num_classes): 24 | # dets: batch x max_dets x dim 25 | # return 1-based class det dict 26 | ret = [] 27 | for i in range(dets.shape[0]): 
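    # For each image in the batch: warp the predicted box corners back to the
    # original image frame with the inverse affine transform, then group the
    # [x1, y1, x2, y2, score] rows into a dict keyed by 1-based class id.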
28 | top_preds = {} 29 | dets[i, :, :2] = transform_preds( 30 | dets[i, :, 0:2], c[i], s[i], (w, h)) 31 | dets[i, :, 2:4] = transform_preds( 32 | dets[i, :, 2:4], c[i], s[i], (w, h)) 33 | classes = dets[i, :, -1] 34 | for j in range(num_classes): 35 | inds = (classes == j) 36 | top_preds[j + 1] = np.concatenate([ 37 | dets[i, inds, :4].astype(np.float32), 38 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 39 | ret.append(top_preds) 40 | return ret 41 | 42 | 43 | def multi_pose_post_process(dets, c, s, h, w): 44 | # dets: batch x max_dets x 40 45 | # return list of 39 in image coord 46 | ret = [] 47 | for i in range(dets.shape[0]): 48 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 49 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 50 | top_preds = np.concatenate( 51 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 52 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 53 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 54 | return ret 55 | 56 | def single_pose_post_process(dets, h, w): 57 | # post_process for Movenet especially 58 | # restore is the original size is not square. 59 | longEdge = max(h, w) 60 | dets[:, 0] = dets[:, 0] * longEdge 61 | dets[:, 1] = dets[:, 1] * longEdge 62 | if h > w: 63 | dets[:, 1] = dets[:, 1] - (h - w) // 2 64 | elif w > h: 65 | dets[:, 0] = dets[:, 0] - (w - h) // 2 66 | return dets -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | 9 | import torch 10 | import torch.utils.data 11 | from opts import opts 12 | from models.model import create_model, load_model, save_model 13 | from models.data_parallel import DataParallel 14 | from logger import Logger 15 | from datasets.dataset_factory import get_dataset 16 | from trains.train_factory import train_factory 17 | 18 | 19 | def main(opt): 20 | torch.manual_seed(opt.seed) 21 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 22 | Dataset = get_dataset(opt.dataset, opt.task) 23 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 24 | print(opt) 25 | 26 | logger = Logger(opt) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 29 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 30 | 31 | print('Creating model...') 32 | model = create_model(opt.arch, opt.heads, 33 | opt.head_conv, opt.froze_backbone) 34 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 35 | start_epoch = 0 36 | if opt.load_model != '': 37 | model, optimizer, start_epoch = load_model( 38 | 
model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) 39 | 40 | 41 | Trainer = train_factory[opt.task] 42 | trainer = Trainer(opt, model, optimizer) 43 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 44 | 45 | print('Setting up data...') 46 | val_loader = torch.utils.data.DataLoader( 47 | Dataset(opt, 'val'), 48 | batch_size=1, 49 | shuffle=False, 50 | num_workers=1, 51 | pin_memory=True 52 | ) 53 | 54 | if opt.test: 55 | _, preds = trainer.val(0, val_loader) 56 | val_loader.dataset.run_eval(preds, opt.save_dir) 57 | return 58 | 59 | train_loader = torch.utils.data.DataLoader( 60 | Dataset(opt, 'train'), 61 | batch_size=opt.batch_size, 62 | shuffle=True, 63 | num_workers=opt.num_workers, 64 | pin_memory=True, 65 | drop_last=True 66 | ) 67 | 68 | print('Starting training...') 69 | best = 1e10 70 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 71 | mark = epoch if opt.save_all else 'last' 72 | log_dict_train, _ = trainer.train(epoch, train_loader) 73 | logger.write('epoch: {} |'.format(epoch)) 74 | for k, v in log_dict_train.items(): 75 | logger.scalar_summary('train_{}'.format(k), v, epoch) 76 | logger.write('{} {:8f} | '.format(k, v)) 77 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 78 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 79 | epoch, model, optimizer) 80 | with torch.no_grad(): 81 | log_dict_val, preds = trainer.val(epoch, val_loader) 82 | for k, v in log_dict_val.items(): 83 | logger.scalar_summary('val_{}'.format(k), v, epoch) 84 | logger.write('{} {:8f} | '.format(k, v)) 85 | if log_dict_val[opt.metric] < best: 86 | best = log_dict_val[opt.metric] 87 | save_model(os.path.join(opt.save_dir, 'model_best.pth'), 88 | epoch, model) 89 | else: 90 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 91 | epoch, model, optimizer) 92 | logger.write('\n') 93 | if epoch in opt.lr_step: 94 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 95 | epoch, model, optimizer) 96 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 97 | print('Drop LR to', lr) 98 | for param_group in optimizer.param_groups: 99 | param_group['lr'] = lr 100 | logger.close() 101 | 102 | 103 | if __name__ == '__main__': 104 | opt = opts().parse() 105 | main(opt) 106 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import json 9 | import cv2 10 | import numpy as np 11 | import time 12 | from progress.bar import Bar 13 | import torch 14 | 15 | # from external.nms import soft_nms 16 | from opts import opts 17 | from logger import Logger 18 | from utils.utils import AverageMeter 19 | from datasets.dataset_factory import dataset_factory 20 | from detectors.detector_factory import detector_factory 21 | 22 | 23 | def test(opt): 24 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 25 | 26 | Dataset = dataset_factory[opt.dataset] 27 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 28 | print(opt) 29 | Logger(opt) 30 | Detector = detector_factory[opt.task] 31 | 32 | split = 'val' if not opt.trainval else 'test' 33 | dataset = Dataset(opt, split) 34 | detector = Detector(opt) 35 | 36 | results = {} 37 | num_iters = len(dataset) 38 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 39 | time_stats = ['tot', 'load', 
'pre', 'net', 'dec', 'post', 'merge'] 40 | avg_time_stats = {t: AverageMeter() for t in time_stats} 41 | for ind in range(num_iters): 42 | img_id = dataset.images[ind] 43 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 44 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 45 | 46 | ret = detector.run(img_path) 47 | 48 | results[img_id] = ret['results'] 49 | 50 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 51 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 52 | for t in avg_time_stats: 53 | avg_time_stats[t].update(ret[t]) 54 | Bar.suffix = Bar.suffix + \ 55 | '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 56 | bar.next() 57 | bar.finish() 58 | dataset.run_eval(results, opt.save_dir) 59 | 60 | 61 | if __name__ == '__main__': 62 | opt = opts().parse() 63 | test(opt) 64 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_active_to_coco.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Split `Active` dataset into training and test sets. 3 | Move this file to `{$movenet}/data/active` and run it. 4 | Author: Min LI 5 | 6 | TODO: Check whether keypoint mapping from MPII to COCO is correct. 7 | ''' 8 | from PIL import Image 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | import json 13 | import shutil 14 | import random 15 | 16 | db_type = 'train' # train, test 17 | train_percentage = 0.9 18 | annot_path = "annotations/active.json" 19 | train_save_path = "annotations/active_train.json" 20 | val_save_path = "annotations/active_val.json" 21 | 22 | if not osp.isdir('train'): 23 | os.makedirs('train') 24 | if not osp.isdir('val'): 25 | os.makedirs('val') 26 | 27 | 28 | print("Loading Acitve dataset...") 29 | with open(annot_path) as json_file: 30 | active = json.load(json_file) 31 | ''' 32 | MPII: 0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow, 15 - l wrist 33 | 34 | COCO_PERSON_KEYPOINT_NAMES = [ 35 | 'nose', 0 36 | 'left_eye', 1 37 | 'right_eye', 2 38 | 'left_ear', 3 39 | 'right_ear', 4 40 | 'left_shoulder', 5 41 | 'right_shoulder', 6 42 | 'left_elbow', 7 43 | 'right_elbow', 8 44 | 'left_wrist', 9 45 | 'right_wrist', 10 46 | 'left_hip', 11 47 | 'right_hip', 12 48 | 'left_knee', 13 49 | 'right_knee', 14 50 | 'left_ankle', 15 51 | 'right_ankle' 16 52 | ] 53 | ''' 54 | joint_mapping = {'0': 16, '1': 14, '2': 12, '3': 11, '4': 13, '5': 15, '6': -1, '7': -1, '8': -1, '9': 0, '10': 10, '11': 8, '12': 6, '13': 5, '14': 7, '15': 9} 55 | joint_num = 17 56 | img_num = len(active) 57 | random_index = list(range(img_num)) 58 | random.shuffle(random_index) 59 | train_index = random_index[:int(img_num * train_percentage) + 1] 60 | val_index = random_index[int(img_num * train_percentage) + 1:] 61 | 62 | print("image size: ", img_num) 63 | print("train size: ", int(img_num * train_percentage)) 64 | print("val size: ", img_num -int(img_num * 
train_percentage)) 65 | 66 | aid = 0 67 | coco_train = {'images': [], 'categories': [], 'annotations': []} 68 | 69 | for img_id in train_index: 70 | 71 | filename = 'images/' + str(active[img_id]['image'])#filename 72 | filename_target = 'train/' + str(active[img_id]['image']) 73 | shutil.copy(filename, filename_target) 74 | img = Image.open(osp.join('.', filename)) 75 | w,h = img.size 76 | img_dict = {'id': aid, 'file_name': str(active[img_id]['image']), 'width': w, 'height': h} 77 | coco_train['images'].append(img_dict) 78 | 79 | bbox = np.zeros((4)) # xmin, ymin, w, h 80 | kps = np.zeros((joint_num, 3)) # xcoord, ycoord, vis 81 | ori_kps = [] 82 | 83 | #kps 84 | for jid in range(16): 85 | if (joint_mapping[str(jid)] == -1): continue 86 | kps[joint_mapping[str(jid)]][0] = active[img_id]["joints"][jid][0] 87 | kps[joint_mapping[str(jid)]][1] = active[img_id]["joints"][jid][1] 88 | kps[joint_mapping[str(jid)]][2] = active[img_id]["joint_vis"][jid] + 1 89 | ori_kps.append([active[img_id]["joints"][jid][0],active[img_id]["joints"][jid][1]]) 90 | kps[1:5] = np.zeros((4, 3)) 91 | ori_kps = np.asarray(ori_kps) 92 | 93 | #bbox extract from annotated kps 94 | 95 | xmin = np.min(ori_kps[:,0]) 96 | ymin = np.min(ori_kps[:,1]) 97 | xmax = np.max(ori_kps[:,0]) 98 | ymax = np.max(ori_kps[:,1]) 99 | width = xmax - xmin - 1 100 | height = ymax - ymin - 1 101 | 102 | # corrupted bounding box 103 | if width <= 0 or height <= 0: 104 | continue 105 | # 20% extend 106 | else: 107 | bbox[0] = ((xmin + xmax)/2. - width/2*1.2) if(((xmin + xmax)/2. - width/2*1.2)>0) else 0 108 | bbox[1] = ((ymin + ymax)/2. - height/2*1.2) if(((ymin + ymax)/2. - height/2*1.2)>0) else 0 109 | bbox[2] = width*1.2 if ((bbox[0]+width*1.2)0) else 0 163 | bbox[1] = ((ymin + ymax)/2. - height/2*1.2) if(((ymin + ymax)/2. 
- height/2*1.2)>0) else 0 164 | bbox[2] = width*1.2 if ((bbox[0]+width*1.2) 0: 18 | return False 19 | else: 20 | return True 21 | 22 | 23 | db_type = 'train' # train, test 24 | annot_file = loadmat('mpii_human_pose_v1_u12_1')['RELEASE'] 25 | save_path = '../annotations/' + db_type + '.json' 26 | 27 | joint_num = 16 28 | img_num = len(annot_file['annolist'][0][0][0]) 29 | 30 | aid = 0 31 | coco = {'images': [], 'categories': [], 'annotations': []} 32 | for img_id in range(img_num): 33 | 34 | if ((db_type == 'train' and annot_file['img_train'][0][0][0][img_id] == 1) or (db_type == 'test' and annot_file['img_train'][0][0][0][img_id] == 0)) and \ 35 | check_empty(annot_file['annolist'][0][0][0][img_id],'annorect') == False: #any person is annotated 36 | 37 | filename = 'images/' + str(annot_file['annolist'][0][0][0][img_id]['image'][0][0][0][0]) #filename 38 | img = Image.open(osp.join('..', filename)) 39 | w,h = img.size 40 | img_dict = {'id': img_id, 'file_name': filename, 'width': w, 'height': h} 41 | coco['images'].append(img_dict) 42 | 43 | if db_type == 'test': 44 | continue 45 | 46 | person_num = len(annot_file['annolist'][0][0][0][img_id]['annorect'][0]) #person_num 47 | joint_annotated = np.zeros((person_num,joint_num)) 48 | for pid in range(person_num): 49 | 50 | if check_empty(annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid],'annopoints') == False: #kps is annotated 51 | 52 | bbox = np.zeros((4)) # xmin, ymin, w, h 53 | kps = np.zeros((joint_num,3)) # xcoord, ycoord, vis 54 | 55 | #kps 56 | annot_joint_num = len(annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0]) 57 | for jid in range(annot_joint_num): 58 | annot_jid = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['id'][0][0] 59 | kps[annot_jid][0] = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['x'][0][0] 60 | kps[annot_jid][1] = annot_file['annolist'][0][0][0][img_id]['annorect'][0][pid]['annopoints']['point'][0][0][0][jid]['y'][0][0] 61 | kps[annot_jid][2] = 1 62 | 63 | #bbox extract from annotated kps 64 | annot_kps = kps[kps[:,2]==1,:].reshape(-1,3) 65 | xmin = np.min(annot_kps[:,0]) 66 | ymin = np.min(annot_kps[:,1]) 67 | xmax = np.max(annot_kps[:,0]) 68 | ymax = np.max(annot_kps[:,1]) 69 | width = xmax - xmin - 1 70 | height = ymax - ymin - 1 71 | 72 | # corrupted bounding box 73 | if width <= 0 or height <= 0: 74 | continue 75 | # 20% extend 76 | else: 77 | bbox[0] = (xmin + xmax)/2. - width/2*1.2 78 | bbox[1] = (ymin + ymax)/2. 
- height/2*1.2 79 | bbox[2] = width*1.2 80 | bbox[3] = height*1.2 81 | 82 | 83 | person_dict = {'id': aid, 'image_id': img_id, 'category_id': 1, 'area': bbox[2]*bbox[3], 'bbox': bbox.tolist(), 'iscrowd': 0, 'keypoints': kps.reshape(-1).tolist(), 'num_keypoints': int(np.sum(kps[:,2]==1))} 84 | coco['annotations'].append(person_dict) 85 | aid += 1 86 | 87 | category = { 88 | "supercategory": "person", 89 | "id": 1, # to be same as COCO, not using 0 90 | "name": "person", 91 | "skeleton": [[0,1], 92 | [1,2], 93 | [2,6], 94 | [7,12], 95 | [12,11], 96 | [11,10], 97 | [5,4], 98 | [4,3], 99 | [3,6], 100 | [7,13], 101 | [13,14], 102 | [14,15], 103 | [6,7], 104 | [7,8], 105 | [8,9]] , 106 | "keypoints": ["r_ankle", "r_knee","r_hip", 107 | "l_hip", "l_knee", "l_ankle", 108 | "pelvis", "thorax", 109 | "upper_neck", "head_top", 110 | "r_wrist", "r_elbow", "r_shoulder", 111 | "l_shoulder", "l_elbow", "l_wrist"]} 112 | 113 | coco['categories'] = [category] 114 | 115 | with open(save_path, 'w') as f: 116 | json.dump(coco, f) -------------------------------------------------------------------------------- /src/tools/draw.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | angles = [14,11,7,4,9,23,26,22,10,15,14,15,13,10,11,11,6,7,6,4,4,6,13,36,64,75,86,113,130,133,137,140,148,148,133,119,29,11,8,4,3,6,9,23,51,66,100,108,135,144,154,157,158,144,110,71,15,6,7,7,8,10,33,89,103,120,130,153,153,153,152,141,104,54,7,3,-3,-3,-3,-3,-3,-3,-3,-3,0,13,10,6,12,12,12,17,13,14,13,13,11,7,8,9,37,71,84,104,123,130,143,147,149,141,113,75,39,21,14,12,13,10,14,38,79,102,130,143,149,146,144,136,137,137,149,157,154,92,55,20,10,12,6,5,9,23,55,97,109,124,134,140,148,154,158,159,161,141,101,56,29,13,10,9,7,5,5,4,2,1,5,12,48,58,70,83,96,110,133] 3 | x = [] 4 | index = 1 5 | 6 | for i in range(len(angles)): 7 | x.append(index) 8 | index+=10 9 | 10 | plt.plot(x,angles,'s-',color = 'g',label="ATT-RLSTM")# 's-': square marker 11 | plt.xlabel("region length")# x-axis label 12 | plt.ylabel("accuracy")# y-axis label 13 | plt.legend(loc = "best")# legend 14 | plt.show() 15 | 16 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/instances_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12
| 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/person_keypoints_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/filter_hp.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import sys 4 | import shutil 5 | 6 | ''' 7 | Filter the COCO keypoint dataset down to images that contain at most a given number of annotated people (single- or double-person poses). 8 | Usage: 9 | Put this file under the `data/` directory and run it. 10 | By default, it copies the kept images to the `active_coco` directory. 11 | 12 | ''' 13 | 14 | class CocoFilter(): 15 | """ 16 | Filters the COCO dataset 17 | """ 18 | def filter_human_pose(self): 19 | image_infos = self.coco['images'] 20 | annotation_infos = self.coco['annotations'] 21 | 22 | annotation_infos_by_image_id = {} 23 | for annotation_info in annotation_infos: 24 | image_id = annotation_info['image_id'] 25 | if image_id in annotation_infos_by_image_id: 26 | annotation_infos_by_image_id[image_id].append(annotation_info) 27 | else: 28 | annotation_infos_by_image_id[image_id] = [annotation_info] 29 | 30 | image_ids = list(annotation_infos_by_image_id.keys()) 31 | 32 | image_id_to_image_info = {} 33 | for image_info in image_infos: 34 | image_id_to_image_info[image_info['id']] = image_info 35 | 36 | filtered_person_image_ids = list(filter(lambda image_id: len(annotation_infos_by_image_id[image_id]) <= self.counts, image_ids)) 37 | # image_infos 38 | filtered_image_infos = list(map(lambda image_id: image_id_to_image_info[image_id], filtered_person_image_ids)) 39 | self.new_images = filtered_image_infos 40 | print("Filtered image length: ", len(filtered_image_infos)) 41 | print("E.g.,", self.new_images[0]) 42 | # annotation_infos 43 | filtered_annotation_infos = list(filter(lambda annotation_info: annotation_info["image_id"] in filtered_person_image_ids, annotation_infos)) 44 | self.new_annotations = filtered_annotation_infos 45 | print("Filtered annotation length: ", len(filtered_annotation_infos)) 46 | print("E.g.,", self.new_annotations[0]) 47 | 48 | def move_files(self): 49 | print("Start copying data...") 50 | for image_info in self.new_images: 51 | file_name = image_info["file_name"] 52 | file_path = self.in_data_path / Path(file_name) 53 | file_path_target = self.out_data_path / Path(file_name) 54 | shutil.copy(file_path, file_path_target) 55 | print("Completed copying data.") 56 | 57 | 58 | def main(self, args): 59 | # Open json 60 | self.input_json_path = Path('coco', 'annotations', args.input_json) 61 | self.output_json_path = Path('active_coco', 'annotations', args.output_json) 62 | # data dir 63 | self.in_data_path = Path('coco', args.split) 64 | self.out_data_path = Path('active_coco', args.split) 65 | # self.out_data_path.mkdir(parents=True, exist_ok=True) 66 | self.counts = args.counts 67 | 68 | # Verify input path exists 69 | if not self.input_json_path.exists(): 70 | print('Input json path not found.') 71 | print('Quitting early.') 72 | quit() 73 | 74 | # 
Verify output path does not already exist 75 | if self.output_json_path.exists(): 76 | should_continue = input('Output path already exists. Overwrite? (y/n) ').lower() 77 | if should_continue != 'y' and should_continue != 'yes': 78 | print('Quitting early.') 79 | quit() 80 | 81 | # Load the json 82 | print('Loading json file...') 83 | with open(self.input_json_path) as json_file: 84 | self.coco = json.load(json_file) 85 | 86 | # Filter to specific categories 87 | print('Filtering...') 88 | self.filter_human_pose() 89 | 90 | # Build new JSON 91 | new_master_json = { 92 | 'info': self.coco['info'], 93 | 'licenses': self.coco['licenses'], 94 | 'images': self.new_images, 95 | 'annotations': self.new_annotations, 96 | 'categories': self.coco['categories'] 97 | } 98 | 99 | # Write the JSON to a file 100 | print('Saving new json file...') 101 | with open(self.output_json_path, 'w+') as output_file: 102 | json.dump(new_master_json, output_file) 103 | 104 | print('Filtered json saved.') 105 | 106 | self.move_files() 107 | 108 | 109 | if __name__ == "__main__": 110 | import argparse 111 | 112 | parser = argparse.ArgumentParser(description="Filter COCO JSON: " 113 | "Filters a COCO Keypoints JSON file to only include specified maximum human counts. " 114 | "This includes images, and annotations. Does not modify 'info' or 'licenses'.") 115 | 116 | parser.add_argument("-i", "--input_json", dest="input_json", default="person_keypoints_train2017.json", 117 | help="path to a json file in coco format") 118 | parser.add_argument("-o", "--output_json", dest="output_json", default="person_keypoints_train2017.json", 119 | help="path to save the output json") 120 | parser.add_argument("-s", "--split", dest="split", default="train2017", 121 | help="The split of data: train/val") 122 | parser.add_argument("-c", "--counts", dest="counts", type=int, default=2, 123 | help="Maximun human counts in a single image, e.g. 
-c 2 for training data, -c 1 for validation data") 124 | 125 | args = parser.parse_args() 126 | 127 | cf = CocoFilter() 128 | cf.main(args) 129 | -------------------------------------------------------------------------------- /src/tools/get_kitti.sh: -------------------------------------------------------------------------------- 1 | mkdir kitti 2 | cd kitti 3 | wget http://www.cvlibs.net/download.php?file=data_object_image_2.zip 4 | wget http://www.cvlibs.net/download.php?file=data_object_label_2.zip 5 | wget http://www.cvlibs.net/download.php?file=data_object_calib.zip 6 | unzip data_object_image_2.zip 7 | unzip data_object_label_2.zip 8 | unzip data_object_calib.zip 9 | 10 | -------------------------------------------------------------------------------- /src/tools/get_pascal_voc.sh: -------------------------------------------------------------------------------- 1 | mkdir voc 2 | cd voc 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 4 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 5 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar 6 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 7 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar 8 | tar xvf VOCtrainval_06-Nov-2007.tar 9 | tar xvf VOCtest_06-Nov-2007.tar 10 | tar xvf VOCdevkit_08-Jun-2007.tar 11 | tar xvf VOCtrainval_11-May-2012.tar 12 | tar xvf VOCdevkit_18-May-2011.tar 13 | rm VOCtrainval_06-Nov-2007.tar 14 | rm VOCtest_06-Nov-2007.tar 15 | rm VOCdevkit_08-Jun-2007.tar 16 | rm VOCtrainval_11-May-2012.tar 17 | rm VOCdevkit_18-May-2011.tar 18 | mkdir images 19 | cp VOCdevkit/VOC2007/JPEGImages/* images/ 20 | cp VOCdevkit/VOC2012/JPEGImages/* images/ 21 | wget https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip 22 | unzip PASCAL_VOC.zip 23 | rm PASCAL_VOC.zip 24 | mv PASCAL_VOC annotations/ 25 | cd .. 
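# assumed helper: merge_pascal_json.py (invoked below) is expected to combine the unzipped PASCAL_VOC json files into merged train/val annotation files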
26 | python merge_pascal_json.py 27 | -------------------------------------------------------------------------------- /src/tools/merge_active_coco_json.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import json 6 | 7 | coco_path = "person_keypoints_train2017_filtered.json" 8 | active_path = "active_coco.json" 9 | 10 | save_path = "person_keypoints_train2017_filtered_merged.json" 11 | 12 | print("Loading Acitve and COCO dataset...") 13 | with open(coco_path) as json_file: 14 | coco = json.load(json_file) 15 | with open(active_path) as json_file: 16 | active = json.load(json_file) 17 | 18 | images = [] 19 | for coco_image in coco['images']: 20 | images.append(coco_image) 21 | for active_image in active['images']: 22 | images.append(active_image) 23 | annotations = [] 24 | for coco_annotation in coco['annotations']: 25 | annotations.append(coco_annotation) 26 | for active_annotation in active['annotations']: 27 | annotations.append(active_annotation) 28 | new_master_json = { 29 | 'info': coco['info'], 30 | 'licenses': coco['licenses'], 31 | 'images': images, 32 | 'annotations': annotations, 33 | 'categories': coco['categories'] 34 | } 35 | 36 | img_num = len(images) 37 | print("image size: ", img_num) 38 | annotations_num = len(annotations) 39 | print("annotation size: ", annotations_num) 40 | 41 | with open(save_path, 'w') as f: 42 | json.dump(new_master_json, f) -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections. 
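# Example invocation (paths are illustrative); the flags match the argparse options defined below:
#   python reval.py /path/to/detections.pkl --imdb voc_2007_test --nms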
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/tflite_weight_viewer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_hub as hub 3 | from tensorflow_docs.vis import embed 4 | import numpy as np 5 | import cv2 6 | 7 | # Import matplotlib libraries 8 | from matplotlib import pyplot as plt 9 | from matplotlib.collections import LineCollection 10 | import matplotlib.patches as patches 11 | 12 | # Some modules to display an animation using imageio. 13 | import imageio 14 | 15 | 16 | interpreter = tf.lite.Interpreter(model_path="../../models/lite-model_movenet_singlepose_lightning_3.tflite") 17 | interpreter.allocate_tensors() 18 | 19 | ''' 20 | Check input/output details 21 | ''' 22 | input_details = interpreter.get_input_details() 23 | output_details = interpreter.get_output_details() 24 | 25 | print("== Input details ==") 26 | print("name:", input_details[0]['name']) 27 | print("shape:", input_details[0]['shape']) 28 | print("type:", input_details[0]['dtype']) 29 | print("\n== Output details ==") 30 | print("name:", output_details[0]['name']) 31 | print("shape:", output_details[0]['shape']) 32 | print("type:", output_details[0]['dtype']) 33 | 34 | 35 | ''' 36 | This gives a list of dictionaries. 
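Each dict describes one tensor in the graph and holds at least its 'index', 'name', 'shape', and 'dtype'.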
37 | ''' 38 | tensor_details = interpreter.get_tensor_details() 39 | 40 | for dict in tensor_details: 41 | i = dict['index'] 42 | tensor_name = dict['name'] 43 | shape = dict['shape'] 44 | # scales = dict['quantization_parameters']['scales'] 45 | # zero_points = dict['quantization_parameters']['zero_points'] 46 | # tensor = interpreter.tensor(i)() 47 | 48 | print(i, type, tensor_name, shape)# , scales.shape, zero_points.shape, tensor.shape) 49 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in 
range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | --------------------------------------------------------------------------------
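tflite_weight_viewer.py above only inspects tensor metadata. The sketch below runs one actual inference through the same tf.lite.Interpreter; the 0-255 RGB input convention and the [1, 1, 17, 3] (y, x, score) output layout are assumptions taken from the public MoveNet Lightning model card rather than from this repository, and the image path is illustrative.

import cv2
import numpy as np
import tensorflow as tf

# Same single-pose Lightning model that tflite_weight_viewer.py loads.
interpreter = tf.lite.Interpreter(model_path="../../models/lite-model_movenet_singlepose_lightning_3.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Resize to whatever input size the model declares and match its input dtype.
frame = cv2.imread("example.jpg")  # illustrative path
in_h, in_w = [int(v) for v in input_details[0]['shape'][1:3]]
inp = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (in_w, in_h))
inp = np.expand_dims(inp, axis=0).astype(input_details[0]['dtype'])

interpreter.set_tensor(input_details[0]['index'], inp)
interpreter.invoke()

# Assumed output layout: [1, 1, 17, 3], one (y, x, score) row per COCO keypoint.
keypoints = interpreter.get_tensor(output_details[0]['index'])[0, 0]
for y, x, score in keypoints:
    print('({:.3f}, {:.3f}) score={:.2f}'.format(x, y, score))

The y/x values come back normalized to the network input, so they still need to be rescaled to the original frame size before being drawn on the image.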