├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── configs ├── campus │ ├── prn32_cpn80x80x20.yaml │ └── prn64_cpn80x80x20.yaml ├── panoptic │ └── resnet50 │ │ ├── prn32_cpn48x48x12_960x512_cam5.yaml │ │ └── prn64_cpn80x80x20_960x512_cam5.yaml └── shelf │ ├── prn32_cpn48x48x12.yaml │ └── prn64_cpn80x80x20.yaml ├── data ├── CampusSeq1 │ ├── calibration_campus.json │ └── pred_campus_maskrcnn_hrnet_coco.pkl ├── Shelf │ ├── calibration_shelf.json │ └── pred_shelf_maskrcnn_hrnet_coco.pkl ├── panoptic.gif ├── panoptic2.gif └── panoptic_training_pose.pkl ├── lib ├── core │ ├── __init__.py │ ├── config.py │ ├── function.py │ ├── loss.py │ └── proposal.py ├── dataset │ ├── JointsDataset.py │ ├── __init__.py │ ├── campus.py │ ├── campus_synthetic.py │ ├── panoptic.py │ ├── shelf.py │ └── shelf_synthetic.py ├── models │ ├── __init__.py │ ├── cuboid_proposal_net.py │ ├── multi_person_posenet.py │ ├── pose_regression_net.py │ ├── pose_resnet.py │ ├── project_layer.py │ └── v2v_net.py └── utils │ ├── __init__.py │ ├── cameras.py │ ├── cameras_cpu.py │ ├── transforms.py │ ├── utils.py │ ├── vis.py │ └── zipreader.py ├── requirements.txt ├── run ├── _init_paths.py ├── train_3d.py └── validate_3d.py └── test ├── _init_paths.py └── evaluate.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VoxelPose 2 | 3 | 4 | 5 | This is the official implementation for: 6 | > [**VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment**](https://arxiv.org/abs/2004.06239), 7 | > Hanyue Tu, Chunyu Wang, Wenjun Zeng 8 | > *ECCV 2020 (Oral) ([arXiv 2004.06239](https://arxiv.org/abs/2004.06239))* 9 | 10 | 11 | 12 | 13 | 14 | ## Installation 15 | 1. Clone this repo, and we'll call the directory that you cloned multiview-multiperson-pose as ${POSE_ROOT}. 16 | 2. Install dependencies. 17 | 18 | ## Data preparation 19 | 20 | ### Shelf/Campus datasets 21 | 1. Download the datasets from http://campar.in.tum.de/Chair/MultiHumanPose and extract them under `${POSE_ROOT}/data/Shelf` and `${POSE_ROOT}/data/CampusSeq1`, respectively. 22 | 23 | 2. 
We have converted the camera parameters into our format, and you can download them from this repository. They lie in `${POSE_ROOT}/data/Shelf/` and `${POSE_ROOT}/data/CampusSeq1/`, respectively. 24 | 25 | 3. Due to the limited and incomplete annotations of these two datasets, we do not train our model on them. Instead, we directly use a 2D pose estimator trained on COCO, and use independent 3D human poses from the Panoptic dataset to train our 3D model. These poses lie in `${POSE_ROOT}/data/panoptic_training_pose.pkl`. See our paper for more details. 26 | 27 | 4. For testing, we first estimate 2D poses and generate 2D heatmaps for these two datasets. The predicted poses can also be downloaded from this repository. They lie in `${POSE_ROOT}/data/Shelf/` and `${POSE_ROOT}/data/CampusSeq1/`, respectively. You can also use models trained on the COCO dataset (such as HigherHRNet) to generate 2D heatmaps directly. 28 | 29 | The directory tree should look like this: 30 | ``` 31 | ${POSE_ROOT} 32 | |-- data 33 | |-- Shelf 34 | | |-- Camera0 35 | | |-- ... 36 | | |-- Camera4 37 | | |-- actorsGT.mat 38 | | |-- calibration_shelf.json 39 | | |-- pred_shelf_maskrcnn_hrnet_coco.pkl 40 | |-- CampusSeq1 41 | | |-- Camera0 42 | | |-- Camera1 43 | | |-- Camera2 44 | | |-- actorsGT.mat 45 | | |-- calibration_campus.json 46 | | |-- pred_campus_maskrcnn_hrnet_coco.pkl 47 | |-- panoptic_training_pose.pkl 48 | ``` 49 | 50 | 51 | ### CMU Panoptic dataset 52 | 1. Download the dataset by following the instructions in [panoptic-toolbox](https://github.com/CMU-Perceptual-Computing-Lab/panoptic-toolbox) and extract it under `${POSE_ROOT}/data/panoptic-toolbox/data`. 53 | - You can download only the sequences you need. You can also download just a subset of camera views by specifying the number of views (HD_Video_Number) and changing the camera order in `./scripts/getData.sh`. The sequences and camera views used in our project are listed in our paper. 54 | - Note that we only use the HD videos, calibration data, and 3D body keypoints in our code. You can comment out the irrelevant parts, such as downloading the 3D face data, in `./scripts/getData.sh`. 55 | 2. Download the pretrained backbone model from [pretrained backbone](https://1drv.ms/u/s!AjX41AtnTHeTjn3H9PGSLcbSC0bl?e=cw7SQg) and place it here: `${POSE_ROOT}/models/pose_resnet50_panoptic.pth.tar` (ResNet-50 pretrained on the COCO dataset and finetuned jointly on the Panoptic dataset and MPII). 56 | 57 | The directory tree should look like this: 58 | ``` 59 | ${POSE_ROOT} 60 | |-- models 61 | | |-- pose_resnet50_panoptic.pth.tar 62 | |-- data 63 | |-- panoptic-toolbox 64 | |-- data 65 | |-- 160224_haggling1 66 | | |-- hdImgs 67 | | |-- hdvideos 68 | | |-- hdPose3d_stage1_coco19 69 | | |-- calibration_160224_haggling1.json 70 | |-- 160226_haggling1 71 | |-- ... 72 | ``` 73 | 74 | ## Training 75 | ### CMU Panoptic dataset 76 | 77 | Train and validate on the five selected camera views. You can specify the GPU devices and batch size per GPU in the config file. We trained our models on two GPUs. 78 | ``` 79 | python run/train_3d.py --cfg configs/panoptic/resnet50/prn64_cpn80x80x20_960x512_cam5.yaml 80 | ``` 81 | ### Shelf/Campus datasets 82 | ``` 83 | python run/train_3d.py --cfg configs/shelf/prn64_cpn80x80x20.yaml 84 | python run/train_3d.py --cfg configs/campus/prn64_cpn80x80x20.yaml 85 | ``` 86 | 87 | ## Evaluation 88 | ### CMU Panoptic dataset 89 | 90 | Evaluate the models.
It will print evaluation results to the screen./ 91 | ``` 92 | python test/evaluate.py --cfg configs/panoptic/resnet50/prn64_cpn80x80x20_960x512_cam5.yaml 93 | ``` 94 | ### Shelf/Campus datasets 95 | 96 | It will print the PCP results to the screen. 97 | ``` 98 | python test/evaluate.py --cfg configs/shelf/prn64_cpn80x80x20.yaml 99 | python test/evaluate.py --cfg configs/campus/prn64_cpn80x80x20.yaml 100 | ``` 101 | 102 | ## Citation 103 | If you use our code or models in your research, please cite with: 104 | ``` 105 | @inproceedings{voxelpose, 106 | author={Tu, Hanyue and Wang, Chunyu and Zeng, Wenjun}, 107 | title={VoxelPose: Towards Multi-Camera 3D Human Pose Estimation in Wild Environment}, 108 | booktitle = {European Conference on Computer Vision (ECCV)}, 109 | year = {2020} 110 | } 111 | ``` 112 | 113 | 114 | # Contributing 115 | 116 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 117 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 118 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 119 | 120 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 121 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 122 | provided by the bot. You will only need to do this once across all repos using our CLA. 123 | 124 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 125 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 126 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 127 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. 
Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /configs/campus/prn32_cpn80x80x20.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: '' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'campus_synthetic' 17 | TEST_DATASET: 'campus' 18 | DATA_FORMAT: png 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/CampusSeq1' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 27 | - 2 28 | - 3 29 | CAMERA_NUM: 3 30 | NETWORK: 31 | PRETRAINED_BACKBONE: '' 32 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 33 | TARGET_TYPE: gaussian 34 | IMAGE_SIZE: 35 | - 800 36 | - 640 37 | HEATMAP_SIZE: 38 | - 200 39 | - 160 40 | SIGMA: 3 41 | NUM_JOINTS: 17 42 | USE_GT: False 43 | LOSS: 44 | USE_TARGET_WEIGHT: true 45 | TRAIN: 46 | BATCH_SIZE: 4 47 | SHUFFLE: true 48 | BEGIN_EPOCH: 0 49 | END_EPOCH: 30 50 | RESUME: true 51 | OPTIMIZER: adam 52 | LR: 0.0001 53 | TEST: 54 | MODEL_FILE: "model_best.pth.tar" 55 | BATCH_SIZE: 4 56 | DEBUG: 57 | DEBUG: true 58 | SAVE_HEATMAPS_GT: true 59 | SAVE_HEATMAPS_PRED: true 60 | MULTI_PERSON: 61 | SPACE_SIZE: 62 | - 12000.0 63 | - 12000.0 64 | - 2000.0 65 | SPACE_CENTER: 66 | - 3000.0 67 | - 4500.0 68 | - 1000.0 69 | INITIAL_CUBE_SIZE: 70 | - 80 71 | - 80 72 | - 20 73 | MAX_PEOPLE_NUM: 10 74 | THRESHOLD: 0.1 75 | PICT_STRUCT: 76 | GRID_SIZE: 77 | - 2000.0 78 | - 2000.0 79 | - 2000.0 80 | CUBE_SIZE: 81 | - 32 82 | - 32 83 | - 32 84 | -------------------------------------------------------------------------------- /configs/campus/prn64_cpn80x80x20.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: '' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 
100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'campus_synthetic' 17 | TEST_DATASET: 'campus' 18 | DATA_FORMAT: png 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/CampusSeq1' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 27 | - 2 28 | - 3 29 | CAMERA_NUM: 3 30 | NETWORK: 31 | PRETRAINED_BACKBONE: '' 32 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 33 | TARGET_TYPE: gaussian 34 | IMAGE_SIZE: 35 | - 800 36 | - 640 37 | HEATMAP_SIZE: 38 | - 200 39 | - 160 40 | SIGMA: 3 41 | NUM_JOINTS: 17 42 | USE_GT: False 43 | LOSS: 44 | USE_TARGET_WEIGHT: true 45 | TRAIN: 46 | BATCH_SIZE: 1 47 | SHUFFLE: true 48 | BEGIN_EPOCH: 0 49 | END_EPOCH: 30 50 | RESUME: true 51 | OPTIMIZER: adam 52 | LR: 0.0001 53 | TEST: 54 | MODEL_FILE: "model_best.pth.tar" 55 | BATCH_SIZE: 4 56 | DEBUG: 57 | DEBUG: true 58 | SAVE_HEATMAPS_GT: true 59 | SAVE_HEATMAPS_PRED: true 60 | MULTI_PERSON: 61 | SPACE_SIZE: 62 | - 12000.0 63 | - 12000.0 64 | - 2000.0 65 | SPACE_CENTER: 66 | - 3000.0 67 | - 4500.0 68 | - 1000.0 69 | INITIAL_CUBE_SIZE: 70 | - 80 71 | - 80 72 | - 20 73 | MAX_PEOPLE_NUM: 10 74 | THRESHOLD: 0.1 75 | PICT_STRUCT: 76 | GRID_SIZE: 77 | - 2000.0 78 | - 2000.0 79 | - 2000.0 80 | CUBE_SIZE: 81 | - 64 82 | - 64 83 | - 64 84 | -------------------------------------------------------------------------------- /configs/panoptic/resnet50/prn32_cpn48x48x12_960x512_cam5.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: 'pose_resnet' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'panoptic' 17 | TEST_DATASET: 'panoptic' 18 | DATA_FORMAT: jpg 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/panoptic-toolbox/data/' # 'data/panoptic/' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 2 27 | CAMERA_NUM: 5 28 | NETWORK: 29 | PRETRAINED_BACKBONE: "models/pose_resnet50_panoptic.pth.tar" 30 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 31 | TARGET_TYPE: gaussian 32 | IMAGE_SIZE: 33 | - 960 34 | - 512 35 | HEATMAP_SIZE: 36 | - 240 37 | - 128 38 | SIGMA: 3 39 | NUM_JOINTS: 15 40 | USE_GT: False 41 | POSE_RESNET: 42 | FINAL_CONV_KERNEL: 1 43 | DECONV_WITH_BIAS: False 44 | NUM_DECONV_LAYERS: 3 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_DECONV_KERNELS: 50 | - 4 51 | - 4 52 | - 4 53 | NUM_LAYERS: 50 54 | LOSS: 55 | USE_TARGET_WEIGHT: true 56 | TRAIN: 57 | BATCH_SIZE: 2 58 | SHUFFLE: true 59 | BEGIN_EPOCH: 0 60 | END_EPOCH: 10 61 | RESUME: true 62 | OPTIMIZER: adam 63 | LR: 0.0001 64 | TEST: 65 | MODEL_FILE: 'model_best.pth.tar' 66 | BATCH_SIZE: 4 67 | DEBUG: 68 | DEBUG: true 69 | SAVE_HEATMAPS_GT: true 70 | SAVE_HEATMAPS_PRED: true 71 | MULTI_PERSON: 72 | SPACE_SIZE: 73 | - 8000.0 74 | - 8000.0 75 | - 2000.0 76 | SPACE_CENTER: 77 | - 0.0 # 120.0 78 | - -500.0 # -600.0 79 | - 800.0 80 | INITIAL_CUBE_SIZE: 81 | - 48 82 | - 48 83 | - 12 84 | MAX_PEOPLE_NUM: 10 85 | THRESHOLD: 0.3 86 | PICT_STRUCT: 87 | GRID_SIZE: 88 | - 2000.0 89 | - 2000.0 90 | - 2000.0 91 | CUBE_SIZE: 92 | - 32 93 | - 32 94 | - 32 95 | -------------------------------------------------------------------------------- 
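The `cpn...`/`prn...` parts of these config names mirror `MULTI_PERSON.INITIAL_CUBE_SIZE` (the coarse grid scanned by the cuboid proposal network) and `PICT_STRUCT.CUBE_SIZE` (the per-person grid used by the pose regression network). A quick sketch of the voxel resolution implied by the settings above — the helper below is illustrative and not part of this repository:

```
# Illustrative only: edge length (mm) of one voxel along each axis.
import numpy as np

def voxel_size_mm(space_size_mm, cube_size):
    return np.asarray(space_size_mm, dtype=float) / np.asarray(cube_size, dtype=float)

# prn32_cpn48x48x12_960x512_cam5.yaml
print(voxel_size_mm([8000.0, 8000.0, 2000.0], [48, 48, 12]))  # ~166.7 mm proposal voxels
print(voxel_size_mm([2000.0, 2000.0, 2000.0], [32, 32, 32]))  # 62.5 mm per-person voxels
```

The prn64_cpn80x80x20 variant in the next file keeps the same capture space but uses finer grids: 100 mm proposal voxels and 31.25 mm per-person voxels.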
/configs/panoptic/resnet50/prn64_cpn80x80x20_960x512_cam5.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: 'pose_resnet' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'panoptic' 17 | TEST_DATASET: 'panoptic' 18 | DATA_FORMAT: jpg 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/panoptic-toolbox/data/' # 'data/panoptic/' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 2 27 | CAMERA_NUM: 5 28 | NETWORK: 29 | PRETRAINED_BACKBONE: "models/pose_resnet50_panoptic.pth.tar" 30 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 31 | TARGET_TYPE: gaussian 32 | IMAGE_SIZE: 33 | - 960 34 | - 512 35 | HEATMAP_SIZE: 36 | - 240 37 | - 128 38 | SIGMA: 3 39 | NUM_JOINTS: 15 40 | USE_GT: False 41 | POSE_RESNET: 42 | FINAL_CONV_KERNEL: 1 43 | DECONV_WITH_BIAS: False 44 | NUM_DECONV_LAYERS: 3 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_DECONV_KERNELS: 50 | - 4 51 | - 4 52 | - 4 53 | NUM_LAYERS: 50 54 | LOSS: 55 | USE_TARGET_WEIGHT: true 56 | TRAIN: 57 | BATCH_SIZE: 1 58 | SHUFFLE: true 59 | BEGIN_EPOCH: 0 60 | END_EPOCH: 10 61 | RESUME: true 62 | OPTIMIZER: adam 63 | LR: 0.0001 64 | TEST: 65 | MODEL_FILE: 'model_best.pth.tar' 66 | BATCH_SIZE: 4 67 | DEBUG: 68 | DEBUG: true 69 | SAVE_HEATMAPS_GT: true 70 | SAVE_HEATMAPS_PRED: true 71 | MULTI_PERSON: 72 | SPACE_SIZE: 73 | - 8000.0 74 | - 8000.0 75 | - 2000.0 76 | SPACE_CENTER: 77 | - 0.0 # 120.0 78 | - -500.0 # -600.0 79 | - 800.0 80 | INITIAL_CUBE_SIZE: 81 | - 80 82 | - 80 83 | - 20 84 | MAX_PEOPLE_NUM: 10 85 | THRESHOLD: 0.3 86 | PICT_STRUCT: 87 | GRID_SIZE: 88 | - 2000.0 89 | - 2000.0 90 | - 2000.0 91 | CUBE_SIZE: 92 | - 64 93 | - 64 94 | - 64 95 | -------------------------------------------------------------------------------- /configs/shelf/prn32_cpn48x48x12.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: '' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'shelf_synthetic' 17 | TEST_DATASET: 'shelf' 18 | DATA_FORMAT: jpg 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/Shelf' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 27 | - 2 28 | - 3 29 | CAMERA_NUM: 5 30 | NETWORK: 31 | PRETRAINED_BACKBONE: '' 32 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 33 | TARGET_TYPE: gaussian 34 | IMAGE_SIZE: 35 | - 800 36 | - 608 37 | HEATMAP_SIZE: 38 | - 200 39 | - 152 40 | SIGMA: 3 41 | NUM_JOINTS: 17 42 | USE_GT: False 43 | LOSS: 44 | USE_TARGET_WEIGHT: true 45 | TRAIN: 46 | BATCH_SIZE: 2 47 | SHUFFLE: true 48 | BEGIN_EPOCH: 0 49 | END_EPOCH: 30 50 | RESUME: true 51 | OPTIMIZER: adam 52 | LR: 0.0001 53 | TEST: 54 | MODEL_FILE: "model_best.pth.tar" 55 | BATCH_SIZE: 4 56 | DEBUG: 57 | DEBUG: true 58 | SAVE_HEATMAPS_GT: true 59 | SAVE_HEATMAPS_PRED: true 60 | MULTI_PERSON: 61 | SPACE_SIZE: 62 | - 8000.0 63 | - 8000.0 64 | - 2000.0 65 | SPACE_CENTER: 66 | - 450.0 # 120.0 67 | - 
-320.0 # -600.0 68 | - 800.0 69 | INITIAL_CUBE_SIZE: 70 | - 48 71 | - 48 72 | - 12 73 | MAX_PEOPLE_NUM: 10 74 | THRESHOLD: 0.1 75 | PICT_STRUCT: 76 | GRID_SIZE: 77 | - 2000.0 78 | - 2000.0 79 | - 2000.0 80 | CUBE_SIZE: 81 | - 32 82 | - 32 83 | - 32 84 | -------------------------------------------------------------------------------- /configs/shelf/prn64_cpn80x80x20.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | BACKBONE_MODEL: '' 6 | MODEL: 'multi_person_posenet' 7 | DATA_DIR: '' 8 | GPUS: '0' 9 | OUTPUT_DIR: 'output' 10 | LOG_DIR: 'log' 11 | WORKERS: 4 12 | PRINT_FREQ: 100 13 | 14 | DATASET: 15 | COLOR_RGB: True 16 | TRAIN_DATASET: 'shelf_synthetic' 17 | TEST_DATASET: 'shelf' 18 | DATA_FORMAT: jpg 19 | DATA_AUGMENTATION: False 20 | FLIP: False 21 | ROOT: 'data/Shelf' # 'data/panoptic/' 22 | ROT_FACTOR: 45 23 | SCALE_FACTOR: 0.35 24 | TEST_SUBSET: 'validation' 25 | TRAIN_SUBSET: 'train' 26 | ROOTIDX: 27 | - 2 28 | - 3 29 | CAMERA_NUM: 5 30 | NETWORK: 31 | PRETRAINED_BACKBONE: '' 32 | PRETRAINED: '' # 'models/pytorch/imagenet/resnet50-19c8e357.pth' 33 | TARGET_TYPE: gaussian 34 | IMAGE_SIZE: 35 | - 800 36 | - 608 37 | HEATMAP_SIZE: 38 | - 200 39 | - 152 40 | SIGMA: 3 41 | NUM_JOINTS: 17 42 | USE_GT: False 43 | LOSS: 44 | USE_TARGET_WEIGHT: true 45 | TRAIN: 46 | BATCH_SIZE: 1 47 | SHUFFLE: true 48 | BEGIN_EPOCH: 0 49 | END_EPOCH: 30 50 | RESUME: true 51 | OPTIMIZER: adam 52 | LR: 0.0001 53 | TEST: 54 | MODEL_FILE: "model_best.pth.tar" 55 | BATCH_SIZE: 4 56 | DEBUG: 57 | DEBUG: true 58 | SAVE_HEATMAPS_GT: true 59 | SAVE_HEATMAPS_PRED: true 60 | MULTI_PERSON: 61 | SPACE_SIZE: 62 | - 8000.0 63 | - 8000.0 64 | - 2000.0 65 | SPACE_CENTER: 66 | - 450.0 # 120.0 67 | - -320.0 # -600.0 68 | - 800.0 69 | INITIAL_CUBE_SIZE: 70 | - 80 71 | - 80 72 | - 20 73 | MAX_PEOPLE_NUM: 10 74 | THRESHOLD: 0.1 75 | PICT_STRUCT: 76 | GRID_SIZE: 77 | - 2000.0 78 | - 2000.0 79 | - 2000.0 80 | CUBE_SIZE: 81 | - 64 82 | - 64 83 | - 64 84 | -------------------------------------------------------------------------------- /data/CampusSeq1/calibration_campus.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "R": [ 4 | [ 5 | 0.9998819135498813, 6 | -0.007627303394110196, 7 | -0.013341034396255802 8 | ], 9 | [ 10 | -0.01412240122676837, 11 | -0.11375390190151916, 12 | -0.9934085803866252 13 | ], 14 | [ 15 | 0.00605943391894462, 16 | 0.9934796797343738, 17 | -0.11384818494586636 18 | ] 19 | ], 20 | "T": [ 21 | [ 22 | 1774.8953318252247 23 | ], 24 | [ 25 | -5051.695948238737 26 | ], 27 | [ 28 | 1923.3559877015355 29 | ] 30 | ], 31 | "fx": 437.9852173913044, 32 | "fy": 437.9852173913044, 33 | "cx": 185.3596, 34 | "cy": 139.2537, 35 | "k": [ 36 | [ 37 | 0.0 38 | ], 39 | [ 40 | 0.0 41 | ], 42 | [ 43 | 0.0 44 | ] 45 | ], 46 | "p": [ 47 | [ 48 | 0.0 49 | ], 50 | [ 51 | 0.0 52 | ] 53 | ] 54 | }, 55 | "1": { 56 | "R": [ 57 | [ 58 | -0.04633107785835382, 59 | -0.9988140384937536, 60 | 0.014964883303310195 61 | ], 62 | [ 63 | -0.13065076504992335, 64 | -0.008793265243184023, 65 | -0.9913894573164639 66 | ], 67 | [ 68 | 0.9903452977706073, 69 | -0.04788731558734052, 70 | -0.1300884168152014 71 | ] 72 | ], 73 | "T": [ 74 | [ 75 | -6240.579909342256 76 | ], 77 | [ 78 | 5247.348264374987 79 | ], 80 | [ 81 | 1947.3802148598609 82 | ] 83 | ], 84 | "fx": 430.03326086956525, 85 | "fy": 430.03326086956525, 86 | "cx": 184.0583, 87 | "cy": 130.7467, 88 | "k": [ 89 | [ 90 | 
0.0 91 | ], 92 | [ 93 | 0.0 94 | ], 95 | [ 96 | 0.0 97 | ] 98 | ], 99 | "p": [ 100 | [ 101 | 0.0 102 | ], 103 | [ 104 | 0.0 105 | ] 106 | ] 107 | }, 108 | "2": { 109 | "R": [ 110 | [ 111 | 0.5386991962445586, 112 | 0.8424723621738047, 113 | -0.006595069276080057 114 | ], 115 | [ 116 | 0.10782367722838201, 117 | -0.07670471706694504, 118 | -0.9912065581949252 119 | ], 120 | [ 121 | -0.835570003407504, 122 | 0.5332510715910186, 123 | -0.13215923748499042 124 | ] 125 | ], 126 | "T": [ 127 | [ 128 | 11943.56106545541 129 | ], 130 | [ 131 | -1803.8527374133198 132 | ], 133 | [ 134 | 1973.3939116534714 135 | ] 136 | ], 137 | "fx": 700.9856521739131, 138 | "fy": 700.9856521739131, 139 | "cx": 167.59475, 140 | "cy": 142.0545, 141 | "k": [ 142 | [ 143 | 0.0 144 | ], 145 | [ 146 | 0.0 147 | ], 148 | [ 149 | 0.0 150 | ] 151 | ], 152 | "p": [ 153 | [ 154 | 0.0 155 | ], 156 | [ 157 | 0.0 158 | ] 159 | ] 160 | } 161 | } -------------------------------------------------------------------------------- /data/CampusSeq1/pred_campus_maskrcnn_hrnet_coco.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/data/CampusSeq1/pred_campus_maskrcnn_hrnet_coco.pkl -------------------------------------------------------------------------------- /data/Shelf/calibration_shelf.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": { 3 | "k": [ 4 | [ 5 | 0.0 6 | ], 7 | [ 8 | 0.0 9 | ], 10 | [ 11 | 0.0 12 | ] 13 | ], 14 | "p": [ 15 | [ 16 | 0.0 17 | ], 18 | [ 19 | 0.0 20 | ] 21 | ], 22 | "R": [ 23 | [ 24 | 0.650977, 25 | -0.758717, 26 | 0.024027 27 | ], 28 | [ 29 | -0.018862, 30 | -0.04781, 31 | -0.998678 32 | ], 33 | [ 34 | 0.758863, 35 | 0.649664, 36 | -0.045434 37 | ] 38 | ], 39 | "T": [ 40 | [ 41 | -1586.4496077989998 42 | ], 43 | [ 44 | -2109.46905869 45 | ], 46 | [ 47 | 1104.209800652 48 | ] 49 | ], 50 | "fx": 1063.512085, 51 | "fy": 1071.863647, 52 | "cx": 511.738251, 53 | "cy": 350.088287 54 | }, 55 | "1": { 56 | "k": [ 57 | [ 58 | 0.0 59 | ], 60 | [ 61 | 0.0 62 | ], 63 | [ 64 | 0.0 65 | ] 66 | ], 67 | "p": [ 68 | [ 69 | 0.0 70 | ], 71 | [ 72 | 0.0 73 | ] 74 | ], 75 | "R": [ 76 | [ 77 | -0.016771, 78 | -0.999835, 79 | 0.006926 80 | ], 81 | [ 82 | -0.029435, 83 | -0.006431, 84 | -0.999546 85 | ], 86 | [ 87 | 0.999426, 88 | -0.016967, 89 | -0.029322 90 | ] 91 | ], 92 | "T": [ 93 | [ 94 | -3512.391424833 95 | ], 96 | [ 97 | 311.47771461800005 98 | ], 99 | [ 100 | 964.5481307480001 101 | ] 102 | ], 103 | "fx": 1097.697754, 104 | "fy": 1086.668457, 105 | "cx": 521.652161, 106 | "cy": 376.587067 107 | }, 108 | "2": { 109 | "k": [ 110 | [ 111 | 0.0 112 | ], 113 | [ 114 | 0.0 115 | ], 116 | [ 117 | 0.0 118 | ] 119 | ], 120 | "p": [ 121 | [ 122 | 0.0 123 | ], 124 | [ 125 | 0.0 126 | ] 127 | ], 128 | "R": [ 129 | [ 130 | -0.789986, 131 | -0.610527, 132 | 0.05638 133 | ], 134 | [ 135 | -0.370413, 136 | 0.401962, 137 | -0.837389 138 | ], 139 | [ 140 | 0.488586, 141 | -0.68241, 142 | -0.543691 143 | ] 144 | ], 145 | "T": [ 146 | [ 147 | -1420.944211509 148 | ], 149 | [ 150 | 2546.574076866 151 | ], 152 | [ 153 | 2688.8728944060003 154 | ] 155 | ], 156 | "fx": 1130.065552, 157 | "fy": 1112.470337, 158 | "cx": 566.884338, 159 | "cy": 375.212708 160 | }, 161 | "3": { 162 | "k": [ 163 | [ 164 | 0.0 165 | ], 166 | [ 167 | 0.0 168 | ], 169 | [ 170 | 0.0 171 | ] 172 | ], 173 | "p": [ 174 | [ 175 | 0.0 176 | ], 177 | [ 178 | 0.0 179 | ] 180 | ], 181 | "R": 
[ 182 | [ 183 | -0.970568, 184 | 0.235647, 185 | -0.049676 186 | ], 187 | [ 188 | 0.09763, 189 | 0.196438, 190 | -0.975644 191 | ], 192 | [ 193 | -0.22015, 194 | -0.951779, 195 | -0.213663 196 | ] 197 | ], 198 | "T": [ 199 | [ 200 | 963.489306486 201 | ], 202 | [ 203 | 3408.674914882 204 | ], 205 | [ 206 | 1422.035001899 207 | ] 208 | ], 209 | "fx": 1056.162598, 210 | "fy": 1059.639648, 211 | "cx": 552.43573, 212 | "cy": 393.180389 213 | }, 214 | "4": { 215 | "k": [ 216 | [ 217 | 0.0 218 | ], 219 | [ 220 | 0.0 221 | ], 222 | [ 223 | 0.0 224 | ] 225 | ], 226 | "p": [ 227 | [ 228 | 0.0 229 | ], 230 | [ 231 | 0.0 232 | ] 233 | ], 234 | "R": [ 235 | [ 236 | -0.194109, 237 | 0.980554, 238 | -0.028888 239 | ], 240 | [ 241 | 0.233045, 242 | 0.017488, 243 | -0.972309 244 | ], 245 | [ 246 | -0.952896, 247 | -0.195466, 248 | -0.231908 249 | ] 250 | ], 251 | "T": [ 252 | [ 253 | 3832.020978729 254 | ], 255 | [ 256 | 273.55271850000014 257 | ], 258 | [ 259 | 1439.4616998990002 260 | ] 261 | ], 262 | "fx": 1089.654175, 263 | "fy": 1080.99939, 264 | "cx": 498.32962, 265 | "cy": 359.514832 266 | } 267 | } -------------------------------------------------------------------------------- /data/Shelf/pred_shelf_maskrcnn_hrnet_coco.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/data/Shelf/pred_shelf_maskrcnn_hrnet_coco.pkl -------------------------------------------------------------------------------- /data/panoptic.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/data/panoptic.gif -------------------------------------------------------------------------------- /data/panoptic2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/data/panoptic2.gif -------------------------------------------------------------------------------- /data/panoptic_training_pose.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/data/panoptic_training_pose.pkl -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/lib/core/__init__.py -------------------------------------------------------------------------------- /lib/core/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
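# This module defines the global default configuration as an EasyDict named `config`
# (cuDNN, network, dataset, training, testing, and multi-person voxel-space settings).
# update_config() merges an experiment YAML file over these defaults and raises a
# ValueError for keys that do not exist here. Illustrative usage (assuming lib/ is on
# sys.path, which run/_init_paths.py takes care of):
#     from core.config import config, update_config
#     update_config('configs/shelf/prn64_cpn80x80x20.yaml')
#     print(config.MULTI_PERSON.INITIAL_CUBE_SIZE)  # [80, 80, 20]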
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import yaml 12 | 13 | import numpy as np 14 | from easydict import EasyDict as edict 15 | 16 | config = edict() 17 | 18 | config.OUTPUT_DIR = 'output' 19 | config.LOG_DIR = 'log' 20 | config.DATA_DIR = '' 21 | config.BACKBONE_MODEL = 'pose_resnet' 22 | config.MODEL = 'multi_person_posenet' 23 | config.GPUS = '0,1' 24 | config.WORKERS = 8 25 | config.PRINT_FREQ = 100 26 | 27 | # higherhrnet definition 28 | config.MODEL_EXTRA = edict() 29 | config.MODEL_EXTRA.PRETRAINED_LAYERS = ['*'] 30 | config.MODEL_EXTRA.FINAL_CONV_KERNEL = 1 31 | config.MODEL_EXTRA.STEM_INPLANES = 64 32 | 33 | config.MODEL_EXTRA.STAGE2 = edict() 34 | config.MODEL_EXTRA.STAGE2.NUM_MODULES = 1 35 | config.MODEL_EXTRA.STAGE2.NUM_BRANCHES= 2 36 | config.MODEL_EXTRA.STAGE2.BLOCK = 'BASIC' 37 | config.MODEL_EXTRA.STAGE2.NUM_BLOCKS = [4, 4] 38 | config.MODEL_EXTRA.STAGE2.NUM_CHANNELS = [48, 96] 39 | config.MODEL_EXTRA.STAGE2.FUSE_METHOD = 'SUM' 40 | 41 | config.MODEL_EXTRA.STAGE3 = edict() 42 | config.MODEL_EXTRA.STAGE3.NUM_MODULES = 4 43 | config.MODEL_EXTRA.STAGE3.NUM_BRANCHES = 3 44 | config.MODEL_EXTRA.STAGE3.BLOCK = 'BASIC' 45 | config.MODEL_EXTRA.STAGE3.NUM_BLOCKS = [4, 4, 4] 46 | config.MODEL_EXTRA.STAGE3.NUM_CHANNELS = [48, 96, 192] 47 | config.MODEL_EXTRA.STAGE3.FUSE_METHOD = 'SUM' 48 | 49 | config.MODEL_EXTRA.STAGE4 = edict() 50 | config.MODEL_EXTRA.STAGE4.NUM_MODULES = 3 51 | config.MODEL_EXTRA.STAGE4.NUM_BRANCHES = 4 52 | config.MODEL_EXTRA.STAGE4.BLOCK = 'BASIC' 53 | config.MODEL_EXTRA.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 54 | config.MODEL_EXTRA.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 55 | config.MODEL_EXTRA.STAGE4.FUSE_METHOD = 'SUM' 56 | 57 | config.MODEL_EXTRA.DECONV = edict() 58 | config.MODEL_EXTRA.DECONV.NUM_DECONVS = 1 59 | config.MODEL_EXTRA.DECONV.NUM_CHANNELS = 32 60 | config.MODEL_EXTRA.DECONV.KERNEL_SIZE = 4 61 | config.MODEL_EXTRA.DECONV.NUM_BASIC_BLOCKS = 4 62 | config.MODEL_EXTRA.DECONV.CAT_OUTPUT = True 63 | 64 | # Cudnn related params 65 | config.CUDNN = edict() 66 | config.CUDNN.BENCHMARK = True 67 | config.CUDNN.DETERMINISTIC = False 68 | config.CUDNN.ENABLED = True 69 | 70 | # common params for NETWORK 71 | config.NETWORK = edict() 72 | config.NETWORK.PRETRAINED = 'models/pytorch/imagenet/resnet50-19c8e357.pth' 73 | config.NETWORK.PRETRAINED_BACKBONE = '' 74 | config.NETWORK.NUM_JOINTS = 20 75 | config.NETWORK.INPUT_SIZE = 512 76 | config.NETWORK.HEATMAP_SIZE = np.array([80, 80]) 77 | config.NETWORK.IMAGE_SIZE = np.array([320, 320]) 78 | config.NETWORK.SIGMA = 2 79 | config.NETWORK.TARGET_TYPE = 'gaussian' 80 | config.NETWORK.AGGRE = True 81 | config.NETWORK.USE_GT = False 82 | config.NETWORK.BETA = 100.0 83 | 84 | # pose_resnet related params 85 | config.POSE_RESNET = edict() 86 | config.POSE_RESNET.NUM_LAYERS = 50 87 | config.POSE_RESNET.DECONV_WITH_BIAS = False 88 | config.POSE_RESNET.NUM_DECONV_LAYERS = 3 89 | config.POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 90 | config.POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 91 | config.POSE_RESNET.FINAL_CONV_KERNEL = 1 92 | 93 | config.LOSS = edict() 94 | config.LOSS.USE_TARGET_WEIGHT = True 95 | config.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 96 | 97 | # DATASET related params 98 | config.DATASET = edict() 99 | config.DATASET.ROOT = '../data/h36m/' 100 | config.DATASET.TRAIN_DATASET = 'mixed_dataset' 101 | 
config.DATASET.TEST_DATASET = 'multi_view_h36m' 102 | config.DATASET.TRAIN_SUBSET = 'train' 103 | config.DATASET.TEST_SUBSET = 'validation' 104 | config.DATASET.ROOTIDX = 2 105 | config.DATASET.DATA_FORMAT = 'jpg' 106 | config.DATASET.BBOX = 2000 107 | config.DATASET.CROP = True 108 | config.DATASET.COLOR_RGB = False 109 | config.DATASET.FLIP = True 110 | config.DATASET.DATA_AUGMENTATION = True 111 | config.DATASET.CAMERA_NUM = 5 112 | 113 | # training data augmentation 114 | config.DATASET.SCALE_FACTOR = 0 115 | config.DATASET.ROT_FACTOR = 0 116 | 117 | # train 118 | config.TRAIN = edict() 119 | config.TRAIN.LR_FACTOR = 0.1 120 | config.TRAIN.LR_STEP = [90, 110] 121 | config.TRAIN.LR = 0.001 122 | 123 | config.TRAIN.OPTIMIZER = 'adam' 124 | config.TRAIN.MOMENTUM = 0.9 125 | config.TRAIN.WD = 0.0001 126 | config.TRAIN.NESTEROV = False 127 | config.TRAIN.GAMMA1 = 0.99 128 | config.TRAIN.GAMMA2 = 0.0 129 | 130 | config.TRAIN.BEGIN_EPOCH = 0 131 | config.TRAIN.END_EPOCH = 140 132 | 133 | config.TRAIN.RESUME = False 134 | 135 | config.TRAIN.BATCH_SIZE = 8 136 | config.TRAIN.SHUFFLE = True 137 | 138 | # testing 139 | config.TEST = edict() 140 | config.TEST.BATCH_SIZE = 8 141 | config.TEST.STATE = 'best' 142 | config.TEST.FLIP_TEST = False 143 | config.TEST.POST_PROCESS = False 144 | config.TEST.SHIFT_HEATMAP = False 145 | config.TEST.USE_GT_BBOX = False 146 | config.TEST.IMAGE_THRE = 0.1 147 | config.TEST.NMS_THRE = 0.6 148 | config.TEST.OKS_THRE = 0.5 149 | config.TEST.IN_VIS_THRE = 0.0 150 | config.TEST.BBOX_FILE = '' 151 | config.TEST.BBOX_THRE = 1.0 152 | config.TEST.MATCH_IOU_THRE = 0.3 153 | config.TEST.DETECTOR = 'fpn_dcn' 154 | config.TEST.DETECTOR_DIR = '' 155 | config.TEST.MODEL_FILE = '' 156 | config.TEST.HEATMAP_LOCATION_FILE = 'predicted_heatmaps.h5' 157 | 158 | # debug 159 | config.DEBUG = edict() 160 | config.DEBUG.DEBUG = True 161 | config.DEBUG.SAVE_BATCH_IMAGES_GT = True 162 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = True 163 | config.DEBUG.SAVE_HEATMAPS_GT = True 164 | config.DEBUG.SAVE_HEATMAPS_PRED = True 165 | 166 | # pictorial structure 167 | config.PICT_STRUCT = edict() 168 | config.PICT_STRUCT.FIRST_NBINS = 16 169 | config.PICT_STRUCT.PAIRWISE_FILE = '' 170 | config.PICT_STRUCT.RECUR_NBINS = 2 171 | config.PICT_STRUCT.RECUR_DEPTH = 10 172 | config.PICT_STRUCT.LIMB_LENGTH_TOLERANCE = 150 173 | config.PICT_STRUCT.GRID_SIZE = np.array([2000.0, 2000.0, 2000.0]) 174 | config.PICT_STRUCT.CUBE_SIZE = np.array([64, 64, 64]) 175 | config.PICT_STRUCT.DEBUG = False 176 | config.PICT_STRUCT.TEST_PAIRWISE = False 177 | config.PICT_STRUCT.SHOW_ORIIMG = False 178 | config.PICT_STRUCT.SHOW_CROPIMG = False 179 | config.PICT_STRUCT.SHOW_HEATIMG = False 180 | 181 | config.MULTI_PERSON = edict() 182 | config.MULTI_PERSON.SPACE_SIZE = np.array([4000.0, 5200.0, 2400.0]) 183 | config.MULTI_PERSON.SPACE_CENTER = np.array([300.0, 300.0, 300.0]) 184 | config.MULTI_PERSON.INITIAL_CUBE_SIZE = np.array([24, 32, 16]) 185 | config.MULTI_PERSON.MAX_PEOPLE_NUM = 10 186 | config.MULTI_PERSON.THRESHOLD = 0.1 187 | 188 | 189 | def _update_dict(k, v): 190 | if k == 'DATASET': 191 | if 'MEAN' in v and v['MEAN']: 192 | v['MEAN'] = np.array( 193 | [eval(x) if isinstance(x, str) else x for x in v['MEAN']]) 194 | if 'STD' in v and v['STD']: 195 | v['STD'] = np.array( 196 | [eval(x) if isinstance(x, str) else x for x in v['STD']]) 197 | if k == 'NETWORK': 198 | if 'HEATMAP_SIZE' in v: 199 | if isinstance(v['HEATMAP_SIZE'], int): 200 | v['HEATMAP_SIZE'] = np.array( 201 | [v['HEATMAP_SIZE'], v['HEATMAP_SIZE']]) 202 | 
else: 203 | v['HEATMAP_SIZE'] = np.array(v['HEATMAP_SIZE']) 204 | if 'IMAGE_SIZE' in v: 205 | if isinstance(v['IMAGE_SIZE'], int): 206 | v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']]) 207 | else: 208 | v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE']) 209 | for vk, vv in v.items(): 210 | if vk in config[k]: 211 | config[k][vk] = vv 212 | else: 213 | raise ValueError("{}.{} not exist in config.py".format(k, vk)) 214 | 215 | 216 | def update_config(config_file): 217 | exp_config = None 218 | with open(config_file) as f: 219 | exp_config = edict(yaml.load(f, Loader=yaml.FullLoader)) 220 | for k, v in exp_config.items(): 221 | if k in config: 222 | if isinstance(v, dict): 223 | _update_dict(k, v) 224 | else: 225 | if k == 'SCALES': 226 | config[k][0] = (tuple(v)) 227 | else: 228 | config[k] = v 229 | else: 230 | raise ValueError("{} not exist in config.py".format(k)) 231 | 232 | 233 | def gen_config(config_file): 234 | cfg = dict(config) 235 | for k, v in cfg.items(): 236 | if isinstance(v, edict): 237 | cfg[k] = dict(v) 238 | 239 | with open(config_file, 'w') as f: 240 | yaml.dump(dict(cfg), f, default_flow_style=False) 241 | 242 | 243 | def update_dir(model_dir, log_dir, data_dir): 244 | if model_dir: 245 | config.OUTPUT_DIR = model_dir 246 | 247 | if log_dir: 248 | config.LOG_DIR = log_dir 249 | 250 | if data_dir: 251 | config.DATA_DIR = data_dir 252 | 253 | config.DATASET.ROOT = os.path.join(config.DATA_DIR, config.DATASET.ROOT) 254 | 255 | config.TEST.BBOX_FILE = os.path.join(config.DATA_DIR, config.TEST.BBOX_FILE) 256 | 257 | config.NETWORK.PRETRAINED = os.path.join(config.DATA_DIR, 258 | config.NETWORK.PRETRAINED) 259 | 260 | 261 | def get_model_name(cfg): 262 | name = '{model}_{num_layers}'.format( 263 | model=cfg.MODEL, num_layers=cfg.POSE_RESNET.NUM_LAYERS) 264 | deconv_suffix = ''.join( 265 | 'd{}'.format(num_filters) 266 | for num_filters in cfg.POSE_RESNET.NUM_DECONV_FILTERS) 267 | full_name = '{height}x{width}_{name}_{deconv_suffix}'.format( 268 | height=cfg.NETWORK.IMAGE_SIZE[1], 269 | width=cfg.NETWORK.IMAGE_SIZE[0], 270 | name=name, 271 | deconv_suffix=deconv_suffix) 272 | 273 | return name, full_name 274 | 275 | 276 | if __name__ == '__main__': 277 | import sys 278 | gen_config(sys.argv[1]) 279 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import logging 7 | import os 8 | import copy 9 | 10 | import torch 11 | import numpy as np 12 | 13 | from utils.vis import save_debug_images_multi 14 | from utils.vis import save_debug_3d_images 15 | from utils.vis import save_debug_3d_cubes 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def train_3d(config, model, optimizer, loader, epoch, output_dir, writer_dict, device=torch.device('cuda'), dtype=torch.float): 21 | batch_time = AverageMeter() 22 | data_time = AverageMeter() 23 | losses = AverageMeter() 24 | losses_2d = AverageMeter() 25 | losses_3d = AverageMeter() 26 | losses_cord = AverageMeter() 27 | 28 | model.train() 29 | 30 | if model.module.backbone is not None: 31 | model.module.backbone.eval() # Comment out this line if you want to train 2D backbone jointly 32 | 33 | accumulation_steps = 4 34 | accu_loss_3d = 0 35 | 36 | end = time.time() 37 | for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in 
enumerate(loader): 38 | data_time.update(time.time() - end) 39 | 40 | if 'panoptic' in config.DATASET.TEST_DATASET: 41 | pred, heatmaps, grid_centers, loss_2d, loss_3d, loss_cord = model(views=inputs, meta=meta, 42 | targets_2d=targets_2d, 43 | weights_2d=weights_2d, 44 | targets_3d=targets_3d[0]) 45 | elif 'campus' in config.DATASET.TEST_DATASET or 'shelf' in config.DATASET.TEST_DATASET: 46 | pred, heatmaps, grid_centers, loss_2d, loss_3d, loss_cord = model(meta=meta, targets_3d=targets_3d[0], 47 | input_heatmaps=input_heatmap) 48 | 49 | loss_2d = loss_2d.mean() 50 | loss_3d = loss_3d.mean() 51 | loss_cord = loss_cord.mean() 52 | 53 | losses_2d.update(loss_2d.item()) 54 | losses_3d.update(loss_3d.item()) 55 | losses_cord.update(loss_cord.item()) 56 | loss = loss_2d + loss_3d + loss_cord 57 | losses.update(loss.item()) 58 | 59 | if loss_cord > 0: 60 | optimizer.zero_grad() 61 | (loss_2d + loss_cord).backward() 62 | optimizer.step() 63 | 64 | if accu_loss_3d > 0 and (i + 1) % accumulation_steps == 0: 65 | optimizer.zero_grad() 66 | accu_loss_3d.backward() 67 | optimizer.step() 68 | accu_loss_3d = 0.0 69 | else: 70 | accu_loss_3d += loss_3d / accumulation_steps 71 | 72 | batch_time.update(time.time() - end) 73 | end = time.time() 74 | 75 | if i % config.PRINT_FREQ == 0: 76 | gpu_memory_usage = torch.cuda.memory_allocated(0) 77 | msg = 'Epoch: [{0}][{1}/{2}]\t' \ 78 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 79 | 'Speed: {speed:.1f} samples/s\t' \ 80 | 'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 81 | 'Loss: {loss.val:.6f} ({loss.avg:.6f})\t' \ 82 | 'Loss_2d: {loss_2d.val:.7f} ({loss_2d.avg:.7f})\t' \ 83 | 'Loss_3d: {loss_3d.val:.7f} ({loss_3d.avg:.7f})\t' \ 84 | 'Loss_cord: {loss_cord.val:.6f} ({loss_cord.avg:.6f})\t' \ 85 | 'Memory {memory:.1f}'.format( 86 | epoch, i, len(loader), batch_time=batch_time, 87 | speed=len(inputs) * inputs[0].size(0) / batch_time.val, 88 | data_time=data_time, loss=losses, loss_2d=losses_2d, loss_3d=losses_3d, 89 | loss_cord=losses_cord, memory=gpu_memory_usage) 90 | logger.info(msg) 91 | 92 | writer = writer_dict['writer'] 93 | global_steps = writer_dict['train_global_steps'] 94 | writer.add_scalar('train_loss_3d', losses_3d.val, global_steps) 95 | writer.add_scalar('train_loss_cord', losses_cord.val, global_steps) 96 | writer.add_scalar('train_loss', losses.val, global_steps) 97 | writer_dict['train_global_steps'] = global_steps + 1 98 | 99 | for k in range(len(inputs)): 100 | view_name = 'view_{}'.format(k + 1) 101 | prefix = '{}_{:08}_{}'.format( 102 | os.path.join(output_dir, 'train'), i, view_name) 103 | save_debug_images_multi(config, inputs[k], meta[k], targets_2d[k], heatmaps[k], prefix) 104 | prefix2 = '{}_{:08}'.format( 105 | os.path.join(output_dir, 'train'), i) 106 | 107 | save_debug_3d_cubes(config, meta[0], grid_centers, prefix2) 108 | save_debug_3d_images(config, meta[0], pred, prefix2) 109 | 110 | 111 | def validate_3d(config, model, loader, output_dir): 112 | batch_time = AverageMeter() 113 | data_time = AverageMeter() 114 | model.eval() 115 | 116 | preds = [] 117 | with torch.no_grad(): 118 | end = time.time() 119 | for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(loader): 120 | data_time.update(time.time() - end) 121 | if 'panoptic' in config.DATASET.TEST_DATASET: 122 | pred, heatmaps, grid_centers, _, _, _ = model(views=inputs, meta=meta, targets_2d=targets_2d, 123 | weights_2d=weights_2d, targets_3d=targets_3d[0]) 124 | elif 'campus' in config.DATASET.TEST_DATASET or 'shelf' 
in config.DATASET.TEST_DATASET: 125 | pred, heatmaps, grid_centers, _, _, _ = model(meta=meta, targets_3d=targets_3d[0], 126 | input_heatmaps=input_heatmap) 127 | pred = pred.detach().cpu().numpy() 128 | for b in range(pred.shape[0]): 129 | preds.append(pred[b]) 130 | 131 | batch_time.update(time.time() - end) 132 | end = time.time() 133 | if i % config.PRINT_FREQ == 0 or i == len(loader) - 1: 134 | gpu_memory_usage = torch.cuda.memory_allocated(0) 135 | msg = 'Test: [{0}/{1}]\t' \ 136 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ 137 | 'Speed: {speed:.1f} samples/s\t' \ 138 | 'Data: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ 139 | 'Memory {memory:.1f}'.format( 140 | i, len(loader), batch_time=batch_time, 141 | speed=len(inputs) * inputs[0].size(0) / batch_time.val, 142 | data_time=data_time, memory=gpu_memory_usage) 143 | logger.info(msg) 144 | 145 | for k in range(len(inputs)): 146 | view_name = 'view_{}'.format(k + 1) 147 | prefix = '{}_{:08}_{}'.format( 148 | os.path.join(output_dir, 'validation'), i, view_name) 149 | save_debug_images_multi(config, inputs[k], meta[k], targets_2d[k], heatmaps[k], prefix) 150 | prefix2 = '{}_{:08}'.format( 151 | os.path.join(output_dir, 'validation'), i) 152 | 153 | save_debug_3d_cubes(config, meta[0], grid_centers, prefix2) 154 | save_debug_3d_images(config, meta[0], pred, prefix2) 155 | 156 | metric = None 157 | if 'panoptic' in config.DATASET.TEST_DATASET: 158 | aps, _, mpjpe, recall = loader.dataset.evaluate(preds) 159 | msg = 'ap@25: {aps_25:.4f}\tap@50: {aps_50:.4f}\tap@75: {aps_75:.4f}\t' \ 160 | 'ap@100: {aps_100:.4f}\tap@125: {aps_125:.4f}\tap@150: {aps_150:.4f}\t' \ 161 | 'recall@500mm: {recall:.4f}\tmpjpe@500mm: {mpjpe:.3f}'.format( 162 | aps_25=aps[0], aps_50=aps[1], aps_75=aps[2], aps_100=aps[3], 163 | aps_125=aps[4], aps_150=aps[5], recall=recall, mpjpe=mpjpe 164 | ) 165 | logger.info(msg) 166 | metric = np.mean(aps) 167 | elif 'campus' in config.DATASET.TEST_DATASET or 'shelf' in config.DATASET.TEST_DATASET: 168 | actor_pcp, avg_pcp, _, recall = loader.dataset.evaluate(preds) 169 | msg = ' | Actor 1 | Actor 2 | Actor 3 | Average | \n' \ 170 | ' PCP | {pcp_1:.2f} | {pcp_2:.2f} | {pcp_3:.2f} | {pcp_avg:.2f} |\t Recall@500mm: {recall:.4f}'.format( 171 | pcp_1=actor_pcp[0]*100, pcp_2=actor_pcp[1]*100, pcp_3=actor_pcp[2]*100, pcp_avg=avg_pcp*100, recall=recall) 172 | logger.info(msg) 173 | metric = np.mean(avg_pcp) 174 | 175 | return metric 176 | 177 | 178 | class AverageMeter(object): 179 | """Computes and stores the average and current value""" 180 | 181 | def __init__(self): 182 | self.reset() 183 | 184 | def reset(self): 185 | self.val = 0 186 | self.avg = 0 187 | self.sum = 0 188 | self.count = 0 189 | 190 | def update(self, val, n=1): 191 | self.val = val 192 | self.sum += val * n 193 | self.count += n 194 | self.avg = self.sum / self.count 195 | -------------------------------------------------------------------------------- /lib/core/loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
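# Loss functions used by VoxelPose:
#   - JointsMSELoss: per-joint MSE on 2D heatmaps, optionally masked by target_weight.
#   - PerJointMSELoss: MSE on heatmaps/volumes with optional per-joint weighting.
#   - PerJointL1Loss: L1 loss on regressed joint coordinates with optional weighting.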
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch.nn as nn 11 | 12 | 13 | class JointsMSELoss(nn.Module): 14 | def __init__(self, use_target_weight): 15 | super(JointsMSELoss, self).__init__() 16 | self.criterion = nn.MSELoss(reduction='mean') 17 | self.use_target_weight = use_target_weight 18 | 19 | def forward(self, output, target, target_weight): 20 | batch_size = output.size(0) 21 | num_joints = output.size(1) 22 | heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) 23 | heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) 24 | loss = 0 25 | 26 | for idx in range(num_joints): 27 | heatmap_pred = heatmaps_pred[idx].squeeze() 28 | heatmap_gt = heatmaps_gt[idx].squeeze() 29 | if self.use_target_weight: 30 | loss += self.criterion(heatmap_pred.mul(target_weight[:, idx]), 31 | heatmap_gt.mul(target_weight[:, idx])) 32 | else: 33 | loss += self.criterion(heatmap_pred, heatmap_gt) 34 | 35 | return loss 36 | 37 | 38 | class PerJointMSELoss(nn.Module): 39 | def __init__(self): 40 | super(PerJointMSELoss, self).__init__() 41 | self.criterion = nn.MSELoss(reduction='mean') 42 | 43 | def forward(self, output, target, use_target_weight = False, target_weight=None): 44 | if use_target_weight: 45 | batch_size = output.size(0) 46 | num_joints = output.size(1) 47 | 48 | heatmap_pred = output.reshape((batch_size, num_joints, -1)) 49 | heatmap_gt = target.reshape((batch_size, num_joints, -1)) 50 | loss = self.criterion(heatmap_pred.mul(target_weight), heatmap_gt.mul(target_weight)) 51 | else: 52 | loss = self.criterion(output, target) 53 | 54 | return loss 55 | 56 | 57 | class PerJointL1Loss(nn.Module): 58 | def __init__(self): 59 | super(PerJointL1Loss, self).__init__() 60 | self.criterion = nn.L1Loss(reduction='mean') 61 | 62 | def forward(self, output, target, use_target_weight=False, target_weight=None): 63 | if use_target_weight: 64 | batch_size = output.size(0) 65 | num_joints = output.size(1) 66 | 67 | pred = output.reshape((batch_size, num_joints, -1)) 68 | gt = target.reshape((batch_size, num_joints, -1)) 69 | loss = self.criterion(pred.mul(target_weight), gt.mul(target_weight)) 70 | else: 71 | loss = self.criterion(output, target) 72 | 73 | return loss 74 | -------------------------------------------------------------------------------- /lib/core/proposal.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
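# Utilities for extracting person-centre proposals from the 3D root heatmap (root_cubes):
#   - max_pool(): keeps only local maxima via a 3D max-pool equality trick.
#   - nms(): suppresses non-peak voxels and returns the top-`max_num` scores.
#   - get_index(): unravels the flat top-k indices into (x, y, z) voxel coordinates.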
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | import torch 12 | import torch.nn.functional as F 13 | from scipy.ndimage import maximum_filter 14 | 15 | 16 | def get_index(indices, shape): 17 | batch_size = indices.shape[0] 18 | num_people = indices.shape[1] 19 | indices_x = (indices // (shape[1] * shape[2])).reshape(batch_size, num_people, -1) 20 | indices_y = ((indices % (shape[1] * shape[2])) // shape[2]).reshape(batch_size, num_people, -1) 21 | indices_z = (indices % shape[2]).reshape(batch_size, num_people, -1) 22 | indices = torch.cat([indices_x, indices_y, indices_z], dim=2) 23 | return indices 24 | 25 | 26 | def max_pool(inputs, kernel=3): 27 | padding = (kernel - 1) // 2 28 | max = F.max_pool3d(inputs, kernel_size=kernel, stride=1, padding=padding) 29 | keep = (inputs == max).float() 30 | return keep * inputs 31 | 32 | 33 | def nms(root_cubes, max_num): 34 | batch_size = root_cubes.shape[0] 35 | # root_cubes_nms = torch.zeros_like(root_cubes, device=root_cubes.device) 36 | # 37 | # for b in range(batch_size): 38 | # mx = torch.as_tensor(maximum_filter(root_cubes[b].detach().cpu().numpy(), size=3), 39 | # dtype=torch.float, device=root_cubes.device) 40 | # root_cubes_nms[b] = (mx == root_cubes[b]).float() * root_cubes[b] 41 | root_cubes_nms = max_pool(root_cubes) 42 | root_cubes_nms_reshape = root_cubes_nms.reshape(batch_size, -1) 43 | topk_values, topk_index = root_cubes_nms_reshape.topk(max_num) 44 | topk_unravel_index = get_index(topk_index, root_cubes[0].shape) 45 | 46 | return topk_values, topk_unravel_index 47 | -------------------------------------------------------------------------------- /lib/dataset/JointsDataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
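# Base dataset class shared by the Panoptic, Shelf and Campus loaders. For a single
# camera view, __getitem__ loads the image, warps it to the network input size with an
# affine transform, builds the 2D target heatmaps and weights, a 3D target volume, and
# (when predicted 2D poses are provided, as for Shelf/Campus) the input heatmaps that
# are fed to the model in place of backbone predictions.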
4 | # ------------------------------------------------------------------------------ 5 | 6 | import copy 7 | import logging 8 | 9 | import cv2 10 | import numpy as np 11 | import torch 12 | from torch.utils.data import Dataset 13 | import os 14 | 15 | from utils.transforms import get_affine_transform 16 | from utils.transforms import affine_transform, get_scale 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class JointsDataset(Dataset): 22 | 23 | def __init__(self, cfg, image_set, is_train, transform=None): 24 | self.cfg = cfg 25 | self.num_joints = 0 26 | self.pixel_std = 200 27 | self.flip_pairs = [] 28 | self.maximum_person = cfg.MULTI_PERSON.MAX_PEOPLE_NUM 29 | 30 | self.is_train = is_train 31 | 32 | this_dir = os.path.dirname(__file__) 33 | dataset_root = os.path.join(this_dir, '../..', cfg.DATASET.ROOT) 34 | self.dataset_root = os.path.abspath(dataset_root) 35 | self.root_id = cfg.DATASET.ROOTIDX 36 | self.image_set = image_set 37 | self.dataset_name = cfg.DATASET.TEST_DATASET 38 | 39 | self.data_format = cfg.DATASET.DATA_FORMAT 40 | self.data_augmentation = cfg.DATASET.DATA_AUGMENTATION 41 | 42 | self.num_views = cfg.DATASET.CAMERA_NUM 43 | 44 | self.scale_factor = cfg.DATASET.SCALE_FACTOR 45 | self.rotation_factor = cfg.DATASET.ROT_FACTOR 46 | self.flip = cfg.DATASET.FLIP 47 | self.color_rgb = cfg.DATASET.COLOR_RGB 48 | 49 | self.target_type = cfg.NETWORK.TARGET_TYPE 50 | self.image_size = np.array(cfg.NETWORK.IMAGE_SIZE) 51 | self.heatmap_size = np.array(cfg.NETWORK.HEATMAP_SIZE) 52 | self.sigma = cfg.NETWORK.SIGMA 53 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 54 | self.joints_weight = 1 55 | 56 | self.transform = transform 57 | self.db = [] 58 | 59 | self.space_size = np.array(cfg.MULTI_PERSON.SPACE_SIZE) 60 | self.space_center = np.array(cfg.MULTI_PERSON.SPACE_CENTER) 61 | self.initial_cube_size = np.array(cfg.MULTI_PERSON.INITIAL_CUBE_SIZE) 62 | 63 | 64 | def _get_db(self): 65 | raise NotImplementedError 66 | 67 | def evaluate(self, cfg, preds, output_dir, *args, **kwargs): 68 | raise NotImplementedError 69 | 70 | def __len__(self,): 71 | return len(self.db) 72 | 73 | def __getitem__(self, idx): 74 | db_rec = copy.deepcopy(self.db[idx]) 75 | 76 | image_file = db_rec['image'] 77 | 78 | if self.data_format == 'zip': 79 | from utils import zipreader 80 | data_numpy = zipreader.imread( 81 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 82 | else: 83 | data_numpy = cv2.imread( 84 | image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 85 | 86 | if data_numpy is None: 87 | # logger.error('=> fail to read {}'.format(image_file)) 88 | # raise ValueError('Fail to read {}'.format(image_file)) 89 | return None, None, None, None, None, None 90 | 91 | if self.color_rgb: 92 | data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) 93 | 94 | joints = db_rec['joints_2d'] 95 | joints_vis = db_rec['joints_2d_vis'] 96 | joints_3d = db_rec['joints_3d'] 97 | joints_3d_vis = db_rec['joints_3d_vis'] 98 | 99 | nposes = len(joints) 100 | assert nposes <= self.maximum_person, 'too many persons' 101 | 102 | height, width, _ = data_numpy.shape 103 | c = np.array([width / 2.0, height / 2.0]) 104 | s = get_scale((width, height), self.image_size) 105 | r = 0 106 | 107 | trans = get_affine_transform(c, s, r, self.image_size) 108 | input = cv2.warpAffine( 109 | data_numpy, 110 | trans, (int(self.image_size[0]), int(self.image_size[1])), 111 | flags=cv2.INTER_LINEAR) 112 | 113 | if self.transform: 114 | input = self.transform(input) 115 | 
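        # Apply the same affine warp to every 2D joint and mark joints that land
        # outside the network input as invisible.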
116 | for n in range(nposes): 117 | for i in range(len(joints[0])): 118 | if joints_vis[n][i, 0] > 0.0: 119 | joints[n][i, 0:2] = affine_transform( 120 | joints[n][i, 0:2], trans) 121 | if (np.min(joints[n][i, :2]) < 0 or 122 | joints[n][i, 0] >= self.image_size[0] or 123 | joints[n][i, 1] >= self.image_size[1]): 124 | joints_vis[n][i, :] = 0 125 | 126 | if 'pred_pose2d' in db_rec and db_rec['pred_pose2d'] != None: 127 | # For convenience, we use predicted poses and corresponding values at the original heatmaps 128 | # to generate 2d heatmaps for Campus and Shelf dataset. 129 | # You can also use other 2d backbone trained on COCO to generate 2d heatmaps directly. 130 | pred_pose2d = db_rec['pred_pose2d'] 131 | for n in range(len(pred_pose2d)): 132 | for i in range(len(pred_pose2d[n])): 133 | pred_pose2d[n][i, 0:2] = affine_transform(pred_pose2d[n][i, 0:2], trans) 134 | 135 | input_heatmap = self.generate_input_heatmap(pred_pose2d) 136 | input_heatmap = torch.from_numpy(input_heatmap) 137 | else: 138 | input_heatmap = torch.zeros(self.cfg.NETWORK.NUM_JOINTS, self.heatmap_size[1], self.heatmap_size[0]) 139 | 140 | target_heatmap, target_weight = self.generate_target_heatmap( 141 | joints, joints_vis) 142 | target_heatmap = torch.from_numpy(target_heatmap) 143 | target_weight = torch.from_numpy(target_weight) 144 | 145 | # make joints and joints_vis having same shape 146 | joints_u = np.zeros((self.maximum_person, self.num_joints, 2)) 147 | joints_vis_u = np.zeros((self.maximum_person, self.num_joints, 2)) 148 | for i in range(nposes): 149 | joints_u[i] = joints[i] 150 | joints_vis_u[i] = joints_vis[i] 151 | 152 | joints_3d_u = np.zeros((self.maximum_person, self.num_joints, 3)) 153 | joints_3d_vis_u = np.zeros((self.maximum_person, self.num_joints, 3)) 154 | for i in range(nposes): 155 | joints_3d_u[i] = joints_3d[i][:, 0:3] 156 | joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3] 157 | 158 | target_3d = self.generate_3d_target(joints_3d) 159 | target_3d = torch.from_numpy(target_3d) 160 | 161 | if isinstance(self.root_id, int): 162 | roots_3d = joints_3d_u[:, self.root_id] 163 | elif isinstance(self.root_id, list): 164 | roots_3d = np.mean([joints_3d_u[:, j] for j in self.root_id], axis=0) 165 | meta = { 166 | 'image': image_file, 167 | 'num_person': nposes, 168 | 'joints_3d': joints_3d_u, 169 | 'joints_3d_vis': joints_3d_vis_u, 170 | 'roots_3d': roots_3d, 171 | 'joints': joints_u, 172 | 'joints_vis': joints_vis_u, 173 | 'center': c, 174 | 'scale': s, 175 | 'rotation': r, 176 | 'camera': db_rec['camera'] 177 | } 178 | 179 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 180 | 181 | def compute_human_scale(self, pose, joints_vis): 182 | idx = joints_vis[:, 0] == 1 183 | if np.sum(idx) == 0: 184 | return 0 185 | minx, maxx = np.min(pose[idx, 0]), np.max(pose[idx, 0]) 186 | miny, maxy = np.min(pose[idx, 1]), np.max(pose[idx, 1]) 187 | # return np.clip((maxy - miny) * (maxx - minx), 1.0 / 4 * 256**2, 188 | # 4 * 256**2) 189 | return np.clip(np.maximum(maxy - miny, maxx - minx)**2, 1.0 / 4 * 96**2, 4 * 96**2) 190 | 191 | def generate_target_heatmap(self, joints, joints_vis): 192 | ''' 193 | :param joints: [[num_joints, 3]] 194 | :param joints_vis: [num_joints, 3] 195 | :return: target, target_weight(1: visible, 0: invisible) 196 | ''' 197 | nposes = len(joints) 198 | num_joints = self.num_joints 199 | target_weight = np.zeros((num_joints, 1), dtype=np.float32) 200 | for i in range(num_joints): 201 | for n in range(nposes): 202 | if joints_vis[n][i, 0] == 1: 203 | 
target_weight[i, 0] = 1 204 | 205 | assert self.target_type == 'gaussian', \ 206 | 'Only support gaussian map now!' 207 | 208 | if self.target_type == 'gaussian': 209 | target = np.zeros( 210 | (num_joints, self.heatmap_size[1], self.heatmap_size[0]), 211 | dtype=np.float32) 212 | feat_stride = self.image_size / self.heatmap_size 213 | 214 | for n in range(nposes): 215 | human_scale = 2 * self.compute_human_scale(joints[n] / feat_stride, joints_vis[n]) 216 | if human_scale == 0: 217 | continue 218 | 219 | cur_sigma = self.sigma * np.sqrt((human_scale / (96.0 * 96.0))) 220 | tmp_size = cur_sigma * 3 221 | for joint_id in range(num_joints): 222 | feat_stride = self.image_size / self.heatmap_size 223 | mu_x = int(joints[n][joint_id][0] / feat_stride[0]) 224 | mu_y = int(joints[n][joint_id][1] / feat_stride[1]) 225 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 226 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 227 | if joints_vis[n][joint_id, 0] == 0 or \ 228 | ul[0] >= self.heatmap_size[0] or \ 229 | ul[1] >= self.heatmap_size[1] \ 230 | or br[0] < 0 or br[1] < 0: 231 | continue 232 | 233 | size = 2 * tmp_size + 1 234 | x = np.arange(0, size, 1, np.float32) 235 | y = x[:, np.newaxis] 236 | x0 = y0 = size // 2 237 | g = np.exp( 238 | -((x - x0)**2 + (y - y0)**2) / (2 * cur_sigma**2)) 239 | 240 | # Usable gaussian range 241 | g_x = max(0, 242 | -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 243 | g_y = max(0, 244 | -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 245 | # Image range 246 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 247 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 248 | 249 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]], 250 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 251 | target = np.clip(target, 0, 1) 252 | 253 | if self.use_different_joints_weight: 254 | target_weight = np.multiply(target_weight, self.joints_weight) 255 | 256 | return target, target_weight 257 | 258 | def generate_3d_target(self, joints_3d): 259 | num_people = len(joints_3d) 260 | 261 | space_size = self.space_size 262 | space_center = self.space_center 263 | cube_size = self.initial_cube_size 264 | grid1Dx = np.linspace(-space_size[0] / 2, space_size[0] / 2, cube_size[0]) + space_center[0] 265 | grid1Dy = np.linspace(-space_size[1] / 2, space_size[1] / 2, cube_size[1]) + space_center[1] 266 | grid1Dz = np.linspace(-space_size[2] / 2, space_size[2] / 2, cube_size[2]) + space_center[2] 267 | 268 | target = np.zeros((cube_size[0], cube_size[1], cube_size[2]), dtype=np.float32) 269 | cur_sigma = 200.0 270 | 271 | for n in range(num_people): 272 | joint_id = self.root_id # mid-hip 273 | if isinstance(joint_id, int): 274 | mu_x = joints_3d[n][joint_id][0] 275 | mu_y = joints_3d[n][joint_id][1] 276 | mu_z = joints_3d[n][joint_id][2] 277 | elif isinstance(joint_id, list): 278 | mu_x = (joints_3d[n][joint_id[0]][0] + joints_3d[n][joint_id[1]][0]) / 2.0 279 | mu_y = (joints_3d[n][joint_id[0]][1] + joints_3d[n][joint_id[1]][1]) / 2.0 280 | mu_z = (joints_3d[n][joint_id[0]][2] + joints_3d[n][joint_id[1]][2]) / 2.0 281 | i_x = [np.searchsorted(grid1Dx, mu_x - 3 * cur_sigma), 282 | np.searchsorted(grid1Dx, mu_x + 3 * cur_sigma, 'right')] 283 | i_y = [np.searchsorted(grid1Dy, mu_y - 3 * cur_sigma), 284 | np.searchsorted(grid1Dy, mu_y + 3 * cur_sigma, 'right')] 285 | i_z = [np.searchsorted(grid1Dz, mu_z - 3 * cur_sigma), 286 | np.searchsorted(grid1Dz, mu_z + 3 * cur_sigma, 'right')] 287 | if i_x[0] >= i_x[1] 
or i_y[0] >= i_y[1] or i_z[0] >= i_z[1]: 288 | continue 289 | 290 | gridx, gridy, gridz = np.meshgrid(grid1Dx[i_x[0]:i_x[1]], grid1Dy[i_y[0]:i_y[1]], grid1Dz[i_z[0]:i_z[1]], indexing='ij') 291 | g = np.exp(-((gridx - mu_x) ** 2 + (gridy - mu_y) ** 2 + (gridz - mu_z) ** 2) / (2 * cur_sigma ** 2)) 292 | target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]] = np.maximum(target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]], g) 293 | 294 | target = np.clip(target, 0, 1) 295 | return target 296 | 297 | def generate_input_heatmap(self, joints): 298 | ''' 299 | :param joints: [[num_joints, 3]] 300 | :param joints_vis: [num_joints, 3] 301 | :return: input_heatmap 302 | ''' 303 | nposes = len(joints) 304 | num_joints = self.cfg.NETWORK.NUM_JOINTS 305 | 306 | assert self.target_type == 'gaussian', \ 307 | 'Only support gaussian map now!' 308 | 309 | if self.target_type == 'gaussian': 310 | target = np.zeros( 311 | (num_joints, self.heatmap_size[1], self.heatmap_size[0]), 312 | dtype=np.float32) 313 | feat_stride = self.image_size / self.heatmap_size 314 | 315 | for n in range(nposes): 316 | human_scale = 2 * self.compute_human_scale(joints[n][:, 0:2] / feat_stride, np.ones((num_joints, 1))) 317 | if human_scale == 0: 318 | continue 319 | 320 | cur_sigma = self.sigma * np.sqrt((human_scale / (96.0 * 96.0))) 321 | tmp_size = cur_sigma * 3 322 | for joint_id in range(num_joints): 323 | feat_stride = self.image_size / self.heatmap_size 324 | mu_x = int(joints[n][joint_id][0] / feat_stride[0]) 325 | mu_y = int(joints[n][joint_id][1] / feat_stride[1]) 326 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 327 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 328 | if ul[0] >= self.heatmap_size[0] or \ 329 | ul[1] >= self.heatmap_size[1] \ 330 | or br[0] < 0 or br[1] < 0: 331 | continue 332 | 333 | size = 2 * tmp_size + 1 334 | x = np.arange(0, size, 1, np.float32) 335 | y = x[:, np.newaxis] 336 | x0 = y0 = size // 2 337 | if 'campus' in self.dataset_name: 338 | max_value = 1.0 339 | else: 340 | max_value = joints[n][joint_id][2] if len(joints[n][joint_id]) == 3 else 1.0 341 | # max_value = max_value**0.5 342 | g = np.exp( 343 | -((x - x0)**2 + (y - y0)**2) / (2 * cur_sigma**2)) * max_value 344 | 345 | # Usable gaussian range 346 | g_x = max(0, 347 | -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 348 | g_y = max(0, 349 | -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 350 | # Image range 351 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 352 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 353 | 354 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]], 355 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 356 | target = np.clip(target, 0, 1) 357 | 358 | return target 359 | 360 | 361 | 362 | 363 | -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
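#
# The lowercase aliases imported below let a config string select the dataset class.
# A minimal sketch of how a training script might resolve them (illustrative only;
# the actual call site is not part of this file):
#
#   import dataset
#   train_ds = getattr(dataset, cfg.DATASET.TRAIN_DATASET)(cfg, 'train', True, transform)
#   test_ds = getattr(dataset, cfg.DATASET.TEST_DATASET)(cfg, 'validation', False, transform)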
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from dataset.panoptic import Panoptic as panoptic 11 | from dataset.shelf_synthetic import ShelfSynthetic as shelf_synthetic 12 | from dataset.campus_synthetic import CampusSynthetic as campus_synthetic 13 | from dataset.shelf import Shelf as shelf 14 | from dataset.campus import Campus as campus 15 | -------------------------------------------------------------------------------- /lib/dataset/campus.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import numpy as np 12 | import json_tricks as json 13 | import pickle 14 | import scipy.io as scio 15 | import logging 16 | import copy 17 | import os 18 | from collections import OrderedDict 19 | 20 | from dataset.JointsDataset import JointsDataset 21 | from utils.cameras_cpu import project_pose 22 | 23 | CAMPUS_JOINTS_DEF = { 24 | 'Right-Ankle': 0, 25 | 'Right-Knee': 1, 26 | 'Right-Hip': 2, 27 | 'Left-Hip': 3, 28 | 'Left-Knee': 4, 29 | 'Left-Ankle': 5, 30 | 'Right-Wrist': 6, 31 | 'Right-Elbow': 7, 32 | 'Right-Shoulder': 8, 33 | 'Left-Shoulder': 9, 34 | 'Left-Elbow': 10, 35 | 'Left-Wrist': 11, 36 | 'Bottom-Head': 12, 37 | 'Top-Head': 13 38 | } 39 | 40 | LIMBS = [ 41 | [0, 1], 42 | [1, 2], 43 | [3, 4], 44 | [4, 5], 45 | [2, 3], 46 | [6, 7], 47 | [7, 8], 48 | [9, 10], 49 | [10, 11], 50 | [2, 8], 51 | [3, 9], 52 | [8, 12], 53 | [9, 12], 54 | [12, 13] 55 | ] 56 | 57 | 58 | class Campus(JointsDataset): 59 | def __init__(self, cfg, image_set, is_train, transform=None): 60 | self.pixel_std = 200.0 61 | self.joints_def = CAMPUS_JOINTS_DEF 62 | super().__init__(cfg, image_set, is_train, transform) 63 | self.limbs = LIMBS 64 | self.num_joints = len(CAMPUS_JOINTS_DEF) 65 | self.cam_list = [0, 1, 2] 66 | self.num_views = len(self.cam_list) 67 | self.frame_range = list(range(350, 471)) + list(range(650, 751)) 68 | 69 | self.pred_pose2d = self._get_pred_pose2d() 70 | self.db = self._get_db() 71 | 72 | self.db_size = len(self.db) 73 | 74 | def _get_pred_pose2d(self): 75 | file = os.path.join(self.dataset_root, "pred_campus_maskrcnn_hrnet_coco.pkl") 76 | with open(file, "rb") as pfile: 77 | logging.info("=> load {}".format(file)) 78 | pred_2d = pickle.load(pfile) 79 | 80 | return pred_2d 81 | 82 | def _get_db(self): 83 | width = 360 84 | height = 288 85 | 86 | db = [] 87 | cameras = self._get_cam() 88 | 89 | datafile = os.path.join(self.dataset_root, 'actorsGT.mat') 90 | data = scio.loadmat(datafile) 91 | actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze() # num_person * num_frame 92 | 93 | num_person = len(actor_3d) 94 | num_frames = len(actor_3d[0]) 95 | 96 | for i in self.frame_range: 97 | for k, cam in cameras.items(): 98 | image = osp.join("Camera" + k, "campus4-c{0}-{1:05d}.png".format(k, i)) 99 | 100 | all_poses_3d = [] 101 | all_poses_vis_3d = [] 102 | all_poses = [] 103 | all_poses_vis = [] 104 | for person in range(num_person): 105 | pose3d = 
actor_3d[person][i] * 1000.0 106 | if len(pose3d[0]) > 0: 107 | all_poses_3d.append(pose3d) 108 | all_poses_vis_3d.append(np.ones((self.num_joints, 3))) 109 | 110 | pose2d = project_pose(pose3d, cam) 111 | 112 | x_check = np.bitwise_and(pose2d[:, 0] >= 0, 113 | pose2d[:, 0] <= width - 1) 114 | y_check = np.bitwise_and(pose2d[:, 1] >= 0, 115 | pose2d[:, 1] <= height - 1) 116 | check = np.bitwise_and(x_check, y_check) 117 | 118 | joints_vis = np.ones((len(pose2d), 1)) 119 | joints_vis[np.logical_not(check)] = 0 120 | all_poses.append(pose2d) 121 | all_poses_vis.append( 122 | np.repeat( 123 | np.reshape(joints_vis, (-1, 1)), 2, axis=1)) 124 | 125 | pred_index = '{}_{}'.format(k, i) 126 | preds = self.pred_pose2d[pred_index] 127 | preds = [np.array(p["pred"]) for p in preds] 128 | 129 | db.append({ 130 | 'image': osp.join(self.dataset_root, image), 131 | 'joints_3d': all_poses_3d, 132 | 'joints_3d_vis': all_poses_vis_3d, 133 | 'joints_2d': all_poses, 134 | 'joints_2d_vis': all_poses_vis, 135 | 'camera': cam, 136 | 'pred_pose2d': preds 137 | }) 138 | return db 139 | 140 | def _get_cam(self): 141 | cam_file = osp.join(self.dataset_root, "calibration_campus.json") 142 | with open(cam_file) as cfile: 143 | cameras = json.load(cfile) 144 | 145 | for id, cam in cameras.items(): 146 | for k, v in cam.items(): 147 | cameras[id][k] = np.array(v) 148 | 149 | return cameras 150 | 151 | def __getitem__(self, idx): 152 | input, target_heatmap, target_weight, target_3d, meta, input_heatmap = [], [], [], [], [], [] 153 | for k in range(self.num_views): 154 | i, th, tw, t3, m, ih = super().__getitem__(self.num_views * idx + k) 155 | input.append(i) 156 | target_heatmap.append(th) 157 | target_weight.append(tw) 158 | input_heatmap.append(ih) 159 | target_3d.append(t3) 160 | meta.append(m) 161 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 162 | 163 | def __len__(self): 164 | return self.db_size // self.num_views 165 | 166 | def evaluate(self, preds, recall_threshold=500): 167 | datafile = os.path.join(self.dataset_root, 'actorsGT.mat') 168 | data = scio.loadmat(datafile) 169 | actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze() # num_person * num_frame 170 | num_person = len(actor_3d) 171 | total_gt = 0 172 | match_gt = 0 173 | 174 | limbs = [[0, 1], [1, 2], [3, 4], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11], [12, 13]] 175 | correct_parts = np.zeros(num_person) 176 | total_parts = np.zeros(num_person) 177 | alpha = 0.5 178 | bone_correct_parts = np.zeros((num_person, 10)) 179 | 180 | for i, fi in enumerate(self.frame_range): 181 | pred_coco = preds[i].copy() 182 | pred_coco = pred_coco[pred_coco[:, 0, 3] >= 0, :, :3] 183 | pred = np.stack([self.coco2campus3D(p) for p in copy.deepcopy(pred_coco[:, :, :3])]) 184 | 185 | for person in range(num_person): 186 | gt = actor_3d[person][fi] * 1000.0 187 | if len(gt[0]) == 0: 188 | continue 189 | 190 | mpjpes = np.mean(np.sqrt(np.sum((gt[np.newaxis] - pred) ** 2, axis=-1)), axis=-1) 191 | min_n = np.argmin(mpjpes) 192 | min_mpjpe = np.min(mpjpes) 193 | if min_mpjpe < recall_threshold: 194 | match_gt += 1 195 | total_gt += 1 196 | 197 | for j, k in enumerate(limbs): 198 | total_parts[person] += 1 199 | error_s = np.linalg.norm(pred[min_n, k[0], 0:3] - gt[k[0]]) 200 | error_e = np.linalg.norm(pred[min_n, k[1], 0:3] - gt[k[1]]) 201 | limb_length = np.linalg.norm(gt[k[0]] - gt[k[1]]) 202 | if (error_s + error_e) / 2.0 <= alpha * limb_length: 203 | correct_parts[person] += 1 204 | bone_correct_parts[person, j] += 1 205 | 
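                # The torso segment (hip midpoint to bottom head) is not part of
                # `limbs`, so its PCP contribution is accumulated separately below
                # as bone index 9.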
pred_hip = (pred[min_n, 2, 0:3] + pred[min_n, 3, 0:3]) / 2.0 206 | gt_hip = (gt[2] + gt[3]) / 2.0 207 | total_parts[person] += 1 208 | error_s = np.linalg.norm(pred_hip - gt_hip) 209 | error_e = np.linalg.norm(pred[min_n, 12, 0:3] - gt[12]) 210 | limb_length = np.linalg.norm(gt_hip - gt[12]) 211 | if (error_s + error_e) / 2.0 <= alpha * limb_length: 212 | correct_parts[person] += 1 213 | bone_correct_parts[person, 9] += 1 214 | 215 | actor_pcp = correct_parts / (total_parts + 1e-8) 216 | avg_pcp = np.mean(actor_pcp[:3]) 217 | 218 | bone_group = OrderedDict( 219 | [('Head', [8]), ('Torso', [9]), ('Upper arms', [5, 6]), 220 | ('Lower arms', [4, 7]), ('Upper legs', [1, 2]), ('Lower legs', [0, 3])]) 221 | bone_person_pcp = OrderedDict() 222 | for k, v in bone_group.items(): 223 | bone_person_pcp[k] = np.sum(bone_correct_parts[:, v], axis=-1) / (total_parts / 10 * len(v) + 1e-8) 224 | 225 | return actor_pcp, avg_pcp, bone_person_pcp, match_gt / (total_gt + 1e-8) 226 | 227 | @staticmethod 228 | def coco2campus3D(coco_pose): 229 | """ 230 | transform coco order(our method output) 3d pose to shelf dataset order with interpolation 231 | :param coco_pose: np.array with shape 17x3 232 | :return: 3D pose in campus order with shape 14x3 233 | """ 234 | campus_pose = np.zeros((14, 3)) 235 | coco2campus = np.array([16, 14, 12, 11, 13, 15, 10, 8, 6, 5, 7, 9]) 236 | campus_pose[0: 12] += coco_pose[coco2campus] 237 | 238 | mid_sho = (coco_pose[5] + coco_pose[6]) / 2 # L and R shoulder 239 | head_center = (coco_pose[3] + coco_pose[4]) / 2 # middle of two ear 240 | 241 | head_bottom = (mid_sho + head_center) / 2 # nose and head center 242 | head_top = head_bottom + (head_center - head_bottom) * 2 243 | campus_pose[12] += head_bottom 244 | campus_pose[13] += head_top 245 | 246 | return campus_pose 247 | -------------------------------------------------------------------------------- /lib/dataset/campus_synthetic.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
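#
# Overview (illustrative summary of the class below): this synthetic training set
# does not read real Campus frames. It samples 3D poses from
# panoptic_training_pose.pkl, drops them at random positions and rotations inside
# the capture space, projects them into the three calibrated Campus views, and
# renders Gaussian heatmaps as the network input; the image tensor itself is a
# constant placeholder.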
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import numpy as np 12 | import torch 13 | from torch.utils.data import Dataset 14 | 15 | import json_tricks as json 16 | import pickle 17 | import logging 18 | import copy 19 | import random 20 | import cv2 21 | 22 | import os 23 | 24 | from utils.transforms import get_affine_transform 25 | from utils.transforms import affine_transform 26 | from utils.transforms import rotate_points, get_scale 27 | from utils.cameras_cpu import project_pose 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | coco_joints_def = {0: 'nose', 32 | 1: 'Leye', 2: 'Reye', 3: 'Lear', 4: 'Rear', 33 | 5: 'Lsho', 6: 'Rsho', 34 | 7: 'Lelb', 8: 'Relb', 35 | 9: 'Lwri', 10: 'Rwri', 36 | 11: 'Lhip', 12: 'Rhip', 37 | 13: 'Lkne', 14: 'Rkne', 38 | 15: 'Lank', 16: 'Rank'} 39 | 40 | LIMBS = [[0, 1], [0, 2], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7], [7, 9], [6, 8], [8, 10], [5, 11], [11, 13], [13, 15], 41 | [6, 12], [12, 14], [14, 16], [5, 6], [11, 12]] 42 | 43 | 44 | class CampusSynthetic(Dataset): 45 | def __init__(self, cfg, image_set, is_train, transform=None): 46 | super().__init__() 47 | self.pixel_std = 200.0 48 | self.joints_def = coco_joints_def 49 | self.limbs = LIMBS 50 | self.num_joints = len(coco_joints_def) 51 | self.cam_list = [0, 1, 2] 52 | self.num_views = len(self.cam_list) 53 | self.maximum_person = cfg.MULTI_PERSON.MAX_PEOPLE_NUM 54 | 55 | self.is_train = is_train 56 | 57 | this_dir = os.path.dirname(__file__) 58 | dataset_root = os.path.join(this_dir, '../..', cfg.DATASET.ROOT) 59 | self.dataset_root = dataset_root 60 | self.image_set = image_set 61 | self.dataset_name = cfg.DATASET.TEST_DATASET 62 | 63 | self.data_format = cfg.DATASET.DATA_FORMAT 64 | self.data_augmentation = cfg.DATASET.DATA_AUGMENTATION 65 | 66 | self.color_rgb = cfg.DATASET.COLOR_RGB 67 | 68 | self.target_type = cfg.NETWORK.TARGET_TYPE 69 | self.image_size = np.array(cfg.NETWORK.IMAGE_SIZE) 70 | self.heatmap_size = np.array(cfg.NETWORK.HEATMAP_SIZE) 71 | self.sigma = cfg.NETWORK.SIGMA 72 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 73 | self.joints_weight = 1 74 | 75 | self.transform = transform 76 | 77 | self.space_size = np.array(cfg.MULTI_PERSON.SPACE_SIZE) 78 | self.space_center = np.array(cfg.MULTI_PERSON.SPACE_CENTER) 79 | self.initial_cube_size = np.array(cfg.MULTI_PERSON.INITIAL_CUBE_SIZE) 80 | 81 | pose_db_file = os.path.join(self.dataset_root, "..", "panoptic_training_pose.pkl") 82 | self.pose_db = pickle.load(open(pose_db_file, "rb")) 83 | self.cameras = self._get_cam() 84 | 85 | def _get_cam(self): 86 | cam_file = osp.join(self.dataset_root, "calibration_campus.json") 87 | with open(cam_file) as cfile: 88 | cameras = json.load(cfile) 89 | 90 | for id, cam in cameras.items(): 91 | for k, v in cam.items(): 92 | cameras[id][k] = np.array(v) 93 | 94 | return cameras 95 | 96 | def __getitem__(self, idx): 97 | # nposes = np.random.choice([1, 2, 3, 4, 5], p=[0.1, 0.1, 0.2, 0.4, 0.2]) 98 | nposes = np.random.choice(range(1, 10)) 99 | bbox_list = [] 100 | center_list = [] 101 | 102 | select_poses = np.random.choice(self.pose_db, nposes) 103 | joints_3d = np.array([p['pose'] for p in select_poses]) 104 | joints_3d_vis = np.array([p['vis'] for p in select_poses]) 105 | 106 | for n in range(0, nposes): 107 | points = joints_3d[n][:, :2].copy() 108 | center 
= (points[11, :2] + points[12, :2]) / 2 109 | rot_rad = np.random.uniform(-180, 180) 110 | 111 | new_center = self.get_new_center(center_list) 112 | new_xy = rotate_points(points, center, rot_rad) - center + new_center 113 | 114 | loop_count = 0 115 | while not self.isvalid(new_center, self.calc_bbox(new_xy, joints_3d_vis[n]), bbox_list): 116 | loop_count += 1 117 | if loop_count >= 100: 118 | break 119 | new_center = self.get_new_center(center_list) 120 | new_xy = rotate_points(points, center, rot_rad) - center + new_center 121 | 122 | if loop_count >= 100: 123 | nposes = n 124 | joints_3d = joints_3d[:n] 125 | joints_3d_vis = joints_3d_vis[:n] 126 | else: 127 | center_list.append(new_center) 128 | bbox_list.append(self.calc_bbox(new_xy, joints_3d_vis[n])) 129 | joints_3d[n][:, :2] = new_xy 130 | 131 | input, target_heatmap, target_weight, target_3d, meta, input_heatmap = [], [], [], [], [], [] 132 | for k, cam in self.cameras.items(): 133 | i, th, tw, t3, m, ih = self._get_single_view_item(joints_3d, joints_3d_vis, cam) 134 | input.append(i) 135 | target_heatmap.append(th) 136 | target_weight.append(tw) 137 | input_heatmap.append(ih) 138 | target_3d.append(t3) 139 | meta.append(m) 140 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 141 | 142 | def __len__(self): 143 | return 3000 144 | # return self.db_size // self.num_views 145 | 146 | def _get_single_view_item(self, joints_3d, joints_3d_vis, cam): 147 | joints_3d = copy.deepcopy(joints_3d) 148 | joints_3d_vis = copy.deepcopy(joints_3d_vis) 149 | nposes = len(joints_3d) 150 | 151 | width = 360 152 | height = 288 153 | c = np.array([width / 2.0, height / 2.0], dtype=np.float32) 154 | # s = np.array( 155 | # [width / self.pixel_std, height / self.pixel_std], dtype=np.float32) 156 | s = get_scale((width, height), self.image_size) 157 | r = 0 158 | 159 | joints = [] 160 | joints_vis = [] 161 | for n in range(nposes): 162 | pose2d = project_pose(joints_3d[n], cam) 163 | 164 | x_check = np.bitwise_and(pose2d[:, 0] >= 0, 165 | pose2d[:, 0] <= width - 1) 166 | y_check = np.bitwise_and(pose2d[:, 1] >= 0, 167 | pose2d[:, 1] <= height - 1) 168 | check = np.bitwise_and(x_check, y_check) 169 | vis = joints_3d_vis[n][:, 0] > 0 170 | vis[np.logical_not(check)] = 0 171 | 172 | joints.append(pose2d) 173 | joints_vis.append(np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1)) 174 | 175 | trans = get_affine_transform(c, s, r, self.image_size) 176 | input = np.ones((height, width, 3), dtype=np.float32) 177 | input = cv2.warpAffine( 178 | input, 179 | trans, (int(self.image_size[0]), int(self.image_size[1])), 180 | flags=cv2.INTER_LINEAR) 181 | 182 | if self.transform: 183 | input = self.transform(input) 184 | 185 | for n in range(nposes): 186 | for i in range(len(joints[0])): 187 | if joints_vis[n][i, 0] > 0.0: 188 | joints[n][i, 0:2] = affine_transform( 189 | joints[n][i, 0:2], trans) 190 | if (np.min(joints[n][i, :2]) < 0 or 191 | joints[n][i, 0] >= self.image_size[0] or 192 | joints[n][i, 1] >= self.image_size[1]): 193 | joints_vis[n][i, :] = 0 194 | 195 | input_heatmap, _ = self.generate_input_heatmap( 196 | joints, joints_vis) 197 | input_heatmap = torch.from_numpy(input_heatmap) 198 | target_heatmap = torch.zeros_like(input_heatmap) 199 | target_weight = torch.zeros(len(target_heatmap), 1) 200 | 201 | # make joints and joints_vis having same shape 202 | joints_u = np.zeros((self.maximum_person, len(joints[0]), 2)) 203 | joints_vis_u = np.zeros((self.maximum_person, len(joints[0]), 2)) 204 | for i in range(nposes): 205 | 
joints_u[i] = joints[i] 206 | joints_vis_u[i] = joints_vis[i] 207 | 208 | joints_3d_u = np.zeros((self.maximum_person, len(joints[0]), 3)) 209 | joints_3d_vis_u = np.zeros((self.maximum_person, len(joints[0]), 3)) 210 | for i in range(nposes): 211 | joints_3d_u[i] = joints_3d[i][:, 0:3] 212 | joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3] 213 | 214 | target_3d = self.generate_3d_target(joints_3d) 215 | target_3d = torch.from_numpy(target_3d) 216 | 217 | meta = { 218 | 'image': '', 219 | 'num_person': nposes, 220 | 'joints_3d': joints_3d_u, 221 | 'roots_3d': (joints_3d_u[:, 11] + joints_3d_u[:, 12]) / 2.0, 222 | 'joints_3d_vis': joints_3d_vis_u, 223 | 'joints': joints_u, 224 | 'joints_vis': joints_vis_u, 225 | 'center': c, 226 | 'scale': s, 227 | 'rotation': r, 228 | 'camera': cam 229 | } 230 | 231 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 232 | 233 | @staticmethod 234 | def compute_human_scale(pose, joints_vis): 235 | idx = joints_vis[:, 0] == 1 236 | if np.sum(idx) == 0: 237 | return 0 238 | minx, maxx = np.min(pose[idx, 0]), np.max(pose[idx, 0]) 239 | miny, maxy = np.min(pose[idx, 1]), np.max(pose[idx, 1]) 240 | return np.clip(np.maximum(maxy - miny, maxx - minx) ** 2, 1.0 / 4 * 96 ** 2, 4 * 96 ** 2) 241 | 242 | def generate_input_heatmap(self, joints, joints_vis): 243 | ''' 244 | :param joints: [[num_joints, 3]] 245 | :param joints_vis: [num_joints, 3] 246 | :return: input_heatmap 247 | ''' 248 | nposes = len(joints) 249 | num_joints = joints[0].shape[0] 250 | target_weight = np.zeros((num_joints, 1), dtype=np.float32) 251 | for i in range(num_joints): 252 | for n in range(nposes): 253 | if joints_vis[n][i, 0] == 1: 254 | target_weight[i, 0] = 1 255 | 256 | assert self.target_type == 'gaussian', \ 257 | 'Only support gaussian map now!' 
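        # The rendered peak value of each joint is randomly jittered (and sometimes
        # shrunk, most aggressively for wrists and elbows) so the synthetic heatmaps
        # mimic the confidence profile of an imperfect 2D detector.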
258 | 259 | if self.target_type == 'gaussian': 260 | target = np.zeros( 261 | (num_joints, self.heatmap_size[1], self.heatmap_size[0]), 262 | dtype=np.float32) 263 | feat_stride = self.image_size / self.heatmap_size 264 | 265 | for n in range(nposes): 266 | # obscured = random.random() < 0.05 267 | # if obscured: 268 | # continue 269 | human_scale = 2 * self.compute_human_scale(joints[n] / feat_stride, joints_vis[n]) 270 | if human_scale == 0: 271 | continue 272 | 273 | cur_sigma = self.sigma * np.sqrt((human_scale / (96.0 * 96.0))) 274 | tmp_size = cur_sigma * 3 275 | for joint_id in range(num_joints): 276 | feat_stride = self.image_size / self.heatmap_size 277 | mu_x = int(joints[n][joint_id][0] / feat_stride[0]) 278 | mu_y = int(joints[n][joint_id][1] / feat_stride[1]) 279 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 280 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 281 | if joints_vis[n][joint_id, 0] == 0 or \ 282 | ul[0] >= self.heatmap_size[0] or \ 283 | ul[1] >= self.heatmap_size[1] \ 284 | or br[0] < 0 or br[1] < 0: 285 | continue 286 | 287 | size = 2 * tmp_size + 1 288 | x = np.arange(0, size, 1, np.float32) 289 | y = x[:, np.newaxis] 290 | x0 = y0 = size // 2 291 | scale = 0.9 + np.random.randn(1) * 0.03 if random.random() < 0.6 else 1.0 292 | if joint_id in [7, 8]: 293 | scale = scale * 0.5 if random.random() < 0.1 else scale 294 | elif joint_id in [9, 10]: 295 | scale = scale * 0.2 if random.random() < 0.1 else scale 296 | else: 297 | scale = scale * 0.5 if random.random() < 0.05 else scale 298 | g = np.exp( 299 | -((x - x0) ** 2 + (y - y0) ** 2) / (2 * cur_sigma ** 2)) * scale 300 | 301 | # Usable gaussian range 302 | g_x = max(0, 303 | -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 304 | g_y = max(0, 305 | -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 306 | # Image range 307 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 308 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 309 | 310 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 311 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]], 312 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 313 | target = np.clip(target, 0, 1) 314 | 315 | if self.use_different_joints_weight: 316 | target_weight = np.multiply(target_weight, self.joints_weight) 317 | 318 | return target, target_weight 319 | 320 | def generate_3d_target(self, joints_3d): 321 | num_people = len(joints_3d) 322 | 323 | space_size = self.space_size 324 | space_center = self.space_center 325 | cube_size = self.initial_cube_size 326 | grid1Dx = np.linspace(-space_size[0] / 2, space_size[0] / 2, cube_size[0]) + space_center[0] 327 | grid1Dy = np.linspace(-space_size[1] / 2, space_size[1] / 2, cube_size[1]) + space_center[1] 328 | grid1Dz = np.linspace(-space_size[2] / 2, space_size[2] / 2, cube_size[2]) + space_center[2] 329 | 330 | target = np.zeros((cube_size[0], cube_size[1], cube_size[2]), dtype=np.float32) 331 | cur_sigma = 200.0 332 | 333 | for n in range(num_people): 334 | joint_id = [11, 12] # mid-hip 335 | mu_x = (joints_3d[n][joint_id[0]][0] + joints_3d[n][joint_id[1]][0]) / 2.0 336 | mu_y = (joints_3d[n][joint_id[0]][1] + joints_3d[n][joint_id[1]][1]) / 2.0 337 | mu_z = (joints_3d[n][joint_id[0]][2] + joints_3d[n][joint_id[1]][2]) / 2.0 338 | 339 | i_x = [np.searchsorted(grid1Dx, mu_x - 3 * cur_sigma), 340 | np.searchsorted(grid1Dx, mu_x + 3 * cur_sigma, 'right')] 341 | i_y = [np.searchsorted(grid1Dy, mu_y - 3 * cur_sigma), 342 | np.searchsorted(grid1Dy, mu_y + 3 * cur_sigma, 'right')] 343 | i_z = 
[np.searchsorted(grid1Dz, mu_z - 3 * cur_sigma), 344 | np.searchsorted(grid1Dz, mu_z + 3 * cur_sigma, 'right')] 345 | if i_x[0] >= i_x[1] or i_y[0] >= i_y[1] or i_z[0] >= i_z[1]: 346 | continue 347 | 348 | gridx, gridy, gridz = np.meshgrid(grid1Dx[i_x[0]:i_x[1]], grid1Dy[i_y[0]:i_y[1]], grid1Dz[i_z[0]:i_z[1]], 349 | indexing='ij') 350 | g = np.exp(-((gridx - mu_x) ** 2 + (gridy - mu_y) ** 2 + (gridz - mu_z) ** 2) / (2 * cur_sigma ** 2)) 351 | target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]] = np.maximum( 352 | target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]], g) 353 | 354 | target = np.clip(target, 0, 1) 355 | return target 356 | 357 | def evaluate(self): 358 | pass 359 | 360 | @staticmethod 361 | def get_new_center(center_list): 362 | if len(center_list) == 0 or random.random() < 0.7: 363 | new_center = np.array([np.random.uniform(-2500.0, 8500.0), np.random.uniform(-1000.0, 10000.0)]) 364 | else: 365 | xy = center_list[np.random.choice(range(len(center_list)))] 366 | new_center = xy + np.random.normal(500, 50, 2) * np.random.choice([1, -1], 2) 367 | 368 | return new_center 369 | 370 | def isvalid(self, new_center, bbox, bbox_list): 371 | new_center_us = new_center.reshape(1, -1) 372 | vis = 0 373 | for k, cam in self.cameras.items(): 374 | width = 360 375 | height = 288 376 | loc_2d = project_pose(np.hstack((new_center_us, [[1000.0]])), cam) 377 | if 10 < loc_2d[0, 0] < width - 10 and 10 < loc_2d[0, 1] < height - 10: 378 | vis += 1 379 | 380 | if len(bbox_list) == 0: 381 | return vis >= 2 382 | 383 | bbox_list = np.array(bbox_list) 384 | x0 = np.maximum(bbox[0], bbox_list[:, 0]) 385 | y0 = np.maximum(bbox[1], bbox_list[:, 1]) 386 | x1 = np.minimum(bbox[2], bbox_list[:, 2]) 387 | y1 = np.minimum(bbox[3], bbox_list[:, 3]) 388 | 389 | intersection = np.maximum(0, (x1 - x0) * (y1 - y0)) 390 | area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 391 | area_list = (bbox_list[:, 2] - bbox_list[:, 0]) * (bbox_list[:, 3] - bbox_list[:, 1]) 392 | iou_list = intersection / (area + area_list - intersection) 393 | 394 | return vis >= 2 and np.max(iou_list) < 0.01 395 | 396 | @staticmethod 397 | def calc_bbox(pose, pose_vis): 398 | index = pose_vis[:, 0] > 0 399 | bbox = [np.min(pose[index, 0]), np.min(pose[index, 1]), 400 | np.max(pose[index, 0]), np.max(pose[index, 1])] 401 | 402 | return np.array(bbox) 403 | -------------------------------------------------------------------------------- /lib/dataset/panoptic.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
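#
# Overview (summary of the loader below): it parses hdPose3d_stage1_coco19
# annotations for the selected HD cameras, rotates the world frame with the fixed
# matrix M, scales the Panoptic coordinates from centimetres to millimetres (x10),
# and caches the assembled sample list in group_{image_set}_cam{num_views}.pkl so
# that subsequent runs skip the expensive JSON parsing.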
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import glob 11 | import os.path as osp 12 | import numpy as np 13 | import json_tricks as json 14 | import pickle 15 | import logging 16 | import os 17 | import copy 18 | 19 | from dataset.JointsDataset import JointsDataset 20 | from utils.transforms import projectPoints 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | TRAIN_LIST = [ 25 | '160422_ultimatum1', 26 | '160224_haggling1', 27 | '160226_haggling1', 28 | '161202_haggling1', 29 | '160906_ian1', 30 | '160906_ian2', 31 | '160906_ian3', 32 | '160906_band1', 33 | '160906_band2', 34 | '160906_band3', 35 | ] 36 | VAL_LIST = ['160906_pizza1', '160422_haggling1', '160906_ian5', '160906_band4'] 37 | 38 | JOINTS_DEF = { 39 | 'neck': 0, 40 | 'nose': 1, 41 | 'mid-hip': 2, 42 | 'l-shoulder': 3, 43 | 'l-elbow': 4, 44 | 'l-wrist': 5, 45 | 'l-hip': 6, 46 | 'l-knee': 7, 47 | 'l-ankle': 8, 48 | 'r-shoulder': 9, 49 | 'r-elbow': 10, 50 | 'r-wrist': 11, 51 | 'r-hip': 12, 52 | 'r-knee': 13, 53 | 'r-ankle': 14, 54 | # 'l-eye': 15, 55 | # 'l-ear': 16, 56 | # 'r-eye': 17, 57 | # 'r-ear': 18, 58 | } 59 | 60 | LIMBS = [[0, 1], 61 | [0, 2], 62 | [0, 3], 63 | [3, 4], 64 | [4, 5], 65 | [0, 9], 66 | [9, 10], 67 | [10, 11], 68 | [2, 6], 69 | [2, 12], 70 | [6, 7], 71 | [7, 8], 72 | [12, 13], 73 | [13, 14]] 74 | 75 | 76 | class Panoptic(JointsDataset): 77 | def __init__(self, cfg, image_set, is_train, transform=None): 78 | super().__init__(cfg, image_set, is_train, transform) 79 | self.pixel_std = 200.0 80 | self.joints_def = JOINTS_DEF 81 | self.limbs = LIMBS 82 | self.num_joints = len(JOINTS_DEF) 83 | 84 | if self.image_set == 'train': 85 | self.sequence_list = TRAIN_LIST 86 | self._interval = 3 87 | self.cam_list = [(0, 12), (0, 6), (0, 23), (0, 13), (0, 3)][:self.num_views] 88 | # self.cam_list = list(set([(0, n) for n in range(0, 31)]) - {(0, 12), (0, 6), (0, 23), (0, 13), (0, 3)}) 89 | # self.cam_list.sort() 90 | self.num_views = len(self.cam_list) 91 | elif self.image_set == 'validation': 92 | self.sequence_list = VAL_LIST 93 | self._interval = 12 94 | self.cam_list = [(0, 12), (0, 6), (0, 23), (0, 13), (0, 3)][:self.num_views] 95 | self.num_views = len(self.cam_list) 96 | 97 | self.db_file = 'group_{}_cam{}.pkl'.format(self.image_set, self.num_views) 98 | self.db_file = os.path.join(self.dataset_root, self.db_file) 99 | 100 | if osp.exists(self.db_file): 101 | info = pickle.load(open(self.db_file, 'rb')) 102 | assert info['sequence_list'] == self.sequence_list 103 | assert info['interval'] == self._interval 104 | assert info['cam_list'] == self.cam_list 105 | self.db = info['db'] 106 | else: 107 | self.db = self._get_db() 108 | info = { 109 | 'sequence_list': self.sequence_list, 110 | 'interval': self._interval, 111 | 'cam_list': self.cam_list, 112 | 'db': self.db 113 | } 114 | pickle.dump(info, open(self.db_file, 'wb')) 115 | # self.db = self._get_db() 116 | self.db_size = len(self.db) 117 | 118 | def _get_db(self): 119 | width = 1920 120 | height = 1080 121 | db = [] 122 | for seq in self.sequence_list: 123 | 124 | cameras = self._get_cam(seq) 125 | 126 | curr_anno = osp.join(self.dataset_root, seq, 'hdPose3d_stage1_coco19') 127 | anno_files = sorted(glob.iglob('{:s}/*.json'.format(curr_anno))) 128 | 129 | for i, file in enumerate(anno_files): 130 | if i % self._interval == 0: 131 | with open(file) as dfile: 132 | bodies = 
json.load(dfile)['bodies'] 133 | if len(bodies) == 0: 134 | continue 135 | 136 | for k, v in cameras.items(): 137 | postfix = osp.basename(file).replace('body3DScene', '') 138 | prefix = '{:02d}_{:02d}'.format(k[0], k[1]) 139 | image = osp.join(seq, 'hdImgs', prefix, 140 | prefix + postfix) 141 | image = image.replace('json', 'jpg') 142 | 143 | all_poses_3d = [] 144 | all_poses_vis_3d = [] 145 | all_poses = [] 146 | all_poses_vis = [] 147 | for body in bodies: 148 | pose3d = np.array(body['joints19']).reshape((-1, 4)) 149 | pose3d = pose3d[:self.num_joints] 150 | 151 | joints_vis = pose3d[:, -1] > 0.1 152 | 153 | if not joints_vis[self.root_id]: 154 | continue 155 | 156 | # Coordinate transformation 157 | M = np.array([[1.0, 0.0, 0.0], 158 | [0.0, 0.0, -1.0], 159 | [0.0, 1.0, 0.0]]) 160 | pose3d[:, 0:3] = pose3d[:, 0:3].dot(M) 161 | 162 | all_poses_3d.append(pose3d[:, 0:3] * 10.0) 163 | all_poses_vis_3d.append( 164 | np.repeat( 165 | np.reshape(joints_vis, (-1, 1)), 3, axis=1)) 166 | 167 | pose2d = np.zeros((pose3d.shape[0], 2)) 168 | pose2d[:, :2] = projectPoints( 169 | pose3d[:, 0:3].transpose(), v['K'], v['R'], 170 | v['t'], v['distCoef']).transpose()[:, :2] 171 | x_check = np.bitwise_and(pose2d[:, 0] >= 0, 172 | pose2d[:, 0] <= width - 1) 173 | y_check = np.bitwise_and(pose2d[:, 1] >= 0, 174 | pose2d[:, 1] <= height - 1) 175 | check = np.bitwise_and(x_check, y_check) 176 | joints_vis[np.logical_not(check)] = 0 177 | 178 | all_poses.append(pose2d) 179 | all_poses_vis.append( 180 | np.repeat( 181 | np.reshape(joints_vis, (-1, 1)), 2, axis=1)) 182 | 183 | if len(all_poses_3d) > 0: 184 | our_cam = {} 185 | our_cam['R'] = v['R'] 186 | our_cam['T'] = -np.dot(v['R'].T, v['t']) * 10.0 # cm to mm 187 | our_cam['fx'] = np.array(v['K'][0, 0]) 188 | our_cam['fy'] = np.array(v['K'][1, 1]) 189 | our_cam['cx'] = np.array(v['K'][0, 2]) 190 | our_cam['cy'] = np.array(v['K'][1, 2]) 191 | our_cam['k'] = v['distCoef'][[0, 1, 4]].reshape(3, 1) 192 | our_cam['p'] = v['distCoef'][[2, 3]].reshape(2, 1) 193 | 194 | db.append({ 195 | 'key': "{}_{}{}".format(seq, prefix, postfix.split('.')[0]), 196 | 'image': osp.join(self.dataset_root, image), 197 | 'joints_3d': all_poses_3d, 198 | 'joints_3d_vis': all_poses_vis_3d, 199 | 'joints_2d': all_poses, 200 | 'joints_2d_vis': all_poses_vis, 201 | 'camera': our_cam 202 | }) 203 | return db 204 | 205 | def _get_cam(self, seq): 206 | cam_file = osp.join(self.dataset_root, seq, 'calibration_{:s}.json'.format(seq)) 207 | with open(cam_file) as cfile: 208 | calib = json.load(cfile) 209 | 210 | M = np.array([[1.0, 0.0, 0.0], 211 | [0.0, 0.0, -1.0], 212 | [0.0, 1.0, 0.0]]) 213 | cameras = {} 214 | for cam in calib['cameras']: 215 | if (cam['panel'], cam['node']) in self.cam_list: 216 | sel_cam = {} 217 | sel_cam['K'] = np.array(cam['K']) 218 | sel_cam['distCoef'] = np.array(cam['distCoef']) 219 | sel_cam['R'] = np.array(cam['R']).dot(M) 220 | sel_cam['t'] = np.array(cam['t']).reshape((3, 1)) 221 | cameras[(cam['panel'], cam['node'])] = sel_cam 222 | return cameras 223 | 224 | def __getitem__(self, idx): 225 | input, target, weight, target_3d, meta, input_heatmap = [], [], [], [], [], [] 226 | 227 | # if self.image_set == 'train': 228 | # # camera_num = np.random.choice([5], size=1) 229 | # select_cam = np.random.choice(self.num_views, size=5, replace=False) 230 | # elif self.image_set == 'validation': 231 | # select_cam = list(range(self.num_views)) 232 | 233 | for k in range(self.num_views): 234 | i, t, w, t3, m, ih = super().__getitem__(self.num_views * idx + k) 235 | if i 
is None: 236 | continue 237 | input.append(i) 238 | target.append(t) 239 | weight.append(w) 240 | target_3d.append(t3) 241 | meta.append(m) 242 | input_heatmap.append(ih) 243 | return input, target, weight, target_3d, meta, input_heatmap 244 | 245 | def __len__(self): 246 | return self.db_size // self.num_views 247 | 248 | def evaluate(self, preds): 249 | eval_list = [] 250 | gt_num = self.db_size // self.num_views 251 | assert len(preds) == gt_num, 'number mismatch' 252 | 253 | total_gt = 0 254 | for i in range(gt_num): 255 | index = self.num_views * i 256 | db_rec = copy.deepcopy(self.db[index]) 257 | joints_3d = db_rec['joints_3d'] 258 | joints_3d_vis = db_rec['joints_3d_vis'] 259 | 260 | if len(joints_3d) == 0: 261 | continue 262 | 263 | pred = preds[i].copy() 264 | pred = pred[pred[:, 0, 3] >= 0] 265 | for pose in pred: 266 | mpjpes = [] 267 | for (gt, gt_vis) in zip(joints_3d, joints_3d_vis): 268 | vis = gt_vis[:, 0] > 0 269 | mpjpe = np.mean(np.sqrt(np.sum((pose[vis, 0:3] - gt[vis]) ** 2, axis=-1))) 270 | mpjpes.append(mpjpe) 271 | min_gt = np.argmin(mpjpes) 272 | min_mpjpe = np.min(mpjpes) 273 | score = pose[0, 4] 274 | eval_list.append({ 275 | "mpjpe": float(min_mpjpe), 276 | "score": float(score), 277 | "gt_id": int(total_gt + min_gt) 278 | }) 279 | 280 | total_gt += len(joints_3d) 281 | 282 | mpjpe_threshold = np.arange(25, 155, 25) 283 | aps = [] 284 | recs = [] 285 | for t in mpjpe_threshold: 286 | ap, rec = self._eval_list_to_ap(eval_list, total_gt, t) 287 | aps.append(ap) 288 | recs.append(rec) 289 | 290 | return aps, recs, self._eval_list_to_mpjpe(eval_list), self._eval_list_to_recall(eval_list, total_gt) 291 | 292 | @staticmethod 293 | def _eval_list_to_ap(eval_list, total_gt, threshold): 294 | eval_list.sort(key=lambda k: k["score"], reverse=True) 295 | total_num = len(eval_list) 296 | 297 | tp = np.zeros(total_num) 298 | fp = np.zeros(total_num) 299 | gt_det = [] 300 | for i, item in enumerate(eval_list): 301 | if item["mpjpe"] < threshold and item["gt_id"] not in gt_det: 302 | tp[i] = 1 303 | gt_det.append(item["gt_id"]) 304 | else: 305 | fp[i] = 1 306 | tp = np.cumsum(tp) 307 | fp = np.cumsum(fp) 308 | recall = tp / (total_gt + 1e-5) 309 | precise = tp / (tp + fp + 1e-5) 310 | for n in range(total_num - 2, -1, -1): 311 | precise[n] = max(precise[n], precise[n + 1]) 312 | 313 | precise = np.concatenate(([0], precise, [0])) 314 | recall = np.concatenate(([0], recall, [1])) 315 | index = np.where(recall[1:] != recall[:-1])[0] 316 | ap = np.sum((recall[index + 1] - recall[index]) * precise[index + 1]) 317 | 318 | return ap, recall[-2] 319 | 320 | @staticmethod 321 | def _eval_list_to_mpjpe(eval_list, threshold=500): 322 | eval_list.sort(key=lambda k: k["score"], reverse=True) 323 | gt_det = [] 324 | 325 | mpjpes = [] 326 | for i, item in enumerate(eval_list): 327 | if item["mpjpe"] < threshold and item["gt_id"] not in gt_det: 328 | mpjpes.append(item["mpjpe"]) 329 | gt_det.append(item["gt_id"]) 330 | 331 | return np.mean(mpjpes) if len(mpjpes) > 0 else np.inf 332 | 333 | @staticmethod 334 | def _eval_list_to_recall(eval_list, total_gt, threshold=500): 335 | gt_ids = [e["gt_id"] for e in eval_list if e["mpjpe"] < threshold] 336 | 337 | return len(np.unique(gt_ids)) / total_gt 338 | 339 | 340 | 341 | 342 | -------------------------------------------------------------------------------- /lib/dataset/shelf.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | 
# Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import numpy as np 12 | import json_tricks as json 13 | import pickle 14 | import scipy.io as scio 15 | import logging 16 | import copy 17 | import os 18 | from collections import OrderedDict 19 | 20 | from dataset.JointsDataset import JointsDataset 21 | from utils.cameras_cpu import project_pose 22 | 23 | SHELF_JOINTS_DEF = { 24 | 'Right-Ankle': 0, 25 | 'Right-Knee': 1, 26 | 'Right-Hip': 2, 27 | 'Left-Hip': 3, 28 | 'Left-Knee': 4, 29 | 'Left-Ankle': 5, 30 | 'Right-Wrist': 6, 31 | 'Right-Elbow': 7, 32 | 'Right-Shoulder': 8, 33 | 'Left-Shoulder': 9, 34 | 'Left-Elbow': 10, 35 | 'Left-Wrist': 11, 36 | 'Bottom-Head': 12, 37 | 'Top-Head': 13 38 | } 39 | 40 | LIMBS = [ 41 | [0, 1], 42 | [1, 2], 43 | [3, 4], 44 | [4, 5], 45 | [2, 3], 46 | [6, 7], 47 | [7, 8], 48 | [9, 10], 49 | [10, 11], 50 | [2, 8], 51 | [3, 9], 52 | [8, 12], 53 | [9, 12], 54 | [12, 13] 55 | ] 56 | 57 | 58 | class Shelf(JointsDataset): 59 | def __init__(self, cfg, image_set, is_train, transform=None): 60 | self.pixel_std = 200.0 61 | self.joints_def = SHELF_JOINTS_DEF 62 | super().__init__(cfg, image_set, is_train, transform) 63 | self.limbs = LIMBS 64 | self.num_joints = len(SHELF_JOINTS_DEF) 65 | self.cam_list = [0, 1, 2, 3, 4] 66 | self.num_views = len(self.cam_list) 67 | self.frame_range = list(range(300, 601)) 68 | 69 | self.pred_pose2d = self._get_pred_pose2d() 70 | self.db = self._get_db() 71 | 72 | self.db_size = len(self.db) 73 | 74 | def _get_pred_pose2d(self): 75 | file = os.path.join(self.dataset_root, "pred_shelf_maskrcnn_hrnet_coco.pkl") 76 | with open(file, "rb") as pfile: 77 | logging.info("=> load {}".format(file)) 78 | pred_2d = pickle.load(pfile) 79 | 80 | return pred_2d 81 | 82 | def _get_db(self): 83 | width = 1032 84 | height = 776 85 | 86 | db = [] 87 | cameras = self._get_cam() 88 | 89 | datafile = os.path.join(self.dataset_root, 'actorsGT.mat') 90 | data = scio.loadmat(datafile) 91 | actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze() # num_person * num_frame 92 | 93 | num_person = len(actor_3d) 94 | num_frames = len(actor_3d[0]) 95 | 96 | for i in self.frame_range: 97 | for k, cam in cameras.items(): 98 | image = osp.join("Camera" + k, "img_{:06d}.png".format(i)) 99 | 100 | all_poses_3d = [] 101 | all_poses_vis_3d = [] 102 | all_poses = [] 103 | all_poses_vis = [] 104 | for person in range(num_person): 105 | pose3d = actor_3d[person][i] * 1000.0 106 | if len(pose3d[0]) > 0: 107 | all_poses_3d.append(pose3d) 108 | all_poses_vis_3d.append(np.ones((self.num_joints, 3))) 109 | 110 | pose2d = project_pose(pose3d, cam) 111 | 112 | x_check = np.bitwise_and(pose2d[:, 0] >= 0, 113 | pose2d[:, 0] <= width - 1) 114 | y_check = np.bitwise_and(pose2d[:, 1] >= 0, 115 | pose2d[:, 1] <= height - 1) 116 | check = np.bitwise_and(x_check, y_check) 117 | 118 | joints_vis = np.ones((len(pose2d), 1)) 119 | joints_vis[np.logical_not(check)] = 0 120 | all_poses.append(pose2d) 121 | all_poses_vis.append( 122 | np.repeat( 123 | np.reshape(joints_vis, (-1, 1)), 2, axis=1)) 124 | 125 | pred_index = '{}_{}'.format(k, i) 126 | preds = self.pred_pose2d[pred_index] 127 | preds = [np.array(p["pred"]) for p in preds] 128 | db.append({ 129 | 'image': osp.join(self.dataset_root, 
image), 130 | 'joints_3d': all_poses_3d, 131 | 'joints_3d_vis': all_poses_vis_3d, 132 | 'joints_2d': all_poses, 133 | 'joints_2d_vis': all_poses_vis, 134 | 'camera': cam, 135 | 'pred_pose2d': preds 136 | }) 137 | 138 | return db 139 | 140 | def _get_cam(self): 141 | cam_file = osp.join(self.dataset_root, "calibration_shelf.json") 142 | with open(cam_file) as cfile: 143 | cameras = json.load(cfile) 144 | 145 | for id, cam in cameras.items(): 146 | for k, v in cam.items(): 147 | cameras[id][k] = np.array(v) 148 | 149 | return cameras 150 | 151 | def __getitem__(self, idx): 152 | input, target_heatmap, target_weight, target_3d, meta, input_heatmap = [], [], [], [], [], [] 153 | for k in range(self.num_views): 154 | i, th, tw, t3, m, ih = super().__getitem__(self.num_views * idx + k) 155 | input.append(i) 156 | target_heatmap.append(th) 157 | target_weight.append(tw) 158 | input_heatmap.append(ih) 159 | target_3d.append(t3) 160 | meta.append(m) 161 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 162 | 163 | def __len__(self): 164 | return self.db_size // self.num_views 165 | 166 | def evaluate(self, preds, recall_threshold=500): 167 | datafile = os.path.join(self.dataset_root, 'actorsGT.mat') 168 | data = scio.loadmat(datafile) 169 | actor_3d = np.array(np.array(data['actor3D'].tolist()).tolist()).squeeze() # num_person * num_frame 170 | num_person = len(actor_3d) 171 | total_gt = 0 172 | match_gt = 0 173 | 174 | limbs = [[0, 1], [1, 2], [3, 4], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11], [12, 13]] 175 | correct_parts = np.zeros(num_person) 176 | total_parts = np.zeros(num_person) 177 | alpha = 0.5 178 | bone_correct_parts = np.zeros((num_person, 10)) 179 | 180 | for i, fi in enumerate(self.frame_range): 181 | pred_coco = preds[i].copy() 182 | pred_coco = pred_coco[pred_coco[:, 0, 3] >= 0, :, :3] 183 | pred = np.stack([self.coco2shelf3D(p) for p in copy.deepcopy(pred_coco[:, :, :3])]) 184 | 185 | for person in range(num_person): 186 | gt = actor_3d[person][fi] * 1000.0 187 | if len(gt[0]) == 0: 188 | continue 189 | 190 | mpjpes = np.mean(np.sqrt(np.sum((gt[np.newaxis] - pred) ** 2, axis=-1)), axis=-1) 191 | min_n = np.argmin(mpjpes) 192 | min_mpjpe = np.min(mpjpes) 193 | if min_mpjpe < recall_threshold: 194 | match_gt += 1 195 | total_gt += 1 196 | 197 | for j, k in enumerate(limbs): 198 | total_parts[person] += 1 199 | error_s = np.linalg.norm(pred[min_n, k[0], 0:3] - gt[k[0]]) 200 | error_e = np.linalg.norm(pred[min_n, k[1], 0:3] - gt[k[1]]) 201 | limb_length = np.linalg.norm(gt[k[0]] - gt[k[1]]) 202 | if (error_s + error_e) / 2.0 <= alpha * limb_length: 203 | correct_parts[person] += 1 204 | bone_correct_parts[person, j] += 1 205 | pred_hip = (pred[min_n, 2, 0:3] + pred[min_n, 3, 0:3]) / 2.0 206 | gt_hip = (gt[2] + gt[3]) / 2.0 207 | total_parts[person] += 1 208 | error_s = np.linalg.norm(pred_hip - gt_hip) 209 | error_e = np.linalg.norm(pred[min_n, 12, 0:3] - gt[12]) 210 | limb_length = np.linalg.norm(gt_hip - gt[12]) 211 | if (error_s + error_e) / 2.0 <= alpha * limb_length: 212 | correct_parts[person] += 1 213 | bone_correct_parts[person, 9] += 1 214 | 215 | actor_pcp = correct_parts / (total_parts + 1e-8) 216 | avg_pcp = np.mean(actor_pcp[:3]) 217 | 218 | bone_group = OrderedDict( 219 | [('Head', [8]), ('Torso', [9]), ('Upper arms', [5, 6]), 220 | ('Lower arms', [4, 7]), ('Upper legs', [1, 2]), ('Lower legs', [0, 3])]) 221 | bone_person_pcp = OrderedDict() 222 | for k, v in bone_group.items(): 223 | bone_person_pcp[k] = np.sum(bone_correct_parts[:, v], 
axis=-1) / (total_parts / 10 * len(v) + 1e-8) 224 | 225 | return actor_pcp, avg_pcp, bone_person_pcp, match_gt / (total_gt + 1e-8) 226 | 227 | @staticmethod 228 | def coco2shelf3D(coco_pose): 229 | """ 230 | transform coco order(our method output) 3d pose to shelf dataset order with interpolation 231 | :param coco_pose: np.array with shape 17x3 232 | :return: 3D pose in shelf order with shape 14x3 233 | """ 234 | shelf_pose = np.zeros((14, 3)) 235 | coco2shelf = np.array([16, 14, 12, 11, 13, 15, 10, 8, 6, 5, 7, 9]) 236 | shelf_pose[0: 12] += coco_pose[coco2shelf] 237 | 238 | mid_sho = (coco_pose[5] + coco_pose[6]) / 2 # L and R shoulder 239 | head_center = (coco_pose[3] + coco_pose[4]) / 2 # middle of two ear 240 | 241 | head_bottom = (mid_sho + head_center) / 2 # nose and head center 242 | head_top = head_bottom + (head_center - head_bottom) * 2 243 | # shelf_pose[12] += head_bottom 244 | # shelf_pose[13] += head_top 245 | 246 | shelf_pose[12] = (shelf_pose[8] + shelf_pose[9]) / 2 # Use middle of shoulder to init 247 | shelf_pose[13] = coco_pose[0] # use nose to init 248 | 249 | shelf_pose[13] = shelf_pose[12] + (shelf_pose[13] - shelf_pose[12]) * np.array([0.75, 0.75, 1.5]) 250 | shelf_pose[12] = shelf_pose[12] + (coco_pose[0] - shelf_pose[12]) * np.array([0.5, 0.5, 0.5]) 251 | 252 | alpha = 0.75 253 | shelf_pose[13] = shelf_pose[13] * alpha + head_top * (1 - alpha) 254 | shelf_pose[12] = shelf_pose[12] * alpha + head_bottom * (1 - alpha) 255 | 256 | return shelf_pose 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | -------------------------------------------------------------------------------- /lib/dataset/shelf_synthetic.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
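#
# Overview (illustrative summary): this mirrors campus_synthetic.py but uses the
# Shelf calibration -- five cameras, 1032x776 frames, and up to five synthesized
# people per sample -- again rendering input heatmaps from projected Panoptic
# poses instead of reading real images.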
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import numpy as np 12 | import torch 13 | from torch.utils.data import Dataset 14 | 15 | import json_tricks as json 16 | import pickle 17 | import logging 18 | import copy 19 | import random 20 | import cv2 21 | 22 | import os 23 | 24 | from utils.transforms import get_affine_transform 25 | from utils.transforms import affine_transform 26 | from utils.transforms import rotate_points, get_scale 27 | from utils.cameras_cpu import project_pose 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | coco_joints_def = {0: 'nose', 32 | 1: 'Leye', 2: 'Reye', 3: 'Lear', 4: 'Rear', 33 | 5: 'Lsho', 6: 'Rsho', 34 | 7: 'Lelb', 8: 'Relb', 35 | 9: 'Lwri', 10: 'Rwri', 36 | 11: 'Lhip', 12: 'Rhip', 37 | 13: 'Lkne', 14: 'Rkne', 38 | 15: 'Lank', 16: 'Rank'} 39 | 40 | LIMBS = [[0, 1], [0, 2], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7], [7, 9], [6, 8], [8, 10], [5, 11], [11, 13], [13, 15], 41 | [6, 12], [12, 14], [14, 16], [5, 6], [11, 12]] 42 | 43 | 44 | class ShelfSynthetic(Dataset): 45 | def __init__(self, cfg, image_set, is_train, transform=None): 46 | super().__init__() 47 | self.pixel_std = 200.0 48 | self.joints_def = coco_joints_def 49 | self.limbs = LIMBS 50 | self.num_joints = len(coco_joints_def) 51 | self.cam_list = [0, 1, 2, 3, 4] 52 | self.num_views = len(self.cam_list) 53 | self.maximum_person = cfg.MULTI_PERSON.MAX_PEOPLE_NUM 54 | 55 | self.is_train = is_train 56 | 57 | this_dir = os.path.dirname(__file__) 58 | dataset_root = os.path.join(this_dir, '../..', cfg.DATASET.ROOT) 59 | self.dataset_root = dataset_root 60 | self.image_set = image_set 61 | self.dataset_name = cfg.DATASET.TEST_DATASET 62 | 63 | self.data_format = cfg.DATASET.DATA_FORMAT 64 | self.data_augmentation = cfg.DATASET.DATA_AUGMENTATION 65 | 66 | self.color_rgb = cfg.DATASET.COLOR_RGB 67 | 68 | self.target_type = cfg.NETWORK.TARGET_TYPE 69 | self.image_size = np.array(cfg.NETWORK.IMAGE_SIZE) 70 | self.heatmap_size = np.array(cfg.NETWORK.HEATMAP_SIZE) 71 | self.sigma = cfg.NETWORK.SIGMA 72 | self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT 73 | self.joints_weight = 1 74 | 75 | self.transform = transform 76 | 77 | self.space_size = np.array(cfg.MULTI_PERSON.SPACE_SIZE) 78 | self.space_center = np.array(cfg.MULTI_PERSON.SPACE_CENTER) 79 | self.initial_cube_size = np.array(cfg.MULTI_PERSON.INITIAL_CUBE_SIZE) 80 | 81 | pose_db_file = os.path.join(self.dataset_root, "..", "panoptic_training_pose.pkl") 82 | self.pose_db = pickle.load(open(pose_db_file, "rb")) 83 | self.cameras = self._get_cam() 84 | 85 | def _get_cam(self): 86 | cam_file = osp.join(self.dataset_root, "calibration_shelf.json") 87 | with open(cam_file) as cfile: 88 | cameras = json.load(cfile) 89 | 90 | for id, cam in cameras.items(): 91 | for k, v in cam.items(): 92 | cameras[id][k] = np.array(v) 93 | 94 | return cameras 95 | 96 | def __getitem__(self, idx): 97 | # nposes = np.random.choice([1, 2, 3, 4, 5], p=[0.1, 0.1, 0.2, 0.4, 0.2]) 98 | nposes = np.random.choice(range(1, 6)) 99 | bbox_list = [] 100 | center_list = [] 101 | 102 | select_poses = np.random.choice(self.pose_db, nposes) 103 | joints_3d = np.array([p['pose'] for p in select_poses]) 104 | joints_3d_vis = np.array([p['vis'] for p in select_poses]) 105 | 106 | for n in range(0, nposes): 107 | points = joints_3d[n][:, :2].copy() 108 | 
center = (points[11, :2] + points[12, :2]) / 2 109 | rot_rad = np.random.uniform(-180, 180) 110 | 111 | new_center = self.get_new_center(center_list) 112 | new_xy = rotate_points(points, center, rot_rad) - center + new_center 113 | 114 | loop_count = 0 115 | while not self.isvalid(self.calc_bbox(new_xy, joints_3d_vis[n]), bbox_list): 116 | loop_count += 1 117 | if loop_count >= 100: 118 | break 119 | new_center = self.get_new_center(center_list) 120 | new_xy = rotate_points(points, center, rot_rad) - center + new_center 121 | 122 | if loop_count >= 100: 123 | nposes = n 124 | joints_3d = joints_3d[:n] 125 | joints_3d_vis = joints_3d_vis[:n] 126 | else: 127 | center_list.append(new_center) 128 | bbox_list.append(self.calc_bbox(new_xy, joints_3d_vis[n])) 129 | joints_3d[n][:, :2] = new_xy 130 | 131 | input, target_heatmap, target_weight, target_3d, meta, input_heatmap = [], [], [], [], [], [] 132 | for k, cam in self.cameras.items(): 133 | i, th, tw, t3, m, ih = self._get_single_view_item(joints_3d, joints_3d_vis, cam) 134 | input.append(i) 135 | target_heatmap.append(th) 136 | target_weight.append(tw) 137 | input_heatmap.append(ih) 138 | target_3d.append(t3) 139 | meta.append(m) 140 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 141 | 142 | def __len__(self): 143 | return 3000 144 | # return self.db_size // self.num_views 145 | 146 | def _get_single_view_item(self, joints_3d, joints_3d_vis, cam): 147 | joints_3d = copy.deepcopy(joints_3d) 148 | joints_3d_vis = copy.deepcopy(joints_3d_vis) 149 | nposes = len(joints_3d) 150 | 151 | width = 1032 152 | height = 776 153 | c = np.array([width / 2.0, height / 2.0], dtype=np.float32) 154 | # s = np.array( 155 | # [width / self.pixel_std, height / self.pixel_std], dtype=np.float32) 156 | s = get_scale((width, height), self.image_size) 157 | r = 0 158 | 159 | joints = [] 160 | joints_vis = [] 161 | for n in range(nposes): 162 | pose2d = project_pose(joints_3d[n], cam) 163 | 164 | x_check = np.bitwise_and(pose2d[:, 0] >= 0, 165 | pose2d[:, 0] <= width - 1) 166 | y_check = np.bitwise_and(pose2d[:, 1] >= 0, 167 | pose2d[:, 1] <= height - 1) 168 | check = np.bitwise_and(x_check, y_check) 169 | vis = joints_3d_vis[n][:, 0] > 0 170 | vis[np.logical_not(check)] = 0 171 | 172 | joints.append(pose2d) 173 | joints_vis.append(np.repeat(np.reshape(vis, (-1, 1)), 2, axis=1)) 174 | 175 | trans = get_affine_transform(c, s, r, self.image_size) 176 | input = np.ones((height, width, 3), dtype=np.float32) 177 | input = cv2.warpAffine( 178 | input, 179 | trans, (int(self.image_size[0]), int(self.image_size[1])), 180 | flags=cv2.INTER_LINEAR) 181 | 182 | if self.transform: 183 | input = self.transform(input) 184 | 185 | for n in range(nposes): 186 | for i in range(len(joints[0])): 187 | if joints_vis[n][i, 0] > 0.0: 188 | joints[n][i, 0:2] = affine_transform( 189 | joints[n][i, 0:2], trans) 190 | if (np.min(joints[n][i, :2]) < 0 or 191 | joints[n][i, 0] >= self.image_size[0] or 192 | joints[n][i, 1] >= self.image_size[1]): 193 | joints_vis[n][i, :] = 0 194 | 195 | input_heatmap, _ = self.generate_input_heatmap( 196 | joints, joints_vis) 197 | input_heatmap = torch.from_numpy(input_heatmap) 198 | target_heatmap = torch.zeros_like(input_heatmap) 199 | target_weight = torch.zeros(len(target_heatmap), 1) 200 | 201 | # make joints and joints_vis having same shape 202 | joints_u = np.zeros((self.maximum_person, len(joints[0]), 2)) 203 | joints_vis_u = np.zeros((self.maximum_person, len(joints[0]), 2)) 204 | for i in range(nposes): 205 | 
joints_u[i] = joints[i] 206 | joints_vis_u[i] = joints_vis[i] 207 | 208 | joints_3d_u = np.zeros((self.maximum_person, len(joints[0]), 3)) 209 | joints_3d_vis_u = np.zeros((self.maximum_person, len(joints[0]), 3)) 210 | for i in range(nposes): 211 | joints_3d_u[i] = joints_3d[i][:, 0:3] 212 | joints_3d_vis_u[i] = joints_3d_vis[i][:, 0:3] 213 | 214 | target_3d = self.generate_3d_target(joints_3d) 215 | target_3d = torch.from_numpy(target_3d) 216 | 217 | meta = { 218 | 'image': '', 219 | 'num_person': nposes, 220 | 'joints_3d': joints_3d_u, 221 | 'roots_3d': (joints_3d_u[:, 11] + joints_3d_u[:, 12]) / 2.0, 222 | 'joints_3d_vis': joints_3d_vis_u, 223 | 'joints': joints_u, 224 | 'joints_vis': joints_vis_u, 225 | 'center': c, 226 | 'scale': s, 227 | 'rotation': r, 228 | 'camera': cam 229 | } 230 | 231 | return input, target_heatmap, target_weight, target_3d, meta, input_heatmap 232 | 233 | @staticmethod 234 | def compute_human_scale(pose, joints_vis): 235 | idx = joints_vis[:, 0] == 1 236 | if np.sum(idx) == 0: 237 | return 0 238 | minx, maxx = np.min(pose[idx, 0]), np.max(pose[idx, 0]) 239 | miny, maxy = np.min(pose[idx, 1]), np.max(pose[idx, 1]) 240 | return np.clip(np.maximum(maxy - miny, maxx - minx) ** 2, 1.0 / 4 * 96 ** 2, 4 * 96 ** 2) 241 | 242 | def generate_input_heatmap(self, joints, joints_vis): 243 | ''' 244 | :param joints: [[num_joints, 3]] 245 | :param joints_vis: [num_joints, 3] 246 | :return: input_heatmap 247 | ''' 248 | nposes = len(joints) 249 | num_joints = joints[0].shape[0] 250 | target_weight = np.zeros((num_joints, 1), dtype=np.float32) 251 | for i in range(num_joints): 252 | for n in range(nposes): 253 | if joints_vis[n][i, 0] == 1: 254 | target_weight[i, 0] = 1 255 | 256 | assert self.target_type == 'gaussian', \ 257 | 'Only support gaussian map now!' 
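        # Note (added): the branch below renders one Gaussian blob per visible joint of every
        # synthetic person. The blob width follows the person's image-space size (cur_sigma is
        # scaled by compute_human_scale), whole people are occasionally dropped ("obscured"),
        # and the peak value is randomly attenuated -- more aggressively for elbows/knees and
        # wrists/ankles -- so these synthetic input heatmaps imitate the noisy, sometimes-missed
        # responses of a real 2D detector rather than perfect ground truth:
        #   g(x, y) = scale * exp(-((x - x0)^2 + (y - y0)^2) / (2 * cur_sigma^2))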
258 | 259 | if self.target_type == 'gaussian': 260 | target = np.zeros( 261 | (num_joints, self.heatmap_size[1], self.heatmap_size[0]), 262 | dtype=np.float32) 263 | feat_stride = self.image_size / self.heatmap_size 264 | 265 | for n in range(nposes): 266 | obscured = random.random() < 0.05 267 | if obscured: 268 | continue 269 | human_scale = 2 * self.compute_human_scale(joints[n] / feat_stride, joints_vis[n]) 270 | if human_scale == 0: 271 | continue 272 | 273 | cur_sigma = self.sigma * np.sqrt((human_scale / (96.0 * 96.0))) 274 | tmp_size = cur_sigma * 3 275 | for joint_id in range(num_joints): 276 | feat_stride = self.image_size / self.heatmap_size 277 | mu_x = int(joints[n][joint_id][0] / feat_stride[0]) 278 | mu_y = int(joints[n][joint_id][1] / feat_stride[1]) 279 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 280 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 281 | if joints_vis[n][joint_id, 0] == 0 or \ 282 | ul[0] >= self.heatmap_size[0] or \ 283 | ul[1] >= self.heatmap_size[1] \ 284 | or br[0] < 0 or br[1] < 0: 285 | continue 286 | 287 | size = 2 * tmp_size + 1 288 | x = np.arange(0, size, 1, np.float32) 289 | y = x[:, np.newaxis] 290 | x0 = y0 = size // 2 291 | # scale = 1 - np.abs(np.random.randn(1) * 0.25) 292 | scale = 0.9 + np.random.randn(1) * 0.03 if random.random() < 0.6 else 1.0 293 | if joint_id in [7, 8, 13, 14]: 294 | scale = scale * 0.5 if random.random() < 0.1 else scale 295 | elif joint_id in [9, 10, 15, 16]: 296 | scale = scale * 0.2 if random.random() < 0.1 else scale 297 | else: 298 | scale = scale * 0.5 if random.random() < 0.05 else scale 299 | g = np.exp( 300 | -((x - x0) ** 2 + (y - y0) ** 2) / (2 * cur_sigma ** 2)) * scale 301 | 302 | # Usable gaussian range 303 | g_x = max(0, 304 | -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] 305 | g_y = max(0, 306 | -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] 307 | # Image range 308 | img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) 309 | img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) 310 | 311 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 312 | target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]], 313 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 314 | target = np.clip(target, 0, 1) 315 | 316 | if self.use_different_joints_weight: 317 | target_weight = np.multiply(target_weight, self.joints_weight) 318 | 319 | return target, target_weight 320 | 321 | def generate_3d_target(self, joints_3d): 322 | num_people = len(joints_3d) 323 | 324 | space_size = self.space_size 325 | space_center = self.space_center 326 | cube_size = self.initial_cube_size 327 | grid1Dx = np.linspace(-space_size[0] / 2, space_size[0] / 2, cube_size[0]) + space_center[0] 328 | grid1Dy = np.linspace(-space_size[1] / 2, space_size[1] / 2, cube_size[1]) + space_center[1] 329 | grid1Dz = np.linspace(-space_size[2] / 2, space_size[2] / 2, cube_size[2]) + space_center[2] 330 | 331 | target = np.zeros((cube_size[0], cube_size[1], cube_size[2]), dtype=np.float32) 332 | cur_sigma = 200.0 333 | 334 | for n in range(num_people): 335 | joint_id = [11, 12] # mid-hip 336 | mu_x = (joints_3d[n][joint_id[0]][0] + joints_3d[n][joint_id[1]][0]) / 2.0 337 | mu_y = (joints_3d[n][joint_id[0]][1] + joints_3d[n][joint_id[1]][1]) / 2.0 338 | mu_z = (joints_3d[n][joint_id[0]][2] + joints_3d[n][joint_id[1]][2]) / 2.0 339 | 340 | i_x = [np.searchsorted(grid1Dx, mu_x - 3 * cur_sigma), 341 | np.searchsorted(grid1Dx, mu_x + 3 * cur_sigma, 'right')] 342 | i_y = [np.searchsorted(grid1Dy, mu_y - 3 * cur_sigma), 343 | 
np.searchsorted(grid1Dy, mu_y + 3 * cur_sigma, 'right')] 344 | i_z = [np.searchsorted(grid1Dz, mu_z - 3 * cur_sigma), 345 | np.searchsorted(grid1Dz, mu_z + 3 * cur_sigma, 'right')] 346 | if i_x[0] >= i_x[1] or i_y[0] >= i_y[1] or i_z[0] >= i_z[1]: 347 | continue 348 | 349 | gridx, gridy, gridz = np.meshgrid(grid1Dx[i_x[0]:i_x[1]], grid1Dy[i_y[0]:i_y[1]], grid1Dz[i_z[0]:i_z[1]], 350 | indexing='ij') 351 | g = np.exp(-((gridx - mu_x) ** 2 + (gridy - mu_y) ** 2 + (gridz - mu_z) ** 2) / (2 * cur_sigma ** 2)) 352 | target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]] = np.maximum( 353 | target[i_x[0]:i_x[1], i_y[0]:i_y[1], i_z[0]:i_z[1]], g) 354 | 355 | target = np.clip(target, 0, 1) 356 | return target 357 | 358 | def evaluate(self): 359 | pass 360 | 361 | @staticmethod 362 | def get_new_center(center_list): 363 | if len(center_list) == 0 or random.random() < 0.7: 364 | new_center = np.array([np.random.uniform(-1000.0, 2000.0), np.random.uniform(-1600.0, 1600.0)]) 365 | else: 366 | xy = center_list[np.random.choice(range(len(center_list)))] 367 | new_center = xy + np.random.normal(500, 50, 2) * np.random.choice([1, -1], 2) 368 | 369 | return new_center 370 | 371 | @staticmethod 372 | def isvalid(bbox, bbox_list): 373 | if len(bbox_list) == 0: 374 | return True 375 | 376 | bbox_list = np.array(bbox_list) 377 | x0 = np.maximum(bbox[0], bbox_list[:, 0]) 378 | y0 = np.maximum(bbox[1], bbox_list[:, 1]) 379 | x1 = np.minimum(bbox[2], bbox_list[:, 2]) 380 | y1 = np.minimum(bbox[3], bbox_list[:, 3]) 381 | 382 | intersection = np.maximum(0, (x1 - x0) * (y1 - y0)) 383 | area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 384 | area_list = (bbox_list[:, 2] - bbox_list[:, 0]) * (bbox_list[:, 3] - bbox_list[:, 1]) 385 | iou_list = intersection / (area + area_list - intersection) 386 | 387 | return np.max(iou_list) < 0.01 388 | 389 | @staticmethod 390 | def calc_bbox(pose, pose_vis): 391 | index = pose_vis[:, 0] > 0 392 | bbox = [np.min(pose[index, 0]), np.min(pose[index, 1]), 393 | np.max(pose[index, 0]), np.max(pose[index, 1])] 394 | 395 | return np.array(bbox) 396 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import models.pose_resnet 11 | import models.v2v_net 12 | import models.project_layer 13 | import models.cuboid_proposal_net 14 | import models.pose_regression_net 15 | import models.multi_person_posenet 16 | 17 | -------------------------------------------------------------------------------- /lib/models/cuboid_proposal_net.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from models.v2v_net import V2VNet 10 | from models.project_layer import ProjectLayer 11 | from core.proposal import nms 12 | 13 | 14 | class ProposalLayer(nn.Module): 15 | def __init__(self, cfg): 16 | super(ProposalLayer, self).__init__() 17 | self.grid_size = torch.tensor(cfg.MULTI_PERSON.SPACE_SIZE) 18 | self.cube_size = torch.tensor(cfg.MULTI_PERSON.INITIAL_CUBE_SIZE) 19 | self.grid_center = torch.tensor(cfg.MULTI_PERSON.SPACE_CENTER) 20 | self.num_cand = cfg.MULTI_PERSON.MAX_PEOPLE_NUM 21 | self.root_id = cfg.DATASET.ROOTIDX 22 | self.num_joints = cfg.NETWORK.NUM_JOINTS 23 | self.threshold = cfg.MULTI_PERSON.THRESHOLD 24 | 25 | def filter_proposal(self, topk_index, gt_3d, num_person): 26 | batch_size = topk_index.shape[0] 27 | cand_num = topk_index.shape[1] 28 | cand2gt = torch.zeros(batch_size, cand_num) 29 | 30 | for i in range(batch_size): 31 | cand = topk_index[i].reshape(cand_num, 1, -1) 32 | gt = gt_3d[i, :num_person[i]].reshape(1, num_person[i], -1) 33 | 34 | dist = torch.sqrt(torch.sum((cand - gt)**2, dim=-1)) 35 | min_dist, min_gt = torch.min(dist, dim=-1) 36 | 37 | cand2gt[i] = min_gt 38 | cand2gt[i][min_dist > 500.0] = -1.0 39 | 40 | return cand2gt 41 | 42 | def get_real_loc(self, index): 43 | device = index.device 44 | cube_size = self.cube_size.to(device=device, dtype=torch.float) 45 | grid_size = self.grid_size.to(device=device) 46 | grid_center = self.grid_center.to(device=device) 47 | loc = index.float() / (cube_size - 1) * grid_size + grid_center - grid_size / 2.0 48 | return loc 49 | 50 | def forward(self, root_cubes, meta): 51 | batch_size = root_cubes.shape[0] 52 | 53 | topk_values, topk_unravel_index = nms(root_cubes.detach(), self.num_cand) 54 | topk_unravel_index = self.get_real_loc(topk_unravel_index) 55 | 56 | grid_centers = torch.zeros(batch_size, self.num_cand, 5, device=root_cubes.device) 57 | grid_centers[:, :, 0:3] = topk_unravel_index 58 | grid_centers[:, :, 4] = topk_values 59 | 60 | # match gt to filter those invalid proposals for training/validate PRN 61 | if self.training and ('roots_3d' in meta[0] and 'num_person' in meta[0]): 62 | gt_3d = meta[0]['roots_3d'].float() 63 | num_person = meta[0]['num_person'] 64 | cand2gt = self.filter_proposal(topk_unravel_index, gt_3d, num_person) 65 | grid_centers[:, :, 3] = cand2gt 66 | else: 67 | grid_centers[:, :, 3] = (topk_values > self.threshold).float() - 1.0 # if ground-truths are not available. 
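        # Note (added): each row of grid_centers packs one root proposal as
        #   [0:3] -> 3D root location in world coordinates (mm),
        #   [3]   -> matched ground-truth person index during training, or a 0 / -1 validity
        #            flag at inference (thresholded 3D heatmap score, see above),
        #   [4]   -> proposal confidence (the NMS'ed 3D heatmap value).
        # Downstream, PoseRegressionNet only regresses poses for rows with
        # grid_centers[:, 3] >= 0; negative values mark discarded proposals.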
68 | 69 | # nms 70 | # for b in range(batch_size): 71 | # centers = copy.deepcopy(topk_unravel_index[b, :, :3]) 72 | # scores = copy.deepcopy(topk_values[b]) 73 | # keep = [] 74 | # keep_s = [] 75 | # while len(centers): 76 | # keep.append(centers[0]) 77 | # keep_s.append(scores[0]) 78 | # dist = torch.sqrt(torch.sum((centers[0] - centers)**2, dim=-1)) 79 | # index = (dist > 500.0) & (scores > 0.1) 80 | # centers = centers[index] 81 | # scores = scores[index] 82 | # grid_centers[b, :len(keep), :3] = torch.stack(keep, dim=0) 83 | # grid_centers[b, :len(keep), 3] = 0.0 84 | # grid_centers[b, :len(keep), 4] = torch.stack(keep_s, dim=0) 85 | 86 | return grid_centers 87 | 88 | 89 | class CuboidProposalNet(nn.Module): 90 | def __init__(self, cfg): 91 | super(CuboidProposalNet, self).__init__() 92 | self.grid_size = cfg.MULTI_PERSON.SPACE_SIZE 93 | self.cube_size = cfg.MULTI_PERSON.INITIAL_CUBE_SIZE 94 | self.grid_center = cfg.MULTI_PERSON.SPACE_CENTER 95 | 96 | self.project_layer = ProjectLayer(cfg) 97 | self.v2v_net = V2VNet(cfg.NETWORK.NUM_JOINTS, 1) 98 | self.proposal_layer = ProposalLayer(cfg) 99 | 100 | def forward(self, all_heatmaps, meta): 101 | 102 | initial_cubes, grids = self.project_layer(all_heatmaps, meta, 103 | self.grid_size, [self.grid_center], self.cube_size) 104 | root_cubes = self.v2v_net(initial_cubes) 105 | root_cubes = root_cubes.squeeze(1) 106 | grid_centers = self.proposal_layer(root_cubes, meta) 107 | 108 | return root_cubes, grid_centers -------------------------------------------------------------------------------- /lib/models/multi_person_posenet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | from models import pose_resnet 14 | from models.cuboid_proposal_net import CuboidProposalNet 15 | from models.pose_regression_net import PoseRegressionNet 16 | from core.loss import PerJointMSELoss 17 | from core.loss import PerJointL1Loss 18 | 19 | 20 | class MultiPersonPoseNet(nn.Module): 21 | def __init__(self, backbone, cfg): 22 | super(MultiPersonPoseNet, self).__init__() 23 | self.num_cand = cfg.MULTI_PERSON.MAX_PEOPLE_NUM 24 | self.num_joints = cfg.NETWORK.NUM_JOINTS 25 | 26 | self.backbone = backbone 27 | self.root_net = CuboidProposalNet(cfg) 28 | self.pose_net = PoseRegressionNet(cfg) 29 | 30 | self.USE_GT = cfg.NETWORK.USE_GT 31 | self.root_id = cfg.DATASET.ROOTIDX 32 | self.dataset_name = cfg.DATASET.TEST_DATASET 33 | 34 | def forward(self, views=None, meta=None, targets_2d=None, weights_2d=None, targets_3d=None, input_heatmaps=None): 35 | if views is not None: 36 | all_heatmaps = [] 37 | for view in views: 38 | heatmaps = self.backbone(view) 39 | all_heatmaps.append(heatmaps) 40 | else: 41 | all_heatmaps = input_heatmaps 42 | 43 | # all_heatmaps = targets_2d 44 | device = all_heatmaps[0].device 45 | batch_size = all_heatmaps[0].shape[0] 46 | 47 | # calculate 2D heatmap loss 48 | criterion = PerJointMSELoss().cuda() 49 | loss_2d = criterion(torch.zeros(1, device=device), torch.zeros(1, device=device)) 50 | if targets_2d is not None: 51 | for t, w, o in zip(targets_2d, weights_2d, all_heatmaps): 52 | loss_2d += criterion(o, t, True, w) 53 | loss_2d /= len(all_heatmaps) 54 | 55 | loss_3d = criterion(torch.zeros(1, device=device), torch.zeros(1, device=device)) 56 | if self.USE_GT: 57 | num_person = meta[0]['num_person'] 58 | grid_centers = torch.zeros(batch_size, self.num_cand, 5, device=device) 59 | grid_centers[:, :, 0:3] = meta[0]['roots_3d'].float() 60 | grid_centers[:, :, 3] = -1.0 61 | for i in range(batch_size): 62 | grid_centers[i, :num_person[i], 3] = torch.tensor(range(num_person[i]), device=device) 63 | grid_centers[i, :num_person[i], 4] = 1.0 64 | else: 65 | root_cubes, grid_centers = self.root_net(all_heatmaps, meta) 66 | 67 | # calculate 3D heatmap loss 68 | if targets_3d is not None: 69 | loss_3d = criterion(root_cubes, targets_3d) 70 | del root_cubes 71 | 72 | pred = torch.zeros(batch_size, self.num_cand, self.num_joints, 5, device=device) 73 | pred[:, :, :, 3:] = grid_centers[:, :, 3:].reshape(batch_size, -1, 1, 2) # matched gt 74 | 75 | loss_cord = criterion(torch.zeros(1, device=device), torch.zeros(1, device=device)) 76 | criterion_cord = PerJointL1Loss().cuda() 77 | count = 0 78 | 79 | for n in range(self.num_cand): 80 | index = (pred[:, n, 0, 3] >= 0) 81 | if torch.sum(index) > 0: 82 | single_pose = self.pose_net(all_heatmaps, meta, grid_centers[:, n]) 83 | pred[:, n, :, 0:3] = single_pose.detach() 84 | 85 | # calculate 3D pose loss 86 | if self.training and 'joints_3d' in meta[0] and 'joints_3d_vis' in meta[0]: 87 | gt_3d = meta[0]['joints_3d'].float() 88 | for i in range(batch_size): 89 | if pred[i, n, 0, 3] >= 0: 90 | targets = gt_3d[i:i + 1, pred[i, n, 0, 3].long()] 91 | weights_3d = meta[0]['joints_3d_vis'][i:i + 1, pred[i, n, 0, 3].long(), :, 0:1].float() 92 | count += 1 93 | loss_cord = (loss_cord * (count - 1) + 94 | criterion_cord(single_pose[i:i + 1], targets, True, weights_3d)) / 
count 95 | del single_pose 96 | 97 | return pred, all_heatmaps, grid_centers, loss_2d, loss_3d, loss_cord 98 | 99 | 100 | def get_multi_person_pose_net(cfg, is_train=True): 101 | if cfg.BACKBONE_MODEL: 102 | backbone = eval(cfg.BACKBONE_MODEL + '.get_pose_net')(cfg, is_train=is_train) 103 | else: 104 | backbone = None 105 | model = MultiPersonPoseNet(backbone, cfg) 106 | return model 107 | -------------------------------------------------------------------------------- /lib/models/pose_regression_net.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from models.v2v_net import V2VNet 11 | from models.project_layer import ProjectLayer 12 | 13 | 14 | class SoftArgmaxLayer(nn.Module): 15 | def __init__(self, cfg): 16 | super(SoftArgmaxLayer, self).__init__() 17 | self.beta = cfg.NETWORK.BETA 18 | 19 | def forward(self, x, grids): 20 | batch_size = x.size(0) 21 | channel = x.size(1) 22 | x = x.reshape(batch_size, channel, -1, 1) 23 | # x = F.softmax(x, dim=2) 24 | x = F.softmax(self.beta * x, dim=2) 25 | grids = grids.unsqueeze(1) 26 | x = torch.mul(x, grids) 27 | x = torch.sum(x, dim=2) 28 | return x 29 | 30 | 31 | class PoseRegressionNet(nn.Module): 32 | def __init__(self, cfg): 33 | super(PoseRegressionNet, self).__init__() 34 | self.grid_size = cfg.PICT_STRUCT.GRID_SIZE 35 | self.cube_size = cfg.PICT_STRUCT.CUBE_SIZE 36 | 37 | self.project_layer = ProjectLayer(cfg) 38 | self.v2v_net = V2VNet(cfg.NETWORK.NUM_JOINTS, cfg.NETWORK.NUM_JOINTS) 39 | self.soft_argmax_layer = SoftArgmaxLayer(cfg) 40 | 41 | def forward(self, all_heatmaps, meta, grid_centers): 42 | batch_size = all_heatmaps[0].shape[0] 43 | num_joints = all_heatmaps[0].shape[1] 44 | device = all_heatmaps[0].device 45 | pred = torch.zeros(batch_size, num_joints, 3, device=device) 46 | cubes, grids = self.project_layer(all_heatmaps, meta, 47 | self.grid_size, grid_centers, self.cube_size) 48 | 49 | index = grid_centers[:, 3] >= 0 50 | valid_cubes = self.v2v_net(cubes[index]) 51 | pred[index] = self.soft_argmax_layer(valid_cubes, grids[index]) 52 | 53 | return pred 54 | -------------------------------------------------------------------------------- /lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import logging 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | BN_MOMENTUM = 0.1 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def conv3x3(in_planes, out_planes, stride=1): 22 | """3x3 convolution with padding""" 23 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 24 | padding=1, bias=False) 25 | 26 | 27 | class BasicBlock(nn.Module): 28 | expansion = 1 29 | 30 | def __init__(self, inplanes, planes, stride=1, downsample=None): 31 | super(BasicBlock, self).__init__() 32 | self.conv1 = conv3x3(inplanes, planes, stride) 33 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 34 | self.relu = nn.ReLU(inplace=True) 35 | self.conv2 = conv3x3(planes, planes) 36 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 37 | self.downsample = downsample 38 | self.stride = stride 39 | 40 | def forward(self, x): 41 | residual = x 42 | 43 | out = self.conv1(x) 44 | out = self.bn1(out) 45 | out = self.relu(out) 46 | 47 | out = self.conv2(out) 48 | out = self.bn2(out) 49 | 50 | if self.downsample is not None: 51 | residual = self.downsample(x) 52 | 53 | out += residual 54 | out = self.relu(out) 55 | 56 | return out 57 | 58 | 59 | class Bottleneck(nn.Module): 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, stride=1, downsample=None): 63 | super(Bottleneck, self).__init__() 64 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 65 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 66 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 67 | padding=1, bias=False) 68 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 69 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 70 | bias=False) 71 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 72 | momentum=BN_MOMENTUM) 73 | self.relu = nn.ReLU(inplace=True) 74 | self.downsample = downsample 75 | self.stride = stride 76 | 77 | def forward(self, x): 78 | residual = x 79 | 80 | out = self.conv1(x) 81 | out = self.bn1(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv2(out) 85 | out = self.bn2(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv3(out) 89 | out = self.bn3(out) 90 | 91 | if self.downsample is not None: 92 | residual = self.downsample(x) 93 | 94 | out += residual 95 | out = self.relu(out) 96 | 97 | return out 98 | 99 | 100 | class PoseResNet(nn.Module): 101 | 102 | def __init__(self, block, layers, cfg, **kwargs): 103 | self.inplanes = 64 104 | self.deconv_with_bias = cfg.POSE_RESNET.DECONV_WITH_BIAS 105 | 106 | super(PoseResNet, self).__init__() 107 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 108 | bias=False) 109 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 110 | self.relu = nn.ReLU(inplace=True) 111 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 112 | self.layer1 = self._make_layer(block, 64, layers[0]) 113 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 114 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 115 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 116 | 117 | # used for deconv layers 118 | self.deconv_layers = self._make_deconv_layer( 119 | cfg.POSE_RESNET.NUM_DECONV_LAYERS, 120 | cfg.POSE_RESNET.NUM_DECONV_FILTERS, 121 | 
cfg.POSE_RESNET.NUM_DECONV_KERNELS, 122 | ) 123 | 124 | self.final_layer = nn.Conv2d( 125 | in_channels=cfg.POSE_RESNET.NUM_DECONV_FILTERS[-1], 126 | out_channels=cfg.NETWORK.NUM_JOINTS, 127 | kernel_size=cfg.POSE_RESNET.FINAL_CONV_KERNEL, 128 | stride=1, 129 | padding=1 if cfg.POSE_RESNET.FINAL_CONV_KERNEL == 3 else 0 130 | ) 131 | 132 | def _make_layer(self, block, planes, blocks, stride=1): 133 | downsample = None 134 | if stride != 1 or self.inplanes != planes * block.expansion: 135 | downsample = nn.Sequential( 136 | nn.Conv2d(self.inplanes, planes * block.expansion, 137 | kernel_size=1, stride=stride, bias=False), 138 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for i in range(1, blocks): 145 | layers.append(block(self.inplanes, planes)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def _get_deconv_cfg(self, deconv_kernel, index): 150 | if deconv_kernel == 4: 151 | padding = 1 152 | output_padding = 0 153 | elif deconv_kernel == 3: 154 | padding = 1 155 | output_padding = 1 156 | elif deconv_kernel == 2: 157 | padding = 0 158 | output_padding = 0 159 | 160 | return deconv_kernel, padding, output_padding 161 | 162 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 163 | assert num_layers == len(num_filters), \ 164 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 165 | assert num_layers == len(num_kernels), \ 166 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 167 | 168 | layers = [] 169 | for i in range(num_layers): 170 | kernel, padding, output_padding = \ 171 | self._get_deconv_cfg(num_kernels[i], i) 172 | 173 | planes = num_filters[i] 174 | layers.append( 175 | nn.ConvTranspose2d( 176 | in_channels=self.inplanes, 177 | out_channels=planes, 178 | kernel_size=kernel, 179 | stride=2, 180 | padding=padding, 181 | output_padding=output_padding, 182 | bias=self.deconv_with_bias)) 183 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 184 | layers.append(nn.ReLU(inplace=True)) 185 | self.inplanes = planes 186 | 187 | return nn.Sequential(*layers) 188 | 189 | def forward(self, x): 190 | x = self.conv1(x) 191 | x = self.bn1(x) 192 | x = self.relu(x) 193 | x = self.maxpool(x) 194 | 195 | x = self.layer1(x) 196 | x = self.layer2(x) 197 | x = self.layer3(x) 198 | x = self.layer4(x) 199 | 200 | x = self.deconv_layers(x) 201 | x = self.final_layer(x) 202 | 203 | return x 204 | 205 | def init_weights(self, pretrained=''): 206 | this_dir = os.path.dirname(__file__) 207 | pretrained = os.path.join(this_dir, '../..', pretrained) 208 | if os.path.isfile(pretrained): 209 | pretrained_state_dict = torch.load(pretrained) 210 | logger.info('=> loading pretrained models {}'.format(pretrained)) 211 | 212 | model_state_dict = self.state_dict() 213 | for k, v in pretrained_state_dict.items(): 214 | if "final_layer" in k: 215 | pretrained_state_dict[k] = torch.zeros_like(model_state_dict[k]) 216 | self.load_state_dict(pretrained_state_dict, strict=False) 217 | 218 | logger.info('=> init deconv weights from normal distribution') 219 | for name, m in self.deconv_layers.named_modules(): 220 | if isinstance(m, nn.ConvTranspose2d): 221 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 222 | logger.info('=> init {}.bias as 0'.format(name)) 223 | nn.init.normal_(m.weight, std=0.001) 224 | if self.deconv_with_bias: 225 | nn.init.constant_(m.bias, 0) 226 | 
elif isinstance(m, nn.BatchNorm2d): 227 | logger.info('=> init {}.weight as 1'.format(name)) 228 | logger.info('=> init {}.bias as 0'.format(name)) 229 | nn.init.constant_(m.weight, 1) 230 | nn.init.constant_(m.bias, 0) 231 | logger.info('=> init final conv weights from normal distribution') 232 | for m in self.final_layer.modules(): 233 | if isinstance(m, nn.Conv2d): 234 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 235 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 236 | logger.info('=> init {}.bias as 0'.format(name)) 237 | nn.init.normal_(m.weight, std=0.001) 238 | nn.init.constant_(m.bias, 0) 239 | else: 240 | logger.info('=> init weights from normal distribution') 241 | for m in self.modules(): 242 | if isinstance(m, nn.Conv2d): 243 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 244 | nn.init.normal_(m.weight, std=0.001) 245 | # nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.BatchNorm2d): 247 | nn.init.constant_(m.weight, 1) 248 | nn.init.constant_(m.bias, 0) 249 | elif isinstance(m, nn.ConvTranspose2d): 250 | nn.init.normal_(m.weight, std=0.001) 251 | if self.deconv_with_bias: 252 | nn.init.constant_(m.bias, 0) 253 | 254 | 255 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 256 | 34: (BasicBlock, [3, 4, 6, 3]), 257 | 50: (Bottleneck, [3, 4, 6, 3]), 258 | 101: (Bottleneck, [3, 4, 23, 3]), 259 | 152: (Bottleneck, [3, 8, 36, 3])} 260 | 261 | 262 | def get_pose_net(cfg, is_train, **kwargs): 263 | num_layers = cfg.POSE_RESNET.NUM_LAYERS 264 | 265 | block_class, layers = resnet_spec[num_layers] 266 | 267 | model = PoseResNet(block_class, layers, cfg, **kwargs) 268 | 269 | if is_train: 270 | model.init_weights(cfg.NETWORK.PRETRAINED) 271 | 272 | return model 273 | -------------------------------------------------------------------------------- /lib/models/project_layer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | import utils.cameras as cameras 11 | from utils.transforms import get_affine_transform as get_transform 12 | from utils.transforms import affine_transform_pts_cuda as do_transform 13 | 14 | 15 | class ProjectLayer(nn.Module): 16 | def __init__(self, cfg): 17 | super(ProjectLayer, self).__init__() 18 | 19 | self.img_size = cfg.NETWORK.IMAGE_SIZE 20 | self.heatmap_size = cfg.NETWORK.HEATMAP_SIZE 21 | self.grid_size = cfg.MULTI_PERSON.SPACE_SIZE 22 | self.cube_size = cfg.MULTI_PERSON.INITIAL_CUBE_SIZE 23 | self.grid_center = cfg.MULTI_PERSON.SPACE_CENTER 24 | 25 | def compute_grid(self, boxSize, boxCenter, nBins, device=None): 26 | if isinstance(boxSize, int) or isinstance(boxSize, float): 27 | boxSize = [boxSize, boxSize, boxSize] 28 | if isinstance(nBins, int): 29 | nBins = [nBins, nBins, nBins] 30 | 31 | grid1Dx = torch.linspace(-boxSize[0] / 2, boxSize[0] / 2, nBins[0], device=device) 32 | grid1Dy = torch.linspace(-boxSize[1] / 2, boxSize[1] / 2, nBins[1], device=device) 33 | grid1Dz = torch.linspace(-boxSize[2] / 2, boxSize[2] / 2, nBins[2], device=device) 34 | gridx, gridy, gridz = torch.meshgrid( 35 | grid1Dx + boxCenter[0], 36 | grid1Dy + boxCenter[1], 37 | grid1Dz + boxCenter[2], 38 | ) 39 | gridx = gridx.contiguous().view(-1, 1) 40 | gridy = gridy.contiguous().view(-1, 1) 41 | gridz = gridz.contiguous().view(-1, 1) 42 | grid = torch.cat([gridx, gridy, gridz], dim=1) 43 | return grid 44 | 45 | def get_voxel(self, heatmaps, meta, grid_size, grid_center, cube_size): 46 | device = heatmaps[0].device 47 | batch_size = heatmaps[0].shape[0] 48 | num_joints = heatmaps[0].shape[1] 49 | nbins = cube_size[0] * cube_size[1] * cube_size[2] 50 | n = len(heatmaps) 51 | cubes = torch.zeros(batch_size, num_joints, 1, nbins, n, device=device) 52 | # h, w = heatmaps[0].shape[2], heatmaps[0].shape[3] 53 | w, h = self.heatmap_size 54 | grids = torch.zeros(batch_size, nbins, 3, device=device) 55 | bounding = torch.zeros(batch_size, 1, 1, nbins, n, device=device) 56 | for i in range(batch_size): 57 | if len(grid_center[0]) == 3 or grid_center[i][3] >= 0: 58 | # This part of the code can be optimized because the projection operation is time-consuming. 59 | # If the camera locations always keep the same, the grids and sample_grids are repeated across frames 60 | # and can be computed only one time. 
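                # A possible caching sketch (added; not part of the original code): the grid
                # depends only on grid_size, grid_center and cube_size, so for a fixed-camera,
                # fixed-space setup one could memoize it in a hypothetical attribute, e.g.
                #     key = (tuple(float(x) for x in grid_center[i][:3]), tuple(cube_size))
                #     cache = getattr(self, '_grid_cache', {})
                #     if key not in cache:
                #         cache[key] = self.compute_grid(grid_size, grid_center[i], cube_size, device=device)
                #         self._grid_cache = cache
                #     grid = cache[key]
                # The same idea would extend to sample_grid when the camera parameters never change.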
61 | if len(grid_center) == 1: 62 | grid = self.compute_grid(grid_size, grid_center[0], cube_size, device=device) 63 | else: 64 | grid = self.compute_grid(grid_size, grid_center[i], cube_size, device=device) 65 | grids[i:i + 1] = grid 66 | for c in range(n): 67 | center = meta[c]['center'][i] 68 | scale = meta[c]['scale'][i] 69 | 70 | width, height = center * 2 71 | trans = torch.as_tensor( 72 | get_transform(center, scale, 0, self.img_size), 73 | dtype=torch.float, 74 | device=device) 75 | cam = {} 76 | for k, v in meta[c]['camera'].items(): 77 | cam[k] = v[i] 78 | xy = cameras.project_pose(grid, cam) 79 | 80 | bounding[i, 0, 0, :, c] = (xy[:, 0] >= 0) & (xy[:, 1] >= 0) & (xy[:, 0] < width) & ( 81 | xy[:, 1] < height) 82 | xy = torch.clamp(xy, -1.0, max(width, height)) 83 | xy = do_transform(xy, trans) 84 | xy = xy * torch.tensor( 85 | [w, h], dtype=torch.float, device=device) / torch.tensor( 86 | self.img_size, dtype=torch.float, device=device) 87 | sample_grid = xy / torch.tensor( 88 | [w - 1, h - 1], dtype=torch.float, 89 | device=device) * 2.0 - 1.0 90 | sample_grid = torch.clamp(sample_grid.view(1, 1, nbins, 2), -1.1, 1.1) 91 | 92 | # if pytorch version < 1.3.0, align_corners=True should be omitted. 93 | cubes[i:i + 1, :, :, :, c] += F.grid_sample(heatmaps[c][i:i + 1, :, :, :], sample_grid, align_corners=True) 94 | 95 | # cubes = cubes.mean(dim=-1) 96 | cubes = torch.sum(torch.mul(cubes, bounding), dim=-1) / (torch.sum(bounding, dim=-1) + 1e-6) 97 | cubes[cubes != cubes] = 0.0 98 | cubes = cubes.clamp(0.0, 1.0) 99 | 100 | cubes = cubes.view(batch_size, num_joints, cube_size[0], cube_size[1], cube_size[2]) ## 101 | return cubes, grids 102 | 103 | def forward(self, heatmaps, meta, grid_size, grid_center, cube_size): 104 | cubes, grids = self.get_voxel(heatmaps, meta, grid_size, grid_center, cube_size) 105 | return cubes, grids -------------------------------------------------------------------------------- /lib/models/v2v_net.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class Basic3DBlock(nn.Module): 11 | def __init__(self, in_planes, out_planes, kernel_size): 12 | super(Basic3DBlock, self).__init__() 13 | self.block = nn.Sequential( 14 | nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=1, padding=((kernel_size-1)//2)), 15 | nn.BatchNorm3d(out_planes), 16 | nn.ReLU(True) 17 | ) 18 | 19 | def forward(self, x): 20 | return self.block(x) 21 | 22 | 23 | class Res3DBlock(nn.Module): 24 | def __init__(self, in_planes, out_planes): 25 | super(Res3DBlock, self).__init__() 26 | self.res_branch = nn.Sequential( 27 | nn.Conv3d(in_planes, out_planes, kernel_size=3, stride=1, padding=1), 28 | nn.BatchNorm3d(out_planes), 29 | nn.ReLU(True), 30 | nn.Conv3d(out_planes, out_planes, kernel_size=3, stride=1, padding=1), 31 | nn.BatchNorm3d(out_planes) 32 | ) 33 | 34 | if in_planes == out_planes: 35 | self.skip_con = nn.Sequential() 36 | else: 37 | self.skip_con = nn.Sequential( 38 | nn.Conv3d(in_planes, out_planes, kernel_size=1, stride=1, padding=0), 39 | nn.BatchNorm3d(out_planes) 40 | ) 41 | 42 | def forward(self, x): 43 | res = self.res_branch(x) 44 | skip = self.skip_con(x) 45 | return F.relu(res + skip, True) 46 | 47 | 48 | class Pool3DBlock(nn.Module): 49 | def __init__(self, pool_size): 50 | super(Pool3DBlock, self).__init__() 51 | self.pool_size = pool_size 52 | 53 | def forward(self, x): 54 | return F.max_pool3d(x, kernel_size=self.pool_size, stride=self.pool_size) 55 | 56 | 57 | class Upsample3DBlock(nn.Module): 58 | def __init__(self, in_planes, out_planes, kernel_size, stride): 59 | super(Upsample3DBlock, self).__init__() 60 | assert(kernel_size == 2) 61 | assert(stride == 2) 62 | self.block = nn.Sequential( 63 | nn.ConvTranspose3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=0, output_padding=0), 64 | nn.BatchNorm3d(out_planes), 65 | nn.ReLU(True) 66 | ) 67 | 68 | def forward(self, x): 69 | return self.block(x) 70 | 71 | 72 | class EncoderDecorder(nn.Module): 73 | def __init__(self): 74 | super(EncoderDecorder, self).__init__() 75 | 76 | self.encoder_pool1 = Pool3DBlock(2) 77 | self.encoder_res1 = Res3DBlock(32, 64) 78 | self.encoder_pool2 = Pool3DBlock(2) 79 | self.encoder_res2 = Res3DBlock(64, 128) 80 | 81 | self.mid_res = Res3DBlock(128, 128) 82 | 83 | self.decoder_res2 = Res3DBlock(128, 128) 84 | self.decoder_upsample2 = Upsample3DBlock(128, 64, 2, 2) 85 | self.decoder_res1 = Res3DBlock(64, 64) 86 | self.decoder_upsample1 = Upsample3DBlock(64, 32, 2, 2) 87 | 88 | self.skip_res1 = Res3DBlock(32, 32) 89 | self.skip_res2 = Res3DBlock(64, 64) 90 | 91 | def forward(self, x): 92 | skip_x1 = self.skip_res1(x) 93 | x = self.encoder_pool1(x) 94 | x = self.encoder_res1(x) 95 | 96 | skip_x2 = self.skip_res2(x) 97 | x = self.encoder_pool2(x) 98 | x = self.encoder_res2(x) 99 | 100 | x = self.mid_res(x) 101 | 102 | x = self.decoder_res2(x) 103 | x = self.decoder_upsample2(x) 104 | x = x + skip_x2 105 | 106 | x = self.decoder_res1(x) 107 | x = self.decoder_upsample1(x) 108 | x = x + skip_x1 109 | 110 | return x 111 | 112 | 113 | class V2VNet(nn.Module): 114 | def __init__(self, input_channels, output_channels): 115 | super(V2VNet, self).__init__() 116 | 117 | self.front_layers = nn.Sequential( 118 | Basic3DBlock(input_channels, 16, 7), 119 | Res3DBlock(16, 32), 120 | ) 121 | 122 | self.encoder_decoder = EncoderDecorder() 123 | 124 | self.output_layer = nn.Conv3d(32, 
output_channels, kernel_size=1, stride=1, padding=0) 125 | 126 | self._initialize_weights() 127 | 128 | def forward(self, x): 129 | x = self.front_layers(x) 130 | x = self.encoder_decoder(x) 131 | x = self.output_layer(x) 132 | 133 | return x 134 | 135 | def _initialize_weights(self): 136 | for m in self.modules(): 137 | if isinstance(m, nn.Conv3d): 138 | # nn.init.xavier_normal_(m.weight) 139 | nn.init.normal_(m.weight, 0, 0.001) 140 | nn.init.constant_(m.bias, 0) 141 | elif isinstance(m, nn.ConvTranspose3d): 142 | # nn.init.xavier_normal_(m.weight) 143 | nn.init.normal_(m.weight, 0, 0.001) 144 | nn.init.constant_(m.bias, 0) 145 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/voxelpose-pytorch/9ef5d407a597c9647b2c8f6c0a246b725a87a054/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/cameras.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import division 7 | import torch 8 | import numpy as np 9 | 10 | 11 | def unfold_camera_param(camera, device=None): 12 | R = torch.as_tensor(camera['R'], dtype=torch.float, device=device) 13 | T = torch.as_tensor(camera['T'], dtype=torch.float, device=device) 14 | fx = torch.as_tensor(camera['fx'], dtype=torch.float, device=device) 15 | fy = torch.as_tensor(camera['fy'], dtype=torch.float, device=device) 16 | f = torch.tensor([fx, fy], dtype=torch.float, device=device).reshape(2, 1) 17 | c = torch.as_tensor( 18 | [[camera['cx']], [camera['cy']]], 19 | dtype=torch.float, 20 | device=device) 21 | k = torch.as_tensor(camera['k'], dtype=torch.float, device=device) 22 | p = torch.as_tensor(camera['p'], dtype=torch.float, device=device) 23 | return R, T, f, c, k, p 24 | 25 | 26 | def project_point_radial(x, R, T, f, c, k, p): 27 | """ 28 | Args 29 | x: Nx3 points in world coordinates 30 | R: 3x3 Camera rotation matrix 31 | T: 3x1 Camera translation parameters 32 | f: (scalar) Camera focal length 33 | c: 2x1 Camera center 34 | k: 3x1 Camera radial distortion coefficients 35 | p: 2x1 Camera tangential distortion coefficients 36 | Returns 37 | ypixel.T: Nx2 points in pixel space 38 | """ 39 | n = x.shape[0] 40 | xcam = torch.mm(R, torch.t(x) - T) 41 | y = xcam[:2] / (xcam[2] + 1e-5) 42 | 43 | kexp = k.repeat((1, n)) 44 | r2 = torch.sum(y**2, 0, keepdim=True) 45 | r2exp = torch.cat([r2, r2**2, r2**3], 0) 46 | radial = 1 + torch.einsum('ij,ij->j', kexp, r2exp) 47 | 48 | tan = p[0] * y[1] + p[1] * y[0] 49 | corr = (radial + 2 * tan).repeat((2, 1)) 50 | 51 | y = y * corr + torch.ger(torch.cat([p[1], p[0]]).view(-1), r2.view(-1)) 52 | ypixel = (f * y) + c 53 | return torch.t(ypixel) 54 | 55 | 56 | def project_pose(x, camera): 57 | R, T, f, c, k, p = unfold_camera_param(camera, device=x.device) 58 | return project_point_radial(x, R, T, f, c, k, p) 59 | 60 | 61 | def world_to_camera_frame(x, R, T): 62 | """ 63 | Args 64 | x: Nx3 3d points in world coordinates 65 | R: 3x3 Camera rotation matrix 66 | T: 3x1 Camera translation parameters 67 | Returns 68 | xcam: Nx3 3d points in camera coordinates 69 | 
""" 70 | 71 | R = torch.as_tensor(R, device=x.device) 72 | T = torch.as_tensor(T, device=x.device) 73 | xcam = torch.mm(R, torch.t(x) - T) 74 | return torch.t(xcam) 75 | 76 | 77 | def camera_to_world_frame(x, R, T): 78 | """ 79 | Args 80 | x: Nx3 points in camera coordinates 81 | R: 3x3 Camera rotation matrix 82 | T: 3x1 Camera translation parameters 83 | Returns 84 | xcam: Nx3 points in world coordinates 85 | """ 86 | 87 | R = torch.as_tensor(R, device=x.device) 88 | T = torch.as_tensor(T, device=x.device) 89 | xcam = torch.mm(torch.t(R), torch.t(x)) 90 | xcam = xcam + T # rotate and translate 91 | return torch.t(xcam) 92 | -------------------------------------------------------------------------------- /lib/utils/cameras_cpu.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import division 7 | import numpy as np 8 | 9 | 10 | def unfold_camera_param(camera): 11 | R = camera['R'] 12 | T = camera['T'] 13 | fx = camera['fx'] 14 | fy = camera['fy'] 15 | # f = 0.5 * (camera['fx'] + camera['fy']) 16 | f = np.array([[fx], [fy]]).reshape(-1, 1) 17 | c = np.array([[camera['cx']], [camera['cy']]]).reshape(-1, 1) 18 | k = camera['k'] 19 | p = camera['p'] 20 | return R, T, f, c, k, p 21 | 22 | 23 | def project_point_radial(x, R, T, f, c, k, p): 24 | """ 25 | Args 26 | x: Nx3 points in world coordinates 27 | R: 3x3 Camera rotation matrix 28 | T: 3x1 Camera translation parameters 29 | f: (scalar) Camera focal length 30 | c: 2x1 Camera center 31 | k: 3x1 Camera radial distortion coefficients 32 | p: 2x1 Camera tangential distortion coefficients 33 | Returns 34 | ypixel.T: Nx2 points in pixel space 35 | """ 36 | n = x.shape[0] 37 | xcam = R.dot(x.T - T) 38 | y = xcam[:2] / (xcam[2]+1e-5) 39 | # print(xcam[2]) 40 | 41 | r2 = np.sum(y**2, axis=0) 42 | radial = 1 + np.einsum('ij,ij->j', np.tile(k, (1, n)), 43 | np.array([r2, r2**2, r2**3])) 44 | tan = p[0] * y[1] + p[1] * y[0] 45 | y = y * np.tile(radial + 2 * tan, 46 | (2, 1)) + np.outer(np.array([p[1], p[0]]).reshape(-1), r2) 47 | ypixel = np.multiply(f, y) + c 48 | return ypixel.T 49 | 50 | 51 | def project_pose(x, camera): 52 | R, T, f, c, k, p = unfold_camera_param(camera) 53 | return project_point_radial(x, R, T, f, c, k, p) 54 | 55 | 56 | def world_to_camera_frame(x, R, T): 57 | """ 58 | Args 59 | x: Nx3 3d points in world coordinates 60 | R: 3x3 Camera rotation matrix 61 | T: 3x1 Camera translation parameters 62 | Returns 63 | xcam: Nx3 3d points in camera coordinates 64 | """ 65 | 66 | xcam = R.dot(x.T - T) # rotate and translate 67 | return xcam.T 68 | 69 | 70 | def camera_to_world_frame(x, R, T): 71 | """ 72 | Args 73 | x: Nx3 points in camera coordinates 74 | R: 3x3 Camera rotation matrix 75 | T: 3x1 Camera translation parameters 76 | Returns 77 | xcam: Nx3 points in world coordinates 78 | """ 79 | 80 | xcam = R.T.dot(x.T) + T # rotate and translate 81 | return xcam.T 82 | -------------------------------------------------------------------------------- /lib/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 
3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | import torch 14 | 15 | 16 | def flip_back(output_flipped, matched_parts): 17 | ''' 18 | ouput_flipped: numpy.ndarray(batch_size, num_joints, height, width) 19 | ''' 20 | assert output_flipped.ndim == 4,\ 21 | 'output_flipped should be [batch_size, num_joints, height, width]' 22 | 23 | output_flipped = output_flipped[:, :, :, ::-1] 24 | 25 | for pair in matched_parts: 26 | tmp = output_flipped[:, pair[0], :, :].copy() 27 | output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] 28 | output_flipped[:, pair[1], :, :] = tmp 29 | 30 | return output_flipped 31 | 32 | 33 | def fliplr_joints(joints, joints_vis, width, matched_parts): 34 | """ 35 | flip coords 36 | """ 37 | # Flip horizontal 38 | joints[:, 0] = width - joints[:, 0] - 1 39 | 40 | # Change left-right parts 41 | for pair in matched_parts: 42 | joints[pair[0], :], joints[pair[1], :] = \ 43 | joints[pair[1], :], joints[pair[0], :].copy() 44 | joints_vis[pair[0], :], joints_vis[pair[1], :] = \ 45 | joints_vis[pair[1], :], joints_vis[pair[0], :].copy() 46 | 47 | return joints * joints_vis, joints_vis 48 | 49 | 50 | def transform_preds(coords, center, scale, output_size): 51 | target_coords = np.zeros(coords.shape) 52 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 53 | for p in range(coords.shape[0]): 54 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 55 | return target_coords 56 | 57 | 58 | def get_affine_transform(center, 59 | scale, 60 | rot, 61 | output_size, 62 | shift=np.array([0, 0], dtype=np.float32), 63 | inv=0): 64 | if isinstance(scale, torch.Tensor): 65 | scale = np.array(scale.cpu()) 66 | if isinstance(center, torch.Tensor): 67 | center = np.array(center.cpu()) 68 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 69 | scale = np.array([scale, scale]) 70 | 71 | scale_tmp = scale * 200.0 72 | src_w, src_h = scale_tmp[0], scale_tmp[1] 73 | dst_w, dst_h = output_size[0], output_size[1] 74 | 75 | rot_rad = np.pi * rot / 180 76 | if src_w >= src_h: 77 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 78 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 79 | else: 80 | src_dir = get_dir([src_h * -0.5, 0], rot_rad) 81 | dst_dir = np.array([dst_h * -0.5, 0], np.float32) 82 | 83 | src = np.zeros((3, 2), dtype=np.float32) 84 | dst = np.zeros((3, 2), dtype=np.float32) 85 | src[0, :] = center + scale_tmp * shift # x,y 86 | src[1, :] = center + src_dir + scale_tmp * shift 87 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 88 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir 89 | 90 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 91 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 92 | 93 | if inv: 94 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 95 | else: 96 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 97 | 98 | return trans 99 | 100 | 101 | def affine_transform(pt, t): 102 | new_pt = np.array([pt[0], pt[1], 1.]).T 103 | new_pt = np.dot(t, new_pt) 104 | return new_pt[:2] 105 | 106 | 107 | def affine_transform_pts(pts, t): 108 | xyz = np.add( 109 | np.array([[1, 0], [0, 1], [0, 0]]).dot(pts.T), np.array([[0], [0], 110 | [1]])) 111 | return np.dot(t, xyz).T 112 | 113 | 114 | def affine_transform_pts_cuda(pts, t): 115 | 
npts = pts.shape[0] 116 | pts_homo = torch.cat([pts, torch.ones(npts, 1, device=pts.device)], dim=1) 117 | out = torch.mm(t, torch.t(pts_homo)) 118 | return torch.t(out[:2, :]) 119 | 120 | 121 | def get_3rd_point(a, b): 122 | direct = a - b 123 | return np.array(b) + np.array([-direct[1], direct[0]], dtype=np.float32) 124 | 125 | 126 | def get_dir(src_point, rot_rad): 127 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 128 | 129 | src_result = [0, 0] 130 | src_result[0] = src_point[0] * cs - src_point[1] * sn 131 | src_result[1] = src_point[0] * sn + src_point[1] * cs 132 | 133 | return src_result 134 | 135 | 136 | def crop(img, center, scale, output_size, rot=0): 137 | trans = get_affine_transform(center, scale, rot, output_size) 138 | 139 | dst_img = cv2.warpAffine( 140 | img, 141 | trans, (int(output_size[0]), int(output_size[1])), 142 | flags=cv2.INTER_LINEAR) 143 | 144 | return dst_img 145 | 146 | def get_scale(image_size, resized_size): 147 | w, h = image_size 148 | w_resized, h_resized = resized_size 149 | if w / w_resized < h / h_resized: 150 | w_pad = h / h_resized * w_resized 151 | h_pad = h 152 | else: 153 | w_pad = w 154 | h_pad = w / w_resized * h_resized 155 | scale = np.array([w_pad / 200.0, h_pad / 200.0], dtype=np.float32) 156 | 157 | return scale 158 | 159 | 160 | def projectPoints(X, K, R, t, Kd): 161 | """ 162 | Projects points X (3xN) using camera intrinsics K (3x3), 163 | extrinsics (R,t) and distortion parameters Kd=[k1,k2,p1,p2,k3]. 164 | Roughly, x = K*(R*X + t) + distortion 165 | See http://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html 166 | or cv2.projectPoints 167 | """ 168 | 169 | x = np.dot(R, X) + t 170 | 171 | x[0:2, :] = x[0:2, :] / (x[2, :] + 1e-5) 172 | 173 | r = x[0, :] * x[0, :] + x[1, :] * x[1, :] 174 | 175 | x[0, :] = x[0, :] * (1 + Kd[0] * r + Kd[1] * r * r + Kd[4] * r * r * r 176 | ) + 2 * Kd[2] * x[0, :] * x[1, :] + Kd[3] * ( 177 | r + 2 * x[0, :] * x[0, :]) 178 | x[1, :] = x[1, :] * (1 + Kd[0] * r + Kd[1] * r * r + Kd[4] * r * r * r 179 | ) + 2 * Kd[3] * x[0, :] * x[1, :] + Kd[2] * ( 180 | r + 2 * x[1, :] * x[1, :]) 181 | 182 | x[0, :] = K[0, 0] * x[0, :] + K[0, 1] * x[1, :] + K[0, 2] 183 | x[1, :] = K[1, 0] * x[0, :] + K[1, 1] * x[1, :] + K[1, 2] 184 | 185 | return x 186 | 187 | 188 | def rotate_points(points, center, rot_rad): 189 | """ 190 | :param points: N*2 191 | :param center: 2 192 | :param rot_rad: scalar 193 | :return: N*2 194 | """ 195 | rot_rad = rot_rad * np.pi / 180.0 196 | rotate_mat = np.array([[np.cos(rot_rad), -np.sin(rot_rad)], 197 | [np.sin(rot_rad), np.cos(rot_rad)]]) 198 | center = center.reshape(2, 1) 199 | points = points.T 200 | points = rotate_mat.dot(points - center) + center 201 | 202 | return points.T 203 | 204 | 205 | def compute_similarity_transform(X, Y, compute_optimal_scale=False): 206 | """ 207 | A port of MATLAB's `procrustes` function to Numpy. 
208 | Adapted from http://stackoverflow.com/a/18927641/1884420 209 | 210 | Args 211 | X: array NxM of targets, with N number of points and M point dimensionality 212 | Y: array NxM of inputs 213 | compute_optimal_scale: whether we compute optimal scale or force it to be 1 214 | 215 | Returns: 216 | d: squared error after transformation 217 | Z: transformed Y 218 | T: computed rotation 219 | b: scaling 220 | c: translation 221 | """ 222 | muX = X.mean(0) 223 | muY = Y.mean(0) 224 | 225 | X0 = X - muX 226 | Y0 = Y - muY 227 | 228 | ssX = (X0 ** 2.).sum() 229 | ssY = (Y0 ** 2.).sum() 230 | 231 | # centred Frobenius norm 232 | normX = np.sqrt(ssX) 233 | normY = np.sqrt(ssY) 234 | 235 | # scale to equal (unit) norm 236 | X0 = X0 / normX 237 | Y0 = Y0 / normY 238 | 239 | # optimum rotation matrix of Y 240 | A = np.dot(X0.T, Y0) 241 | U, s, Vt = np.linalg.svd(A, full_matrices=False) 242 | V = Vt.T 243 | T = np.dot(V, U.T) 244 | 245 | # Make sure we have a rotation 246 | detT = np.linalg.det(T) 247 | V[:, -1] *= np.sign(detT) 248 | s[-1] *= np.sign(detT) 249 | T = np.dot(V, U.T) 250 | 251 | traceTA = s.sum() 252 | 253 | if compute_optimal_scale: # Compute optimum scaling of Y. 254 | b = traceTA * normX / normY 255 | d = 1 - traceTA ** 2 256 | Z = normX * traceTA * np.dot(Y0, T) + muX 257 | else: # If no scaling allowed 258 | b = 1 259 | d = 1 + ssY / ssX - 2 * traceTA * normY / normX 260 | Z = normY * np.dot(Y0, T) + muX 261 | 262 | c = muX - b * np.dot(muY, T) 263 | 264 | return d, Z, T, b, c 265 | 266 | 267 | def procrustes_transform(target_pose, from_pose): 268 | _, Z, rot, s, t = compute_similarity_transform(target_pose, from_pose, compute_optimal_scale=True) 269 | align_pose = s * from_pose.dot(rot) + t 270 | 271 | return align_pose 272 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import logging 12 | import time 13 | from pathlib import Path 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | 19 | from core.config import get_model_name 20 | 21 | 22 | def create_logger(cfg, cfg_name, phase='train'): 23 | this_dir = Path(os.path.dirname(__file__)) ## 24 | root_output_dir = (this_dir / '..' / '..' / cfg.OUTPUT_DIR).resolve() ## 25 | tensorboard_log_dir = (this_dir / '..' / '..' 
/ cfg.LOG_DIR).resolve() 26 | # set up logger 27 | if not root_output_dir.exists(): 28 | print('=> creating {}'.format(root_output_dir)) 29 | root_output_dir.mkdir() 30 | 31 | dataset = cfg.DATASET.TRAIN_DATASET 32 | model, _ = get_model_name(cfg) 33 | cfg_name = os.path.basename(cfg_name).split('.')[0] 34 | 35 | final_output_dir = root_output_dir / dataset / model / cfg_name 36 | 37 | print('=> creating {}'.format(final_output_dir)) 38 | final_output_dir.mkdir(parents=True, exist_ok=True) 39 | 40 | time_str = time.strftime('%Y-%m-%d-%H-%M') 41 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 42 | final_log_file = final_output_dir / log_file 43 | head = '%(asctime)-15s %(message)s' 44 | logging.basicConfig(filename=str(final_log_file), 45 | format=head) 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.INFO) 48 | console = logging.StreamHandler() 49 | logging.getLogger('').addHandler(console) 50 | 51 | tensorboard_log_dir = tensorboard_log_dir / dataset / model / \ 52 | (cfg_name + time_str) 53 | print('=> creating {}'.format(tensorboard_log_dir)) 54 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 55 | 56 | return logger, str(final_output_dir), str(tensorboard_log_dir) 57 | 58 | def get_optimizer(cfg, model): 59 | optimizer = None 60 | if cfg.TRAIN.OPTIMIZER == 'sgd': 61 | optimizer = optim.SGD( 62 | model.parameters(), 63 | lr=cfg.TRAIN.LR, 64 | momentum=cfg.TRAIN.MOMENTUM, 65 | weight_decay=cfg.TRAIN.WD, 66 | nesterov=cfg.TRAIN.NESTEROV 67 | ) 68 | elif cfg.TRAIN.OPTIMIZER == 'adam': 69 | optimizer = optim.Adam( 70 | model.parameters(), 71 | lr=cfg.TRAIN.LR 72 | ) 73 | 74 | return optimizer 75 | 76 | 77 | def load_model_state(model, output_dir, epoch): 78 | file = os.path.join(output_dir, 'checkpoint_3d_epoch'+str(epoch)+'.pth.tar') 79 | if os.path.isfile(file): 80 | model.module.load_state_dict(torch.load(file)) 81 | print('=> load models state {} (epoch {})' 82 | .format(file, epoch)) 83 | return model 84 | else: 85 | print('=> no checkpoint found at {}'.format(file)) 86 | return model 87 | 88 | 89 | def load_checkpoint(model, optimizer, output_dir, filename='checkpoint.pth.tar'): 90 | file = os.path.join(output_dir, filename) 91 | if os.path.isfile(file): 92 | checkpoint = torch.load(file) 93 | start_epoch = checkpoint['epoch'] 94 | precision = checkpoint['precision'] if 'precision' in checkpoint else 0 95 | model.module.load_state_dict(checkpoint['state_dict']) 96 | optimizer.load_state_dict(checkpoint['optimizer']) 97 | print('=> load checkpoint {} (epoch {})' 98 | .format(file, start_epoch)) 99 | 100 | return start_epoch, model, optimizer, precision 101 | 102 | else: 103 | print('=> no checkpoint found at {}'.format(file)) 104 | return 0, model, optimizer, 0 105 | 106 | 107 | def save_checkpoint(states, is_best, output_dir, 108 | filename='checkpoint.pth.tar'): 109 | torch.save(states, os.path.join(output_dir, filename)) 110 | if is_best and 'state_dict' in states: 111 | torch.save(states['state_dict'], 112 | os.path.join(output_dir, 'model_best.pth.tar')) 113 | 114 | 115 | def load_backbone_panoptic(model, pretrained_file): 116 | this_dir = os.path.dirname(__file__) 117 | pretrained_file = os.path.abspath(os.path.join(this_dir, '../..', pretrained_file)) 118 | pretrained_state_dict = torch.load(pretrained_file) 119 | model_state_dict = model.module.backbone.state_dict() 120 | 121 | prefix = "module." 
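    # Note: checkpoints saved from an nn.DataParallel-wrapped model store every
    # parameter under a "module." prefix (e.g. "module.conv1.weight" rather than
    # "conv1.weight"). The loop below strips that prefix, copies only tensors whose
    # shapes match the current backbone, and re-initializes the final heatmap layer
    # when the pretrained head predicts a different number of joints, keeping as
    # many of the pretrained filters as fit.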
122 | new_pretrained_state_dict = {} 123 | for k, v in pretrained_state_dict.items(): 124 | if k.replace(prefix, "") in model_state_dict and v.shape == model_state_dict[k.replace(prefix, "")].shape: 125 | new_pretrained_state_dict[k.replace(prefix, "")] = v 126 | elif k.replace(prefix, "") == "final_layer.weight": # TODO 127 | print("Reiniting final layer filters:", k) 128 | 129 | o = torch.zeros_like(model_state_dict[k.replace(prefix, "")][:, :, :, :]) 130 | nn.init.xavier_uniform_(o) 131 | n_filters = min(o.shape[0], v.shape[0]) 132 | o[:n_filters, :, :, :] = v[:n_filters, :, :, :] 133 | 134 | new_pretrained_state_dict[k.replace(prefix, "")] = o 135 | elif k.replace(prefix, "") == "final_layer.bias": 136 | print("Reiniting final layer biases:", k) 137 | o = torch.zeros_like(model_state_dict[k.replace(prefix, "")][:]) 138 | nn.init.zeros_(o) 139 | n_filters = min(o.shape[0], v.shape[0]) 140 | o[:n_filters] = v[:n_filters] 141 | 142 | new_pretrained_state_dict[k.replace(prefix, "")] = o 143 | logging.info("load backbone statedict from {}".format(pretrained_file)) 144 | model.module.backbone.load_state_dict(new_pretrained_state_dict) 145 | 146 | return model 147 | -------------------------------------------------------------------------------- /lib/utils/vis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | import math 7 | import numpy as np 8 | import torchvision 9 | import cv2 10 | import os 11 | import matplotlib 12 | matplotlib.use('Agg') 13 | from matplotlib import pyplot as plt 14 | from mpl_toolkits.mplot3d import Axes3D 15 | 16 | 17 | def save_batch_image_with_joints_multi(batch_image, 18 | batch_joints, 19 | batch_joints_vis, 20 | num_person, 21 | file_name, 22 | nrow=8, 23 | padding=2): 24 | ''' 25 | batch_image: [batch_size, channel, height, width] 26 | batch_joints: [batch_size, num_person, num_joints, 3], 27 | batch_joints_vis: [batch_size, num_person, num_joints, 1], 28 | num_person: [batch_size] 29 | } 30 | ''' 31 | batch_image = batch_image.flip(1) 32 | grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) 33 | ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() 34 | ndarr = ndarr.copy() 35 | 36 | nmaps = batch_image.size(0) 37 | xmaps = min(nrow, nmaps) 38 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 39 | height = int(batch_image.size(2) + padding) 40 | width = int(batch_image.size(3) + padding) 41 | k = 0 42 | for y in range(ymaps): 43 | for x in range(xmaps): 44 | if k >= nmaps: 45 | break 46 | for n in range(num_person[k]): 47 | joints = batch_joints[k, n] 48 | joints_vis = batch_joints_vis[k, n] 49 | 50 | for joint, joint_vis in zip(joints, joints_vis): 51 | joint[0] = x * width + padding + joint[0] 52 | joint[1] = y * height + padding + joint[1] 53 | if joint_vis[0]: 54 | cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, 55 | [0, 255, 255], 2) 56 | k = k + 1 57 | cv2.imwrite(file_name, ndarr) 58 | 59 | 60 | def save_batch_heatmaps_multi(batch_image, batch_heatmaps, file_name, normalize=True): 61 | ''' 62 | batch_image: [batch_size, channel, height, width] 63 | batch_heatmaps: ['batch_size, num_joints, height, width] 64 | file_name: saved file name 65 | ''' 66 | if normalize: 67 | batch_image = batch_image.clone() 68 | 
min = float(batch_image.min()) 69 | max = float(batch_image.max()) 70 | 71 | batch_image.add_(-min).div_(max - min + 1e-5) 72 | batch_image = batch_image.flip(1) 73 | 74 | batch_size = batch_heatmaps.size(0) 75 | num_joints = batch_heatmaps.size(1) 76 | heatmap_height = batch_heatmaps.size(2) 77 | heatmap_width = batch_heatmaps.size(3) 78 | 79 | grid_image = np.zeros( 80 | (batch_size * heatmap_height, (num_joints + 1) * heatmap_width, 3), 81 | dtype=np.uint8) 82 | 83 | for i in range(batch_size): 84 | image = batch_image[i].mul(255)\ 85 | .clamp(0, 255)\ 86 | .byte()\ 87 | .permute(1, 2, 0)\ 88 | .cpu().numpy() 89 | heatmaps = batch_heatmaps[i].mul(255)\ 90 | .clamp(0, 255)\ 91 | .byte()\ 92 | .cpu().numpy() 93 | 94 | resized_image = cv2.resize(image, 95 | (int(heatmap_width), int(heatmap_height))) 96 | 97 | height_begin = heatmap_height * i 98 | height_end = heatmap_height * (i + 1) 99 | for j in range(num_joints): 100 | heatmap = heatmaps[j, :, :] 101 | colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 102 | masked_image = colored_heatmap * 0.7 + resized_image * 0.3 103 | 104 | width_begin = heatmap_width * (j + 1) 105 | width_end = heatmap_width * (j + 2) 106 | grid_image[height_begin:height_end, width_begin:width_end, :] = \ 107 | masked_image 108 | # grid_image[height_begin:height_end, width_begin:width_end, :] = \ 109 | # colored_heatmap*0.7 + resized_image*0.3 110 | 111 | grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image 112 | 113 | cv2.imwrite(file_name, grid_image) 114 | 115 | 116 | def save_debug_images_multi(config, input, meta, target, output, prefix): 117 | if not config.DEBUG.DEBUG: 118 | return 119 | 120 | basename = os.path.basename(prefix) 121 | dirname = os.path.dirname(prefix) 122 | dirname1 = os.path.join(dirname, 'image_with_joints') 123 | dirname2 = os.path.join(dirname, 'batch_heatmaps') 124 | 125 | for dir in [dirname1, dirname2]: 126 | if not os.path.exists(dir): 127 | os.makedirs(dir) 128 | 129 | prefix1 = os.path.join(dirname1, basename) 130 | prefix2 = os.path.join(dirname2, basename) 131 | 132 | if config.DEBUG.SAVE_BATCH_IMAGES_GT: 133 | save_batch_image_with_joints_multi(input, meta['joints'], meta['joints_vis'], meta['num_person'], '{}_gt.jpg'.format(prefix1)) 134 | if config.DEBUG.SAVE_HEATMAPS_GT: 135 | save_batch_heatmaps_multi(input, target, '{}_hm_gt.jpg'.format(prefix2)) 136 | if config.DEBUG.SAVE_HEATMAPS_PRED: 137 | save_batch_heatmaps_multi(input, output, '{}_hm_pred.jpg'.format(prefix2)) 138 | 139 | # panoptic 140 | LIMBS15 = [[0, 1], [0, 2], [0, 3], [3, 4], [4, 5], [0, 9], [9, 10], 141 | [10, 11], [2, 6], [2, 12], [6, 7], [7, 8], [12, 13], [13, 14]] 142 | 143 | # # h36m 144 | # LIMBS17 = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8], 145 | # [8, 9], [9, 10], [8, 14], [14, 15], [15, 16], [8, 11], [11, 12], [12, 13]] 146 | # coco17 147 | LIMBS17 = [[0, 1], [0, 2], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7], [7, 9], [6, 8], [8, 10], [5, 11], [11, 13], [13, 15], 148 | [6, 12], [12, 14], [14, 16], [5, 6], [11, 12]] 149 | 150 | # shelf / campus 151 | LIMBS14 = [[0, 1], [1, 2], [3, 4], [4, 5], [2, 3], [6, 7], [7, 8], [9, 10], 152 | [10, 11], [2, 8], [3, 9], [8, 12], [9, 12], [12, 13]] 153 | 154 | 155 | def save_debug_3d_images(config, meta, preds, prefix): 156 | if not config.DEBUG.DEBUG: 157 | return 158 | 159 | basename = os.path.basename(prefix) 160 | dirname = os.path.dirname(prefix) 161 | dirname1 = os.path.join(dirname, '3d_joints') 162 | 163 | if not os.path.exists(dirname1): 164 | 
os.makedirs(dirname1) 165 | 166 | prefix = os.path.join(dirname1, basename) 167 | file_name = prefix + "_3d.png" 168 | 169 | # preds = preds.cpu().numpy() 170 | batch_size = meta['num_person'].shape[0] 171 | xplot = min(4, batch_size) 172 | yplot = int(math.ceil(float(batch_size) / xplot)) 173 | 174 | width = 4.0 * xplot 175 | height = 4.0 * yplot 176 | fig = plt.figure(0, figsize=(width, height)) 177 | plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, 178 | top=0.95, wspace=0.05, hspace=0.15) 179 | for i in range(batch_size): 180 | num_person = meta['num_person'][i] 181 | joints_3d = meta['joints_3d'][i] 182 | joints_3d_vis = meta['joints_3d_vis'][i] 183 | ax = plt.subplot(yplot, xplot, i + 1, projection='3d') 184 | for n in range(num_person): 185 | joint = joints_3d[n] 186 | joint_vis = joints_3d_vis[n] 187 | for k in eval("LIMBS{}".format(len(joint))): 188 | if joint_vis[k[0], 0] and joint_vis[k[1], 0]: 189 | x = [float(joint[k[0], 0]), float(joint[k[1], 0])] 190 | y = [float(joint[k[0], 1]), float(joint[k[1], 1])] 191 | z = [float(joint[k[0], 2]), float(joint[k[1], 2])] 192 | ax.plot(x, y, z, c='r', lw=1.5, marker='o', markerfacecolor='w', markersize=2, 193 | markeredgewidth=1) 194 | else: 195 | x = [float(joint[k[0], 0]), float(joint[k[1], 0])] 196 | y = [float(joint[k[0], 1]), float(joint[k[1], 1])] 197 | z = [float(joint[k[0], 2]), float(joint[k[1], 2])] 198 | ax.plot(x, y, z, c='r', ls='--', lw=1.5, marker='o', markerfacecolor='w', markersize=2, 199 | markeredgewidth=1) 200 | 201 | colors = ['b', 'g', 'c', 'y', 'm', 'orange', 'pink', 'royalblue', 'lightgreen', 'gold'] 202 | if preds is not None: 203 | pred = preds[i] 204 | for n in range(len(pred)): 205 | joint = pred[n] 206 | if joint[0, 3] >= 0: 207 | for k in eval("LIMBS{}".format(len(joint))): 208 | x = [float(joint[k[0], 0]), float(joint[k[1], 0])] 209 | y = [float(joint[k[0], 1]), float(joint[k[1], 1])] 210 | z = [float(joint[k[0], 2]), float(joint[k[1], 2])] 211 | ax.plot(x, y, z, c=colors[int(n % 10)], lw=1.5, marker='o', markerfacecolor='w', markersize=2, 212 | markeredgewidth=1) 213 | plt.savefig(file_name) 214 | plt.close(0) 215 | 216 | 217 | def save_debug_3d_cubes(config, meta, root, prefix): 218 | if not config.DEBUG.DEBUG: 219 | return 220 | 221 | basename = os.path.basename(prefix) 222 | dirname = os.path.dirname(prefix) 223 | dirname1 = os.path.join(dirname, 'root_cubes') 224 | 225 | if not os.path.exists(dirname1): 226 | os.makedirs(dirname1) 227 | 228 | prefix = os.path.join(dirname1, basename) 229 | file_name = prefix + "_root.png" 230 | 231 | batch_size = root.shape[0] 232 | root_id = config.DATASET.ROOTIDX 233 | 234 | xplot = min(4, batch_size) 235 | yplot = int(math.ceil(float(batch_size) / xplot)) 236 | 237 | width = 6.0 * xplot 238 | height = 4.0 * yplot 239 | fig = plt.figure(0, figsize=(width, height)) 240 | plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, 241 | top=0.95, wspace=0.05, hspace=0.15) 242 | for i in range(batch_size): 243 | roots_gt = meta['roots_3d'][i] 244 | num_person = meta['num_person'][i] 245 | roots_pred = root[i] 246 | ax = plt.subplot(yplot, xplot, i + 1, projection='3d') 247 | 248 | x = roots_gt[:num_person, 0].cpu() 249 | y = roots_gt[:num_person, 1].cpu() 250 | z = roots_gt[:num_person, 2].cpu() 251 | ax.scatter(x, y, z, c='r') 252 | 253 | index = roots_pred[:, 3] >= 0 254 | x = roots_pred[index, 0].cpu() 255 | y = roots_pred[index, 1].cpu() 256 | z = roots_pred[index, 2].cpu() 257 | ax.scatter(x, y, z, c='b') 258 | 259 | space_size = config.MULTI_PERSON.SPACE_SIZE 
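        # Descriptive note: the axis limits set below frame the full multi-person
        # voxel space, i.e. each axis spans space_center ± space_size / 2 (in mm).
        # For example, a hypothetical SPACE_SIZE of [8000, 8000, 2000] centred at
        # the origin would give x/y-limits of ±4000 mm and z-limits of ±1000 mm.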
260 |         space_center = config.MULTI_PERSON.SPACE_CENTER
261 |         ax.set_xlim(space_center[0] - space_size[0] / 2, space_center[0] + space_size[0] / 2)
262 |         ax.set_ylim(space_center[1] - space_size[1] / 2, space_center[1] + space_size[1] / 2)
263 |         ax.set_zlim(space_center[2] - space_size[2] / 2, space_center[2] + space_size[2] / 2)
264 | 
265 |     plt.savefig(file_name)
266 |     plt.close(0)
267 | 
--------------------------------------------------------------------------------
/lib/utils/zipreader.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # ------------------------------------------------------------------------------
5 | 
6 | import os
7 | import zipfile
8 | import xml.etree.ElementTree as ET
9 | 
10 | import cv2
11 | import numpy as np
12 | 
13 | _im_zfile = []
14 | _xml_path_zip = []
15 | _xml_zfile = []
16 | 
17 | 
18 | def imread(filename, flags=cv2.IMREAD_COLOR):
19 |     global _im_zfile
20 |     path = filename
21 |     pos_at = path.find('@')  # find() returns -1 when missing; index() would raise
22 |     if pos_at == -1:
23 |         print("character '@' is not found from the given path '%s'"%(path))
24 |         assert 0
25 |     path_zip = path[0: pos_at]
26 |     path_img = path[pos_at + 2:]
27 |     if not os.path.isfile(path_zip):
28 |         print("zip file '%s' is not found"%(path_zip))
29 |         assert 0
30 |     for i in range(len(_im_zfile)):
31 |         if _im_zfile[i]['path'] == path_zip:
32 |             data = _im_zfile[i]['zipfile'].read(path_img)
33 |             return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
34 | 
35 |     _im_zfile.append({
36 |         'path': path_zip,
37 |         'zipfile': zipfile.ZipFile(path_zip, 'r')
38 |     })
39 |     data = _im_zfile[-1]['zipfile'].read(path_img)
40 | 
41 |     return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
42 | 
43 | 
44 | def xmlread(filename):
45 |     global _xml_path_zip
46 |     global _xml_zfile
47 |     path = filename
48 |     pos_at = path.find('@')  # find() returns -1 when missing; index() would raise
49 |     if pos_at == -1:
50 |         print("character '@' is not found from the given path '%s'"%(path))
51 |         assert 0
52 |     path_zip = path[0: pos_at]
53 |     path_xml = path[pos_at + 2:]
54 |     if not os.path.isfile(path_zip):
55 |         print("zip file '%s' is not found"%(path_zip))
56 |         assert 0
57 |     for i in range(len(_xml_path_zip)):  # was xrange, which is Python 2 only
58 |         if _xml_path_zip[i] == path_zip:
59 |             data = _xml_zfile[i].open(path_xml)
60 |             return ET.fromstring(data.read())
61 |     _xml_path_zip.append(path_zip)
62 |     print("read new xml file '%s'"%(path_zip))
63 |     _xml_zfile.append(zipfile.ZipFile(path_zip, 'r'))
64 |     data = _xml_zfile[-1].open(path_xml)
65 |     return ET.fromstring(data.read())
66 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm==4.29.1
2 | json_tricks==3.13.2
3 | torch==1.4.0
4 | opencv_python==4.0.0.21
5 | prettytable==0.7.2
6 | scipy==1.4.1
7 | torchvision==0.5.0
8 | numpy==1.16.2
9 | matplotlib==2.0.2
10 | easydict==1.9
11 | PyYAML==5.4
12 | tensorboardX==2.1
13 | 
--------------------------------------------------------------------------------
/run/_init_paths.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | 19 | this_dir = osp.dirname(__file__) 20 | 21 | lib_path = osp.join(this_dir, '..', 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /run/train_3d.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | import torch.backends.cudnn as cudnn 14 | import torch.utils.data 15 | import torch.utils.data.distributed 16 | import torchvision.transforms as transforms 17 | from tensorboardX import SummaryWriter 18 | import argparse 19 | import os 20 | import pprint 21 | import logging 22 | import json 23 | 24 | import _init_paths 25 | from core.config import config 26 | from core.config import update_config 27 | from core.function import train_3d, validate_3d 28 | from utils.utils import create_logger 29 | from utils.utils import save_checkpoint, load_checkpoint, load_model_state 30 | from utils.utils import load_backbone_panoptic 31 | import dataset 32 | import models 33 | 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train keypoints network') 37 | parser.add_argument( 38 | '--cfg', help='experiment configure file name', required=True, type=str) 39 | 40 | args, rest = parser.parse_known_args() 41 | update_config(args.cfg) 42 | 43 | return args 44 | 45 | 46 | def get_optimizer(model): 47 | lr = config.TRAIN.LR 48 | if model.module.backbone is not None: 49 | for params in model.module.backbone.parameters(): 50 | params.requires_grad = False # If you want to train the whole model jointly, set it to be True. 51 | for params in model.module.root_net.parameters(): 52 | params.requires_grad = True 53 | for params in model.module.pose_net.parameters(): 54 | params.requires_grad = True 55 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.module.parameters()), lr=lr) 56 | # optimizer = optim.Adam(model.module.parameters(), lr=lr) 57 | 58 | return model, optimizer 59 | 60 | 61 | def main(): 62 | args = parse_args() 63 | logger, final_output_dir, tb_log_dir = create_logger( 64 | config, args.cfg, 'train') 65 | 66 | logger.info(pprint.pformat(args)) 67 | logger.info(pprint.pformat(config)) 68 | 69 | gpus = [int(i) for i in config.GPUS.split(',')] 70 | print('=> Loading data ..') 71 | normalize = transforms.Normalize( 72 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 73 | train_dataset = eval('dataset.' 
+ config.DATASET.TRAIN_DATASET)( 74 | config, config.DATASET.TRAIN_SUBSET, True, 75 | transforms.Compose([ 76 | transforms.ToTensor(), 77 | normalize, 78 | ])) 79 | 80 | train_loader = torch.utils.data.DataLoader( 81 | train_dataset, 82 | batch_size=config.TRAIN.BATCH_SIZE * len(gpus), 83 | shuffle=config.TRAIN.SHUFFLE, 84 | num_workers=config.WORKERS, 85 | pin_memory=True) 86 | 87 | test_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)( 88 | config, config.DATASET.TEST_SUBSET, False, 89 | transforms.Compose([ 90 | transforms.ToTensor(), 91 | normalize, 92 | ])) 93 | 94 | test_loader = torch.utils.data.DataLoader( 95 | test_dataset, 96 | batch_size=config.TEST.BATCH_SIZE * len(gpus), 97 | shuffle=False, 98 | num_workers=config.WORKERS, 99 | pin_memory=True) 100 | 101 | cudnn.benchmark = config.CUDNN.BENCHMARK 102 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 103 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 104 | 105 | print('=> Constructing models ..') 106 | model = eval('models.' + config.MODEL + '.get_multi_person_pose_net')( 107 | config, is_train=True) 108 | with torch.no_grad(): 109 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 110 | 111 | model, optimizer = get_optimizer(model) 112 | 113 | start_epoch = config.TRAIN.BEGIN_EPOCH 114 | end_epoch = config.TRAIN.END_EPOCH 115 | 116 | best_precision = 0 117 | if config.NETWORK.PRETRAINED_BACKBONE: 118 | model = load_backbone_panoptic(model, config.NETWORK.PRETRAINED_BACKBONE) 119 | if config.TRAIN.RESUME: 120 | start_epoch, model, optimizer, best_precision = load_checkpoint(model, optimizer, final_output_dir) 121 | 122 | writer_dict = { 123 | 'writer': SummaryWriter(log_dir=tb_log_dir), 124 | 'train_global_steps': 0, 125 | 'valid_global_steps': 0, 126 | } 127 | 128 | print('=> Training...') 129 | for epoch in range(start_epoch, end_epoch): 130 | print('Epoch: {}'.format(epoch)) 131 | 132 | # lr_scheduler.step() 133 | train_3d(config, model, optimizer, train_loader, epoch, final_output_dir, writer_dict) 134 | precision = validate_3d(config, model, test_loader, final_output_dir) 135 | 136 | if precision > best_precision: 137 | best_precision = precision 138 | best_model = True 139 | else: 140 | best_model = False 141 | 142 | logger.info('=> saving checkpoint to {} (Best: {})'.format(final_output_dir, best_model)) 143 | save_checkpoint({ 144 | 'epoch': epoch + 1, 145 | 'state_dict': model.module.state_dict(), 146 | 'precision': best_precision, 147 | 'optimizer': optimizer.state_dict(), 148 | }, best_model, final_output_dir) 149 | 150 | final_model_state_file = os.path.join(final_output_dir, 151 | 'final_state.pth.tar') 152 | logger.info('saving final model state to {}'.format( 153 | final_model_state_file)) 154 | torch.save(model.module.state_dict(), final_model_state_file) 155 | 156 | writer_dict['writer'].close() 157 | 158 | 159 | if __name__ == '__main__': 160 | main() 161 | -------------------------------------------------------------------------------- /run/validate_3d.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | import torch.backends.cudnn as cudnn 14 | import torch.utils.data 15 | import torch.utils.data.distributed 16 | import torchvision.transforms as transforms 17 | from tensorboardX import SummaryWriter 18 | import argparse 19 | import os 20 | import pprint 21 | import logging 22 | import json 23 | 24 | import _init_paths 25 | from core.config import config 26 | from core.config import update_config 27 | from core.function import train_3d, validate_3d 28 | from utils.utils import create_logger 29 | from utils.utils import save_checkpoint, load_checkpoint, load_model_state 30 | from utils.utils import load_backbone_panoptic 31 | import dataset 32 | import models 33 | 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train keypoints network') 37 | parser.add_argument( 38 | '--cfg', help='experiment configure file name', required=True, type=str) 39 | 40 | args, rest = parser.parse_known_args() 41 | update_config(args.cfg) 42 | 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | logger, final_output_dir, tb_log_dir = create_logger( 49 | config, args.cfg, 'validate') 50 | 51 | logger.info(pprint.pformat(args)) 52 | logger.info(pprint.pformat(config)) 53 | 54 | gpus = [int(i) for i in config.GPUS.split(',')] 55 | print('=> Loading data ..') 56 | normalize = transforms.Normalize( 57 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 58 | 59 | test_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)( 60 | config, config.DATASET.TEST_SUBSET, False, 61 | transforms.Compose([ 62 | transforms.ToTensor(), 63 | normalize, 64 | ])) 65 | 66 | test_loader = torch.utils.data.DataLoader( 67 | test_dataset, 68 | batch_size=config.TEST.BATCH_SIZE * len(gpus), 69 | shuffle=False, 70 | num_workers=config.WORKERS, 71 | pin_memory=True) 72 | 73 | cudnn.benchmark = config.CUDNN.BENCHMARK 74 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 75 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 76 | 77 | print('=> Constructing models ..') 78 | model = eval('models.' + config.MODEL + '.get_multi_person_pose_net')( 79 | config, is_train=True) 80 | with torch.no_grad(): 81 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 82 | 83 | test_model_file = os.path.join(final_output_dir, config.TEST.MODEL_FILE) 84 | if config.TEST.MODEL_FILE and os.path.isfile(test_model_file): 85 | logger.info('=> load models state {}'.format(test_model_file)) 86 | model.module.load_state_dict(torch.load(test_model_file)) 87 | else: 88 | raise ValueError('Check the model file for testing!') 89 | 90 | validate_3d(config, model, test_loader, final_output_dir) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /test/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os.path as osp 11 | import sys 12 | 13 | 14 | def add_path(path): 15 | if path not in sys.path: 16 | sys.path.insert(0, path) 17 | 18 | 19 | this_dir = osp.dirname(__file__) 20 | 21 | lib_path = osp.join(this_dir, '..', 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /test/evaluate.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # ------------------------------------------------------------------------------ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | import torchvision.transforms as transforms 16 | import argparse 17 | import os 18 | from tqdm import tqdm 19 | from prettytable import PrettyTable 20 | import copy 21 | 22 | import _init_paths 23 | from core.config import config 24 | from core.config import update_config 25 | from utils.utils import create_logger, load_backbone_panoptic 26 | import dataset 27 | import models 28 | 29 | 30 | def parse_args(): 31 | parser = argparse.ArgumentParser(description='Train keypoints network') 32 | parser.add_argument( 33 | '--cfg', help='experiment configure file name', required=True, type=str) 34 | 35 | args, rest = parser.parse_known_args() 36 | update_config(args.cfg) 37 | 38 | return args 39 | 40 | 41 | def main(): 42 | args = parse_args() 43 | logger, final_output_dir, tb_log_dir = create_logger( 44 | config, args.cfg, 'eval_map') 45 | cfg_name = os.path.basename(args.cfg).split('.')[0] 46 | 47 | gpus = [int(i) for i in config.GPUS.split(',')] 48 | print('=> Loading data ..') 49 | normalize = transforms.Normalize( 50 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 51 | 52 | test_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)( 53 | config, config.DATASET.TEST_SUBSET, False, 54 | transforms.Compose([ 55 | transforms.ToTensor(), 56 | normalize, 57 | ])) 58 | 59 | test_loader = torch.utils.data.DataLoader( 60 | test_dataset, 61 | batch_size=config.TEST.BATCH_SIZE * len(gpus), 62 | shuffle=False, 63 | num_workers=config.WORKERS, 64 | pin_memory=True) 65 | 66 | cudnn.benchmark = config.CUDNN.BENCHMARK 67 | torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC 68 | torch.backends.cudnn.enabled = config.CUDNN.ENABLED 69 | 70 | print('=> Constructing models ..') 71 | model = eval('models.' 
+ config.MODEL + '.get_multi_person_pose_net')( 72 | config, is_train=True) 73 | with torch.no_grad(): 74 | model = torch.nn.DataParallel(model, device_ids=gpus).cuda() 75 | 76 | test_model_file = os.path.join(final_output_dir, config.TEST.MODEL_FILE) 77 | if config.TEST.MODEL_FILE and os.path.isfile(test_model_file): 78 | logger.info('=> load models state {}'.format(test_model_file)) 79 | model.module.load_state_dict(torch.load(test_model_file)) 80 | else: 81 | raise ValueError('Check the model file for testing!') 82 | 83 | model.eval() 84 | preds = [] 85 | with torch.no_grad(): 86 | for i, (inputs, targets_2d, weights_2d, targets_3d, meta, input_heatmap) in enumerate(tqdm(test_loader)): 87 | if 'panoptic' in config.DATASET.TEST_DATASET: 88 | pred, _, _, _, _, _ = model(views=inputs, meta=meta) 89 | elif 'campus' in config.DATASET.TEST_DATASET or 'shelf' in config.DATASET.TEST_DATASET: 90 | pred, _, _, _, _, _ = model(meta=meta, input_heatmaps=input_heatmap) 91 | 92 | pred = pred.detach().cpu().numpy() 93 | for b in range(pred.shape[0]): 94 | preds.append(pred[b]) 95 | 96 | tb = PrettyTable() 97 | if 'panoptic' in config.DATASET.TEST_DATASET: 98 | mpjpe_threshold = np.arange(25, 155, 25) 99 | aps, recs, mpjpe, _ = test_dataset.evaluate(preds) 100 | tb.field_names = ['Threshold/mm'] + [f'{i}' for i in mpjpe_threshold] 101 | tb.add_row(['AP'] + [f'{ap * 100:.2f}' for ap in aps]) 102 | tb.add_row(['Recall'] + [f'{re * 100:.2f}' for re in recs]) 103 | print(tb) 104 | print(f'MPJPE: {mpjpe:.2f}mm') 105 | else: 106 | actor_pcp, avg_pcp, bone_person_pcp, _ = test_dataset.evaluate(preds) 107 | tb.field_names = ['Bone Group'] + [f'Actor {i+1}' for i in range(len(actor_pcp))] + ['Average'] 108 | for k, v in bone_person_pcp.items(): 109 | tb.add_row([k] + [f'{i*100:.1f}' for i in v] + [f'{np.mean(v)*100:.1f}']) 110 | tb.add_row(['Total'] + [f'{i*100:.1f}' for i in actor_pcp] + [f'{avg_pcp*100:.1f}']) 111 | print(tb) 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | --------------------------------------------------------------------------------
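A minimal usage sketch (not a file in the repository) showing how the affine-transform
helpers in lib/utils/transforms.py map a point from the original image into heatmap space
and back. It assumes lib/ is on sys.path (run/_init_paths.py arranges this) and uses
illustrative image/heatmap sizes only.

import numpy as np

from utils.transforms import (affine_transform, get_affine_transform,
                              get_scale, transform_preds)

image_size = np.array([1920, 1080])   # original camera resolution (illustrative)
heatmap_size = np.array([240, 128])   # 2D backbone heatmap resolution (illustrative)

center = image_size / 2.0
# get_scale returns the padded source size in units of 200 px, padded so that the
# aspect ratio of heatmap_size is preserved by the warp.
scale = get_scale(image_size, heatmap_size)

# Forward warp: image coordinates -> heatmap coordinates.
trans = get_affine_transform(center, scale, 0, heatmap_size)
pt_image = np.array([960.0, 540.0])
pt_heatmap = affine_transform(pt_image, trans)

# transform_preds builds the inverse warp (inv=1) and maps heatmap-space
# predictions back to the original image; the round trip is (near-)exact.
pt_back = transform_preds(pt_heatmap[None, :], center, scale, heatmap_size)
assert np.allclose(pt_back[0], pt_image, atol=1e-3)

The factor of 200 inside get_affine_transform (scale_tmp = scale * 200.0) is the reason
get_scale divides the padded pixel size by 200 when constructing the scale parameter.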