├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── config.py ├── dataset ├── coco │ └── INFO └── get_dataset.sh ├── demo.ipynb ├── demo_camera.py ├── demo_image.py ├── model.py ├── model ├── caffe │ ├── _trained_COCO │ │ └── pose_deploy.prototxt │ ├── _trained_MPI │ │ └── pose_deploy.prototxt │ └── layers │ │ └── INFO ├── caffe_to_keras.py ├── dump_caffe_layers.py ├── get_caffe_model.sh └── get_keras_model.sh ├── py_rmpe_server ├── py_rmpe_data_iterator.py ├── py_rmpe_heatmapper.py ├── py_rmpe_transformer.py └── rmpe_server.py ├── readme ├── 5ep_result.png ├── dance.gif ├── losses.png ├── result.png └── tr_results.png ├── sample_images └── ski.jpg ├── testing ├── coco.ipynb ├── coco_metric.py ├── inhouse_metric.py ├── inspect_dataset.ipynb ├── inspect_nn_input_output.ipynb ├── rmpe_server_comparator.py └── rmpe_server_tester.py ├── training ├── coco_masks_hdf5.py ├── ds_generators.py ├── optimizers.py ├── train_common.py └── train_pose.py ├── trash ├── lmdb.parse.ipynb └── matload_test.ipynb └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | model/**/*.caffemodel 2 | model/**/*.h5 3 | model/**/*.npy 4 | dataset/train2017 5 | dataset/val2017 6 | dataset/test2017 7 | dataset/trainmask2017 8 | dataset/valmask2017 9 | dataset/*.h5 10 | dataset/annotations 11 | dataset/coco 12 | .idea 13 | __pycache__ 14 | .ipynb_checkpoints 15 | logs/ 16 | training.csv 17 | weights.best.h5 18 | .Rproj.user 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "addins"] 2 | path = addins 3 | url = git@github.com:anatolix/multipose_addins.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | SOFTWARE LICENSE AGREEMENT 2 | ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY 3 | 4 | BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE. 5 | 6 | This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Michal Faber (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor. 7 | 8 | RESERVATION OF OWNERSHIP AND GRANT OF LICENSE: 9 | Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive, 10 | non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i). 
11 | 12 | CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication. 13 | 14 | COPYRIGHT: The Software is owned by Licensor and is protected by United 15 | States copyright laws and applicable international treaties and/or conventions. 16 | 17 | PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto. 18 | 19 | DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement. 20 | 21 | BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies. 22 | 23 | USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. 24 | 25 | You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software. 26 | 27 | ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void. 28 | 29 | TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below. 30 | 31 | The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement. 32 | 33 | FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement. 34 | 35 | DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS. 
36 | 37 | SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement. 38 | 39 | EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage. 40 | 41 | EXPORT REGULATION: Licensee agrees to comply with any and all applicable 42 | U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control. 43 | 44 | SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby. 45 | 46 | NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor. 47 | 48 | GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania. 49 | 50 | ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto. 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### About this fork 2 | 3 | This fork contains a **pure Python version** of [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation). It was originally forked from the [Michal Faber fork](https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation); all credit for porting the original work to Keras goes to him. 4 | 5 | In this fork I've reimplemented image augmentation in pure Python. It is significantly shorter (**285** lines vs **1202** lines in Michal Faber's C++ **rmpe_server**, and far fewer than in the original work). 6 | 7 | Despite being written in Python, this code is **significantly faster** than the original implementation (140 images/s vs 30 images/s for the C++ code on my machine). This is not all that useful since most people don't have 5 GPUs, but it proves the point that Python programs can be fast. The magic is in combining all affine transformations into one matrix and calling a single **warpAffine**, plus vectorized numpy computation of PAFs and heatmaps (see the sketch after the status list below). 8 | 9 | 10 | The augmentation can be run as an iterator inside **train_pose.py** (default), or as a separate **./rmpe_server.py**. 11 | 12 | #### Current status 13 | - [x] image augmentation: rotate, shift, scale, crop, flip (implemented as a single affine transform, i.e. much faster) 14 | - [x] mask calculation: rotate, shift, scale, crop, flip 15 | - [x] joint heatmaps 16 | - [x] limb part affinity fields 17 | - [x] quality is the same as the original work and a bit better than Michal's version.
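Here is the trick in a minimal sketch. It mirrors `AugmentSelection.affine` in `py_rmpe_server/py_rmpe_transformer.py`; the helper name and the example values are illustrative only, and it assumes `image` is a loaded BGR image and `joints` is an (N, 2) array of keypoints:

```python
import numpy as np
import cv2

def combined_affine(center, degree, scale, flip, out_size=368):
    # 1. move the person's center to the origin
    c2zero = np.array([[1., 0., -center[0]],
                       [0., 1., -center[1]],
                       [0., 0., 1.]])
    # 2. rotate, 3. scale, 4. optionally flip horizontally
    a, b = np.cos(np.radians(degree)), np.sin(np.radians(degree))
    rotate = np.array([[a, b, 0.], [-b, a, 0.], [0., 0., 1.]])
    scale_m = np.diag([scale, scale, 1.])
    flip_m = np.diag([-1. if flip else 1., 1., 1.])
    # 5. move the origin to the center of the output crop
    zero2center = np.array([[1., 0., out_size // 2],
                            [0., 1., out_size // 2],
                            [0., 0., 1.]])
    # matrix products apply right-to-left, so this is steps 1..5 in one matrix
    return (zero2center @ flip_m @ scale_m @ rotate @ c2zero)[0:2]

M = combined_affine(center=(320, 240), degree=15., scale=0.8, flip=True)
# one warp instead of five separate image operations
warped = cv2.warpAffine(image, M, (368, 368), flags=cv2.INTER_CUBIC,
                        borderMode=cv2.BORDER_CONSTANT, borderValue=(127, 127, 127))
# the same 2x3 matrix transforms joint coordinates
warped_joints = (M @ np.c_[joints, np.ones(len(joints))].T).T
```

Because the crop, shift, rotation, scale, and flip are all folded into a single matrix, the image is resampled exactly once, which is where most of the speedup over chained per-step transforms comes from.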
18 | 19 | #### Current work 20 | - [ ] Ability to easily modify the config and train different models. See the addins submodule for a head-detector example and an example of how to add new datasets (MPII, Brainwash). 21 | 22 | 23 | # Realtime Multi-Person Pose Estimation 24 | This is a Keras version of the [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) project. 25 | 26 | ## Introduction 27 | Code repo for reproducing the [2017 CVPR](https://arxiv.org/abs/1611.08050) paper using Keras. 28 | 29 | ## Results 30 | 31 |
32 | *(demo result images: see `readme/dance.gif` and `readme/result.png`)* 33 | 34 | 35 | 36 | 37 | 38 | 39 |
40 | 41 | 42 | ## Contents 43 | 1. [Converting caffe model](#converting-caffe-model-to-keras-model) 44 | 2. [Testing](#testing-steps) 45 | 3. [Training](#training-steps) 46 | 47 | ## Requirements 48 | 1. [Keras](https://keras.io/) 49 | 2. [Caffe - docker](https://hub.docker.com/r/bvlc/caffe/) required if you would like to convert the caffe model to a keras model. You 50 | don't have to compile/install caffe on your local machine. 51 | 52 | ## Converting Caffe model to Keras model 53 | Authors of the [original implementation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) released an already trained caffe model 54 | which you can use to extract the weights. 55 | 56 | - Download the caffe model `cd model; sh get_caffe_model.sh` 57 | - Dump caffe layers to numpy data `cd ..; docker run -v [absolute path to your keras_Realtime_Multi-Person_Pose_Estimation folder]:/workspace -it bvlc/caffe:cpu python dump_caffe_layers.py` 58 | Note that docker accepts only absolute paths, so you have to set the full path to the folder containing this project. 59 | - Convert the caffe model (from numpy data) to a keras model `python caffe_to_keras.py` 60 | 61 | ## Testing steps 62 | - Convert the caffe model to a keras model, or download the already converted keras model https://www.dropbox.com/s/llpxd14is7gyj0z/model.h5 63 | - Run the notebook `demo.ipynb`. 64 | - `python demo_image.py --image sample_images/ski.jpg` to run the picture demo. The result will be stored in the file result.png. You can use 65 | any image file as an input. 66 | - `python demo_camera.py` to run the webcam demo. 67 | 68 | ## Training steps 69 | 70 | **UPDATE 26/10/2017** 71 | 72 | **Fixed a problem with the training procedure. 73 | Here are my results after training for 5 epochs = 25000 iterations (1 epoch is ~5000 batches). 74 | The loss values are quite similar to those in the original training - [output.txt](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/blob/master/training/example_loss/output.txt)** 75 | 76 |
77 | *(training plots: see `readme/losses.png` and `readme/tr_results.png`)* 78 |
79 | 80 | **Results of running `python demo_image.py --image sample_images/ski.jpg --model training/weights.best.h5` with a model trained for only 25000 iterations. Not too bad !!! Training on my single GTX 1070 GPU took around 10 hours.** 81 | 82 |
83 | *(detection result after 25000 iterations: see `readme/5ep_result.png`)* 84 |
85 | 86 | **UPDATE 22/10/2017:** 87 | 88 | **Augmented samples are fetched from the [server](https://github.com/michalfaber/rmpe_dataset_server). The network never sees the same image twice, 89 | which was a problem with the previous approach (the tool rmpe_dataset_transformer). 90 | This allows you to run augmentation locally or on a separate node. 91 | You can start 2 instances, one serving the training set and a second one serving the validation set (on different ports if running locally).** 92 | 93 | - Install gsutil `curl https://sdk.cloud.google.com | bash`. This is a really helpful tool for downloading large datasets. 94 | - Download the data set (~25 GB) `cd dataset; sh get_dataset.sh`. 95 | - Download the [COCO official toolbox](https://github.com/pdollar/coco) into `dataset/coco/`. 96 | - `cd coco/PythonAPI; sudo python setup.py install` to install pycocotools. 97 | - Go to the "training" folder `cd ../../../training`. 98 | - Generate masks `python generate_masks.py`. Note: set the parameter "mode" in generate_masks.py (validation or training). 99 | - Create the intermediate dataset `python generate_hdf5.py`. This tool creates a dataset in hdf5 format. The structure of this dataset is very similar to the 100 | original lmdb dataset, where a sample is represented as an array: 5 x width x height (3 channels for the image, 1 channel for metadata, 1 channel for miss masks). 101 | For the MPI dataset there are 6 channels, with an additional channel for the all-people masks. 102 | Note: set the parameters `datasets` and `val_size` in `generate_hdf5.py`. 103 | - Download and compile the dataset server [rmpe_dataset_server](https://github.com/michalfaber/rmpe_dataset_server). 104 | This server generates augmented samples on the fly. Source samples are retrieved from the previously generated hdf5 dataset file. 105 | - Start the training data server in the first terminal session. 106 | `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/train_dataset.h5 5555` 107 | - Start the validation data server in a second terminal session. 108 | `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/val_dataset.h5 5556` 109 | - Optionally you can verify the datasets with `inspect_dataset.ipynb`. 110 | - Set the correct number of samples within `train_pose.py` - variables "train_samples = ???" and "val_samples = ???". 111 | This number is used by keras to determine how many samples are in 1 epoch. 112 | - Train the model in a third terminal `python train_pose.py`. 113 | 114 | ## Related repository 115 | - CVPR'16, [Convolutional Pose Machines](https://github.com/shihenw/convolutional-pose-machines-release). 116 | - CVPR'17, [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation). 
117 | 118 | ## Citation 119 | Please cite the paper in your publications if it helps your research: 120 | 121 | @InProceedings{cao2017realtime, 122 | title = {Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields}, 123 | author = {Zhe Cao and Tomas Simon and Shih-En Wei and Yaser Sheikh}, 124 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 125 | year = {2017} 126 | } 127 | 128 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | Configs = {} 7 | 8 | class CanonicalConfig: 9 | 10 | def __init__(self): 11 | 12 | self.width = 368 13 | self.height = 368 14 | 15 | self.stride = 8 16 | 17 | self.parts = ["nose", "neck", "Rsho", "Relb", "Rwri", "Lsho", "Lelb", "Lwri", "Rhip", "Rkne", "Rank", "Lhip", "Lkne", "Lank", "Reye", "Leye", "Rear", "Lear"] 18 | self.num_parts = len(self.parts) 19 | self.parts_dict = dict(zip(self.parts, range(self.num_parts))) 20 | self.parts += ["background"] 21 | self.num_parts_with_background = len(self.parts) 22 | 23 | leftParts, rightParts = CanonicalConfig.ltr_parts(self.parts_dict) 24 | self.leftParts = leftParts 25 | self.rightParts = rightParts 26 | 27 | 28 | # this numbers probably copied from matlab they are 1.. based not 0.. based 29 | self.limb_from = ['neck', 'Rhip', 'Rkne', 'neck', 'Lhip', 'Lkne', 'neck', 'Rsho', 'Relb', 'Rsho', 'neck', 'Lsho', 'Lelb', 'Lsho', 30 | 'neck', 'nose', 'nose', 'Reye', 'Leye'] 31 | self.limb_to = ['Rhip', 'Rkne', 'Rank', 'Lhip', 'Lkne', 'Lank', 'Rsho', 'Relb', 'Rwri', 'Rear', 'Lsho', 'Lelb', 'Lwri', 'Lear', 32 | 'nose', 'Reye', 'Leye', 'Rear', 'Lear'] 33 | 34 | self.limb_from = [ self.parts_dict[n] for n in self.limb_from ] 35 | self.limb_to = [ self.parts_dict[n] for n in self.limb_to ] 36 | 37 | assert self.limb_from == [x-1 for x in [2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16]] 38 | assert self.limb_to == [x-1 for x in [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18]] 39 | 40 | self.limbs_conn = list(zip(self.limb_from, self.limb_to)) 41 | 42 | self.paf_layers = 2*len(self.limbs_conn) 43 | self.heat_layers = self.num_parts 44 | self.num_layers = self.paf_layers + self.heat_layers + 1 45 | 46 | self.paf_start = 0 47 | self.heat_start = self.paf_layers 48 | self.bkg_start = self.paf_layers + self.heat_layers 49 | 50 | #self.data_shape = (self.height, self.width, 3) # 368, 368, 3 51 | self.mask_shape = (self.height//self.stride, self.width//self.stride) # 46, 46 52 | self.parts_shape = (self.height//self.stride, self.width//self.stride, self.num_layers) # 46, 46, 57 53 | 54 | class TransformationParams: 55 | 56 | def __init__(self): 57 | self.target_dist = 0.6; 58 | self.scale_prob = 1; # TODO: this is actually scale unprobability, i.e. 1 = off, 0 = always, not sure if it is a bug or not 59 | self.scale_min = 0.5; 60 | self.scale_max = 1.1; 61 | self.max_rotate_degree = 40. 62 | self.center_perterb_max = 40. 63 | self.flip_prob = 0.5 64 | self.sigma = 7. 65 | self.paf_thre = 8. # it is original 1.0 * stride in this program 66 | 67 | self.transform_params = TransformationParams() 68 | 69 | @staticmethod 70 | def ltr_parts(parts_dict): 71 | # when we flip image left parts became right parts and vice versa. This is the list of parts to exchange each other. 
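        # (these two index lists are consumed by Transformer.transform in
        # py_rmpe_server/py_rmpe_transformer.py, which swaps the left/right
        # joint columns after a horizontal flip)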
72 | leftParts = [ parts_dict[p] for p in ["Lsho", "Lelb", "Lwri", "Lhip", "Lkne", "Lank", "Leye", "Lear"] ] 73 | rightParts = [ parts_dict[p] for p in ["Rsho", "Relb", "Rwri", "Rhip", "Rkne", "Rank", "Reye", "Rear"] ] 74 | return leftParts,rightParts 75 | 76 | 77 | 78 | class COCOSourceConfig: 79 | 80 | 81 | def __init__(self, hdf5_source): 82 | 83 | self.hdf5_source = hdf5_source 84 | self.parts = ['nose', 'Leye', 'Reye', 'Lear', 'Rear', 'Lsho', 'Rsho', 'Lelb', 85 | 'Relb', 'Lwri', 'Rwri', 'Lhip', 'Rhip', 'Lkne', 'Rkne', 'Lank', 86 | 'Rank'] 87 | 88 | self.num_parts = len(self.parts) 89 | 90 | # for COCO neck is calculated like mean of 2 shoulders. 91 | self.parts_dict = dict(zip(self.parts, range(self.num_parts))) 92 | 93 | def convert(self, meta, global_config): 94 | 95 | joints = np.array(meta['joints']) 96 | 97 | assert joints.shape[1] == len(self.parts) 98 | 99 | result = np.zeros((joints.shape[0], global_config.num_parts, 3), dtype=np.float) 100 | result[:,:,2]=3. # OURS - # 3 never marked up in this dataset, 2 - not marked up in this person, 1 - marked and visible, 0 - marked but invisible 101 | 102 | for p in self.parts: 103 | coco_id = self.parts_dict[p] 104 | 105 | if p in global_config.parts_dict: 106 | global_id = global_config.parts_dict[p] 107 | assert global_id!=1, "neck shouldn't be known yet" 108 | result[:,global_id,:]=joints[:,coco_id,:] 109 | 110 | if 'neck' in global_config.parts_dict: 111 | neckG = global_config.parts_dict['neck'] 112 | RshoC = self.parts_dict['Rsho'] 113 | LshoC = self.parts_dict['Lsho'] 114 | 115 | # no neck in coco database, we calculate it as average of shoulders 116 | # TODO: we use 0 - hidden, 1 visible, 2 absent - it is not coco values they processed by generate_hdf5 117 | both_shoulders_known = (joints[:, LshoC, 2]<2) & (joints[:, RshoC, 2] < 2) 118 | 119 | result[~both_shoulders_known, neckG, 2] = 2. # otherwise they will be 3. 
aka 'never marked in this dataset' 120 | 121 | result[both_shoulders_known, neckG, 0:2] = (joints[both_shoulders_known, RshoC, 0:2] + 122 | joints[both_shoulders_known, LshoC, 0:2]) / 2 123 | result[both_shoulders_known, neckG, 2] = np.minimum(joints[both_shoulders_known, RshoC, 2], 124 | joints[both_shoulders_known, LshoC, 2]) 125 | 126 | meta['joints'] = result 127 | 128 | return meta 129 | 130 | def convert_mask(self, mask, global_config, joints = None): 131 | 132 | mask = np.repeat(mask[:,:,np.newaxis], global_config.num_layers, axis=2) 133 | return mask 134 | 135 | def source(self): 136 | 137 | return self.hdf5_source 138 | 139 | 140 | 141 | # more information on keypoints mapping is here 142 | # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/7 143 | 144 | 145 | Configs["Canonical"] = CanonicalConfig 146 | 147 | 148 | def GetConfig(config_name): 149 | 150 | config = Configs[config_name]() 151 | 152 | dct = config.parts[:] 153 | dct = [None]*(config.num_layers-len(dct)) + dct 154 | 155 | for (i,(fr,to)) in enumerate(config.limbs_conn): 156 | name = "%s->%s" % (config.parts[fr], config.parts[to]) 157 | print(i, name) 158 | x = i*2 159 | y = i*2+1 160 | 161 | assert dct[x] is None 162 | dct[x] = name + ":x" 163 | assert dct[y] is None 164 | dct[y] = name + ":y" 165 | 166 | from pprint import pprint 167 | pprint(dict(zip(range(len(dct)), dct))) 168 | 169 | return config 170 | 171 | if __name__ == "__main__": 172 | 173 | # test it 174 | foo = GetConfig("Canonical") 175 | print(foo.paf_layers, foo.heat_layers) 176 | 177 | 178 | -------------------------------------------------------------------------------- /dataset/coco/INFO: -------------------------------------------------------------------------------- 1 | Folder for https://github.com/pdollar/coco files -------------------------------------------------------------------------------- /dataset/get_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install gsutil which provides tools for efficiently accessing datasets 4 | # without unzipping large files. 5 | # Install gsutil via:curl https://sdk.cloud.google.com | bash 6 | 7 | mkdir train2017 8 | mkdir val2017 9 | mkdir test2017 10 | mkdir annotations 11 | 12 | echo "Downloading train2017..." 13 | gsutil -m rsync gs://images.cocodataset.org/train2017 train2017 14 | 15 | echo "Downloading val2017..." 16 | gsutil -m rsync gs://images.cocodataset.org/val2017 val2017 17 | 18 | echo "Downloading test2017..." 19 | gsutil -m rsync gs://images.cocodataset.org/test2017 test2017 20 | 21 | echo "Downloading annotations..." 
22 | gsutil -m rsync gs://images.cocodataset.org/annotations annotations 23 | 24 | -------------------------------------------------------------------------------- /demo_camera.py: -------------------------------------------------------------------------------- 1 | # TODO -------------------------------------------------------------------------------- /demo_image.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import math 4 | import time 5 | import numpy as np 6 | import util 7 | from config_reader import config_reader 8 | from scipy.ndimage.filters import gaussian_filter 9 | from model import get_testing_model 10 | 11 | 12 | # find connection in the specified sequence, center 29 is in the position 15 13 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ 14 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ 15 | [1, 16], [16, 18], [3, 17], [6, 18]] 16 | 17 | # the middle joints heatmap correpondence 18 | mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ 19 | [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ 20 | [55, 56], [37, 38], [45, 46]] 21 | 22 | # visualize 23 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], 24 | [0, 255, 0], \ 25 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], 26 | [85, 0, 255], \ 27 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 28 | 29 | 30 | def process (input_image, params, model_params): 31 | 32 | oriImg = cv2.imread(input_image) # B,G,R order 33 | multiplier = [x * model_params['boxsize'] / oriImg.shape[0] for x in params['scale_search']] 34 | 35 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19)) 36 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38)) 37 | 38 | for m in range(len(multiplier)): 39 | scale = multiplier[m] 40 | 41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'], 43 | model_params['padValue']) 44 | 45 | input_img = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,0,1,2)) # required shape (1, width, height, channels) 46 | 47 | output_blobs = model.predict(input_img) 48 | 49 | # extract outputs, resize, and remove padding 50 | heatmap = np.squeeze(output_blobs[1]) # output 1 is heatmaps 51 | heatmap = cv2.resize(heatmap, (0, 0), fx=model_params['stride'], fy=model_params['stride'], 52 | interpolation=cv2.INTER_CUBIC) 53 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], 54 | :] 55 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 56 | 57 | paf = np.squeeze(output_blobs[0]) # output 0 is PAFs 58 | paf = cv2.resize(paf, (0, 0), fx=model_params['stride'], fy=model_params['stride'], 59 | interpolation=cv2.INTER_CUBIC) 60 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 61 | paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 62 | 63 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 64 | paf_avg = paf_avg + paf / len(multiplier) 65 | 66 | all_peaks = [] 67 | peak_counter = 0 68 | 69 | for part in range(18): 70 | map_ori = heatmap_avg[:, :, part] 71 | map = gaussian_filter(map_ori, sigma=3) 72 | 73 | 
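        # Non-maximum suppression on the smoothed heatmap: the four arrays below
        # are copies of the map shifted by one pixel (left/right/up/down); a pixel
        # counts as a peak when it is >= all four shifted neighbours and exceeds
        # the threshold params['thre1'].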
map_left = np.zeros(map.shape) 74 | map_left[1:, :] = map[:-1, :] 75 | map_right = np.zeros(map.shape) 76 | map_right[:-1, :] = map[1:, :] 77 | map_up = np.zeros(map.shape) 78 | map_up[:, 1:] = map[:, :-1] 79 | map_down = np.zeros(map.shape) 80 | map_down[:, :-1] = map[:, 1:] 81 | 82 | peaks_binary = np.logical_and.reduce( 83 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > params['thre1'])) 84 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse 85 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 86 | id = range(peak_counter, peak_counter + len(peaks)) 87 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 88 | 89 | all_peaks.append(peaks_with_score_and_id) 90 | peak_counter += len(peaks) 91 | 92 | connection_all = [] 93 | special_k = [] 94 | mid_num = 10 95 | 96 | for k in range(len(mapIdx)): 97 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 98 | candA = all_peaks[limbSeq[k][0] - 1] 99 | candB = all_peaks[limbSeq[k][1] - 1] 100 | nA = len(candA) 101 | nB = len(candB) 102 | indexA, indexB = limbSeq[k] 103 | if (nA != 0 and nB != 0): 104 | connection_candidate = [] 105 | for i in range(nA): 106 | for j in range(nB): 107 | vec = np.subtract(candB[j][:2], candA[i][:2]) 108 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 109 | # failure case when 2 body parts overlaps 110 | if norm == 0: 111 | continue 112 | vec = np.divide(vec, norm) 113 | 114 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 115 | np.linspace(candA[i][1], candB[j][1], num=mid_num))) 116 | 117 | vec_x = np.array( 118 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 119 | for I in range(len(startend))]) 120 | vec_y = np.array( 121 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 122 | for I in range(len(startend))]) 123 | 124 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 125 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 126 | 0.5 * oriImg.shape[0] / norm - 1, 0) 127 | criterion1 = len(np.nonzero(score_midpts > params['thre2'])[0]) > 0.8 * len( 128 | score_midpts) 129 | criterion2 = score_with_dist_prior > 0 130 | if criterion1 and criterion2: 131 | connection_candidate.append([i, j, score_with_dist_prior, 132 | score_with_dist_prior + candA[i][2] + candB[j][2]]) 133 | 134 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 135 | connection = np.zeros((0, 5)) 136 | for c in range(len(connection_candidate)): 137 | i, j, s = connection_candidate[c][0:3] 138 | if (i not in connection[:, 3] and j not in connection[:, 4]): 139 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 140 | if (len(connection) >= min(nA, nB)): 141 | break 142 | 143 | connection_all.append(connection) 144 | else: 145 | special_k.append(k) 146 | connection_all.append([]) 147 | 148 | # last number in each row is the total parts number of that person 149 | # the second last number in each row is the score of the overall configuration 150 | subset = -1 * np.ones((0, 20)) 151 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 152 | 153 | for k in range(len(mapIdx)): 154 | if k not in special_k: 155 | partAs = connection_all[k][:, 0] 156 | partBs = connection_all[k][:, 1] 157 | indexA, indexB = np.array(limbSeq[k]) - 1 158 | 159 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 160 | found = 0 161 | subset_idx = 
[-1, -1] 162 | for j in range(len(subset)): # 1:size(subset,1): 163 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: 164 | subset_idx[found] = j 165 | found += 1 166 | 167 | if found == 1: 168 | j = subset_idx[0] 169 | if (subset[j][indexB] != partBs[i]): 170 | subset[j][indexB] = partBs[i] 171 | subset[j][-1] += 1 172 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 173 | elif found == 2: # if found 2 and disjoint, merge them 174 | j1, j2 = subset_idx 175 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 176 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 177 | subset[j1][:-2] += (subset[j2][:-2] + 1) 178 | subset[j1][-2:] += subset[j2][-2:] 179 | subset[j1][-2] += connection_all[k][i][2] 180 | subset = np.delete(subset, j2, 0) 181 | else: # as like found == 1 182 | subset[j1][indexB] = partBs[i] 183 | subset[j1][-1] += 1 184 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 185 | 186 | # if find no partA in the subset, create a new subset 187 | elif not found and k < 17: 188 | row = -1 * np.ones(20) 189 | row[indexA] = partAs[i] 190 | row[indexB] = partBs[i] 191 | row[-1] = 2 192 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + \ 193 | connection_all[k][i][2] 194 | subset = np.vstack([subset, row]) 195 | 196 | # delete some rows of subset which has few parts occur 197 | deleteIdx = []; 198 | for i in range(len(subset)): 199 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 200 | deleteIdx.append(i) 201 | subset = np.delete(subset, deleteIdx, axis=0) 202 | 203 | canvas = cv2.imread(input_image) # B,G,R order 204 | for i in range(18): 205 | for j in range(len(all_peaks[i])): 206 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1) 207 | 208 | stickwidth = 4 209 | 210 | for i in range(17): 211 | for n in range(len(subset)): 212 | index = subset[n][np.array(limbSeq[i]) - 1] 213 | if -1 in index: 214 | continue 215 | cur_canvas = canvas.copy() 216 | Y = candidate[index.astype(int), 0] 217 | X = candidate[index.astype(int), 1] 218 | mX = np.mean(X) 219 | mY = np.mean(Y) 220 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 221 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 222 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 223 | 360, 1) 224 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) 225 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 226 | 227 | return canvas 228 | 229 | if __name__ == '__main__': 230 | parser = argparse.ArgumentParser() 231 | parser.add_argument('--image', type=str, required=True, help='input image') 232 | parser.add_argument('--output', type=str, default='result.png', help='output image') 233 | parser.add_argument('--model', type=str, default='model/keras/model.h5', help='path to the weights file') 234 | 235 | args = parser.parse_args() 236 | input_image = args.image 237 | output = args.output 238 | keras_weights_file = args.model 239 | 240 | tic = time.time() 241 | print('start processing...') 242 | 243 | # load model 244 | 245 | # authors of original model don't use 246 | # vgg normalization (subtracting mean) on input images 247 | model = get_testing_model() 248 | model.load_weights(keras_weights_file) 249 | 250 | # load config 251 | params, model_params = config_reader() 252 | 253 | # generate image with body parts 254 | canvas = process(input_image, params, model_params) 255 | 256 | toc = time.time() 
257 | print ('processing time is %.5f' % (toc - tic)) 258 | 259 | cv2.imwrite(output, canvas) 260 | 261 | cv2.destroyAllWindows() 262 | 263 | 264 | 265 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers.merge import Concatenate 3 | from keras.layers import Activation, Input, Lambda 4 | from keras.layers.convolutional import Conv2D 5 | from keras.layers.pooling import MaxPooling2D 6 | from keras.layers.merge import Multiply 7 | from keras.regularizers import l2 8 | from keras.initializers import random_normal, constant 9 | 10 | import re 11 | 12 | 13 | #stages = 6 14 | #np_branch1 = 38 15 | #np_branch2 = 19 16 | 17 | def relu(x): return Activation('relu')(x) 18 | 19 | def conv(x, nf, ks, name, weight_decay): 20 | kernel_reg = l2(weight_decay[0]) if weight_decay else None 21 | bias_reg = l2(weight_decay[1]) if weight_decay else None 22 | 23 | x = Conv2D(nf, (ks, ks), padding='same', name=name, 24 | kernel_regularizer=kernel_reg, 25 | bias_regularizer=bias_reg, 26 | kernel_initializer=random_normal(stddev=0.01), 27 | bias_initializer=constant(0.0))(x) 28 | return x 29 | 30 | def pooling(x, ks, st, name): 31 | x = MaxPooling2D((ks, ks), strides=(st, st), name=name)(x) 32 | return x 33 | 34 | def vgg_block(x, weight_decay): 35 | # Block 1 36 | x = conv(x, 64, 3, "conv1_1", (weight_decay, 0)) 37 | x = relu(x) 38 | x = conv(x, 64, 3, "conv1_2", (weight_decay, 0)) 39 | x = relu(x) 40 | x = pooling(x, 2, 2, "pool1_1") 41 | 42 | # Block 2 43 | x = conv(x, 128, 3, "conv2_1", (weight_decay, 0)) 44 | x = relu(x) 45 | x = conv(x, 128, 3, "conv2_2", (weight_decay, 0)) 46 | x = relu(x) 47 | x = pooling(x, 2, 2, "pool2_1") 48 | 49 | # Block 3 50 | x = conv(x, 256, 3, "conv3_1", (weight_decay, 0)) 51 | x = relu(x) 52 | x = conv(x, 256, 3, "conv3_2", (weight_decay, 0)) 53 | x = relu(x) 54 | x = conv(x, 256, 3, "conv3_3", (weight_decay, 0)) 55 | x = relu(x) 56 | x = conv(x, 256, 3, "conv3_4", (weight_decay, 0)) 57 | x = relu(x) 58 | x = pooling(x, 2, 2, "pool3_1") 59 | 60 | # Block 4 61 | x = conv(x, 512, 3, "conv4_1", (weight_decay, 0)) 62 | x = relu(x) 63 | x = conv(x, 512, 3, "conv4_2", (weight_decay, 0)) 64 | x = relu(x) 65 | 66 | # Additional non vgg layers 67 | x = conv(x, 256, 3, "conv4_3_CPM", (weight_decay, 0)) 68 | x = relu(x) 69 | x = conv(x, 128, 3, "conv4_4_CPM", (weight_decay, 0)) 70 | x = relu(x) 71 | 72 | return x 73 | 74 | 75 | def stage1_block(x, num_p, branch, weight_decay): 76 | # Block 1 77 | x = conv(x, 128, 3, "Mconv1_stage1_L%d" % branch, (weight_decay, 0)) 78 | x = relu(x) 79 | x = conv(x, 128, 3, "Mconv2_stage1_L%d" % branch, (weight_decay, 0)) 80 | x = relu(x) 81 | x = conv(x, 128, 3, "Mconv3_stage1_L%d" % branch, (weight_decay, 0)) 82 | x = relu(x) 83 | x = conv(x, 512, 1, "Mconv4_stage1_L%d" % branch, (weight_decay, 0)) 84 | x = relu(x) 85 | x = conv(x, num_p, 1, "Mconv5_stage1_L%d" % branch, (weight_decay, 0)) 86 | 87 | return x 88 | 89 | 90 | def stageT_block(x, num_p, stage, branch, weight_decay): 91 | # Block 1 92 | x = conv(x, 128, 7, "Mconv1_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 93 | x = relu(x) 94 | x = conv(x, 128, 7, "Mconv2_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 95 | x = relu(x) 96 | x = conv(x, 128, 7, "Mconv3_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 97 | x = relu(x) 98 | x = conv(x, 128, 7, "Mconv4_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 99 | x = 
relu(x) 100 | x = conv(x, 128, 7, "Mconv5_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 101 | x = relu(x) 102 | x = conv(x, 128, 1, "Mconv6_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 103 | x = relu(x) 104 | x = conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch), (weight_decay, 0)) 105 | 106 | return x 107 | 108 | 109 | def apply_mask(x, mask1, mask2, num_p, stage, branch, np_branch1, np_branch2): 110 | w_name = "weight_stage%d_L%d" % (stage, branch) 111 | 112 | # TODO: we have branch number here why we made so strange check 113 | assert np_branch1 != np_branch2 # we selecting branches by number of pafs, if they accidentally became the same it will be disaster 114 | 115 | if num_p == np_branch1: 116 | w = Multiply(name=w_name)([x, mask1]) # vec_weight 117 | elif num_p == np_branch2: 118 | w = Multiply(name=w_name)([x, mask2]) # vec_heat 119 | else: 120 | assert False, "wrong number of layers num_p=%d " % num_p 121 | return w 122 | 123 | 124 | def get_training_model(weight_decay, np_branch1, np_branch2, stages = 6, gpus = None): 125 | 126 | img_input_shape = (None, None, 3) 127 | vec_input_shape = (None, None, np_branch1) 128 | heat_input_shape = (None, None, np_branch2) 129 | 130 | inputs = [] 131 | outputs = [] 132 | 133 | img_input = Input(shape=img_input_shape) 134 | vec_weight_input = Input(shape=vec_input_shape) 135 | heat_weight_input = Input(shape=heat_input_shape) 136 | 137 | inputs.append(img_input) 138 | if np_branch1 > 0: 139 | inputs.append(vec_weight_input) 140 | 141 | if np_branch2 > 0: 142 | inputs.append(heat_weight_input) 143 | 144 | #img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5] 145 | img_normalized = img_input # will be done on augmentation stage 146 | 147 | # VGG 148 | stage0_out = vgg_block(img_normalized, weight_decay) 149 | 150 | # stage 1 - branch 1 (PAF) 151 | new_x = [] 152 | if np_branch1 > 0: 153 | stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, weight_decay) 154 | w1 = apply_mask(stage1_branch1_out, vec_weight_input, heat_weight_input, np_branch1, 1, 1, np_branch1, np_branch2) 155 | outputs.append(w1) 156 | new_x.append(stage1_branch1_out) 157 | 158 | # stage 1 - branch 2 (confidence maps) 159 | 160 | if np_branch2 > 0: 161 | stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, weight_decay) 162 | w2 = apply_mask(stage1_branch2_out, vec_weight_input, heat_weight_input, np_branch2, 1, 2, np_branch1, np_branch2) 163 | outputs.append(w2) 164 | new_x.append(stage1_branch2_out) 165 | 166 | new_x.append(stage0_out) 167 | 168 | x = Concatenate()(new_x) 169 | 170 | # stage sn >= 2 171 | for sn in range(2, stages + 1): 172 | 173 | new_x = [] 174 | # stage SN - branch 1 (PAF) 175 | if np_branch1 > 0: 176 | stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, weight_decay) 177 | w1 = apply_mask(stageT_branch1_out, vec_weight_input, heat_weight_input, np_branch1, sn, 1, np_branch1, np_branch2) 178 | outputs.append(w1) 179 | new_x.append(stageT_branch1_out) 180 | 181 | # stage SN - branch 2 (confidence maps) 182 | if np_branch2 > 0: 183 | stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, weight_decay) 184 | w2 = apply_mask(stageT_branch2_out, vec_weight_input, heat_weight_input, np_branch2, sn, 2, np_branch1, np_branch2) 185 | outputs.append(w2) 186 | new_x.append(stageT_branch2_out) 187 | 188 | new_x.append(stage0_out) 189 | 190 | if sn < stages: 191 | x = Concatenate()(new_x) 192 | 193 | model = Model(inputs=inputs, outputs=outputs) 194 | return model 195 | 196 | def get_lrmult(model): 197 | 198 | 
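    # Per-layer learning-rate multipliers replicate the original Caffe solver
    # settings: kernels x1 / biases x2 for VGG and stage-1 layers, kernels x4 /
    # biases x8 for the refinement stages (stage >= 2). The dict is keyed by
    # weight tensor name; presumably it is consumed by the optimizer defined in
    # training/optimizers.py.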
# setup lr multipliers for conv layers 199 | lr_mult = dict() 200 | 201 | for layer in model.layers: 202 | 203 | if isinstance(layer, Conv2D): 204 | 205 | # stage = 1 206 | if re.match("Mconv\d_stage1.*", layer.name): 207 | kernel_name = layer.weights[0].name 208 | bias_name = layer.weights[1].name 209 | lr_mult[kernel_name] = 1 210 | lr_mult[bias_name] = 2 211 | 212 | # stage > 1 213 | elif re.match("Mconv\d_stage.*", layer.name): 214 | kernel_name = layer.weights[0].name 215 | bias_name = layer.weights[1].name 216 | lr_mult[kernel_name] = 4 217 | lr_mult[bias_name] = 8 218 | 219 | # vgg 220 | else: 221 | print("matched as vgg layer", layer.name) 222 | kernel_name = layer.weights[0].name 223 | bias_name = layer.weights[1].name 224 | lr_mult[kernel_name] = 1 225 | lr_mult[bias_name] = 2 226 | 227 | return lr_mult 228 | 229 | 230 | def get_testing_model(np_branch1, np_branch2, stages = 6): 231 | 232 | img_input_shape = (None, None, 3) 233 | 234 | img_input = Input(shape=img_input_shape) 235 | 236 | img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5] 237 | 238 | # VGG 239 | stage0_out = vgg_block(img_normalized, None) 240 | 241 | stages_out = [] 242 | 243 | # stage 1 - branch 1 (PAF) 244 | if np_branch1 > 0: 245 | stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, None) 246 | stages_out.append(stage1_branch1_out) 247 | 248 | # stage 1 - branch 2 (confidence maps) 249 | if np_branch2 > 0: 250 | stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, None) 251 | stages_out.append(stage1_branch2_out) 252 | 253 | x = Concatenate()(stages_out + [stage0_out]) 254 | 255 | # stage t >= 2 256 | stageT_branch1_out = None 257 | stageT_branch2_out = None 258 | for sn in range(2, stages + 1): 259 | 260 | stages_out = [] 261 | 262 | if np_branch1 > 0: 263 | stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, None) 264 | stages_out.append(stageT_branch1_out) 265 | if np_branch2 > 0: 266 | stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, None) 267 | stages_out.append(stageT_branch2_out) 268 | 269 | if sn < stages: 270 | x = Concatenate()(stages_out + [stage0_out]) 271 | 272 | model = Model(inputs=[img_input], outputs=[stageT_branch1_out, stageT_branch2_out]) 273 | 274 | return model -------------------------------------------------------------------------------- /model/caffe/layers/INFO: -------------------------------------------------------------------------------- 1 | Folder for layers extracted from the caffe model by the tool dump_caffe_layers.py -------------------------------------------------------------------------------- /model/caffe_to_keras.py: -------------------------------------------------------------------------------- 1 | from model import get_testing_model 2 | import numpy as np 3 | import os 4 | 5 | CAFFE_LAYERS_DIR = "model/caffe/layers" 6 | KERAS_MODEL_FILE = "model/keras/model.h5" 7 | 8 | m = get_testing_model() 9 | 10 | for layer in m.layers: 11 | layer_name = layer.name 12 | if (os.path.exists(os.path.join(CAFFE_LAYERS_DIR, "W_%s.npy" % layer_name))): 13 | w = np.load(os.path.join(CAFFE_LAYERS_DIR, "W_%s.npy" % layer_name)) 14 | b = np.load(os.path.join(CAFFE_LAYERS_DIR, "b_%s.npy" % layer_name)) 15 | 16 | w = np.transpose(w, (2, 3, 1, 0)) 17 | 18 | layer_weights = [w, b] 19 | layer.set_weights(layer_weights) 20 | 21 | m.save_weights(KERAS_MODEL_FILE) 22 | 23 | print("Done !") -------------------------------------------------------------------------------- /model/dump_caffe_layers.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Run this file from docker: 3 | # 4 | # docker run -v [absolute path to your keras_Realtime_Multi-Person_Pose_Estimation folder]:/workspace -it bvlc/caffe:cpu python dump_caffe_layers.py 5 | # 6 | 7 | from __future__ import division, print_function 8 | import caffe 9 | import numpy as np 10 | import os 11 | 12 | layers_output = 'model/caffe/layers' 13 | caffe_model = 'model/caffe/_trained_COCO/pose_iter_440000.caffemodel' 14 | caffe_proto = 'model/caffe/_trained_COCO/pose_deploy.prototxt' 15 | 16 | caffe.set_mode_cpu() 17 | net = caffe.Net(caffe_proto, caffe_model, caffe.TEST) 18 | 19 | # layer names and output shapes 20 | for layer_name, blob in net.blobs.iteritems(): 21 | print(layer_name, blob.data.shape) 22 | 23 | # write out weight matrices and bias vectors 24 | for k, v in net.params.items(): 25 | print(k, v[0].data.shape, v[1].data.shape) 26 | np.save(os.path.join(layers_output, "W_{:s}.npy".format(k)), v[0].data) 27 | np.save(os.path.join(layers_output, "b_{:s}.npy".format(k)), v[1].data) 28 | 29 | print("Done !") 30 | -------------------------------------------------------------------------------- /model/get_caffe_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget -nc --directory-prefix=./caffe/_trained_COCO/ http://posefs1.perception.cs.cmu.edu/Users/ZheCao/pose_iter_440000.caffemodel 4 | wget -nc --directory-prefix=./caffe/_trained_MPI/ http://posefs1.perception.cs.cmu.edu/Users/ZheCao/pose_iter_146000.caffemodel 5 | -------------------------------------------------------------------------------- /model/get_keras_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget -nc --directory-prefix=./keras/ https://www.dropbox.com/s/llpxd14is7gyj0z/model.h5 -------------------------------------------------------------------------------- /py_rmpe_server/py_rmpe_data_iterator.py: -------------------------------------------------------------------------------- 1 | 2 | import h5py 3 | import random 4 | import json 5 | import numpy as np 6 | import cv2 7 | 8 | from py_rmpe_server.py_rmpe_transformer import Transformer, AugmentSelection 9 | from py_rmpe_server.py_rmpe_heatmapper import Heatmapper 10 | 11 | from time import time 12 | 13 | class RawDataIterator: 14 | 15 | def __init__(self, global_config, configs, shuffle = True, augment = True): 16 | 17 | self.global_config = global_config 18 | 19 | if not isinstance(configs, (list,tuple)): 20 | configs = [configs] 21 | 22 | self.h5files = [c.source() for c in configs] 23 | self.configs = configs 24 | self.h5s = [h5py.File(fname, "r") for fname in self.h5files] 25 | self.datums = [ h5['datum'] if 'datum' in h5 else (h5['dataset'], h5['images'], h5['masks'] if 'masks' in h5 else None) for h5 in self.h5s ] 26 | 27 | self.heatmapper = Heatmapper(global_config) 28 | self.transformer = Transformer(global_config) 29 | self.augment = augment 30 | self.shuffle = shuffle 31 | 32 | self.keys = [] 33 | 34 | for n,d in enumerate(self.datums): 35 | if isinstance(d, (list, tuple)): 36 | k = list(d[0].keys()) 37 | else: 38 | k = list(d.keys()) 39 | 40 | print(len(k)) 41 | 42 | self.keys += zip([n] * len(k), k) 43 | 44 | def gen(self, timing = False): 45 | 46 | if self.shuffle: 47 | random.shuffle(self.keys) 48 | 49 | for num, key in self.keys: 50 | 51 | read_start = time() 52 | image, mask, meta, debug = 
self.read_data(num, key) 53 | 54 | aug_start = time() 55 | 56 | # transform picture 57 | assert mask.dtype == np.uint8, mask.dtype 58 | image, mask, meta = self.transformer.transform(image, mask, meta, aug=None if self.augment else AugmentSelection.unrandom()) 59 | assert mask.dtype == np.float, mask.dtype 60 | 61 | # we need layered mask on next stage 62 | mask = self.configs[num].convert_mask(mask, self.global_config, joints = meta['joints']) 63 | 64 | # create heatmaps and pafs 65 | labels = self.heatmapper.create_heatmaps(meta['joints'], mask) 66 | 67 | # normalize image to save gpu/cpu time for keras 68 | image = image/256.0 - 0.5 69 | 70 | if timing: 71 | yield image, mask, labels, meta['joints'], time()-read_start, time()-aug_start 72 | else: 73 | yield image, mask, labels, meta['joints'] 74 | 75 | def num_keys(self): 76 | 77 | return len(self.keys) 78 | 79 | def read_data(self, num, key): 80 | 81 | config = self.configs[num] 82 | datum = self.datums[num] 83 | if isinstance(datum, (list, tuple)): 84 | dataset, images, masks = datum 85 | return self.read_data_new(dataset, images, masks, key, config) 86 | else: 87 | return self.read_data_old(datum, key, config) 88 | 89 | 90 | def read_data_old(self, datum, key, config): 91 | 92 | entry = datum[key] 93 | 94 | assert 'meta' in entry.attrs, "No 'meta' attribute in .h5 file. Did you generate .h5 with new code?" 95 | 96 | debug = json.loads(entry.attrs['meta']) 97 | meta = {} 98 | meta["objpos"]=debug["objpos"] 99 | meta["scale_provided"] = debug["scale_provided"] 100 | meta["joints"] = debug["joints"] 101 | 102 | meta = config.convert(meta, self.global_config) 103 | data = entry.value 104 | 105 | if data.shape[0] <= 6: 106 | # TODO: this is extra work, should write in store in correct format (not transposed) 107 | # can't do now because I want storage compatibility yet 108 | # we need image in classical not transposed format in this program for warp affine 109 | data = data.transpose([1,2,0]) 110 | 111 | img = data[:,:,0:3] 112 | mask_miss = data[:,:,4] 113 | #mask = data[:,:,5] 114 | 115 | return img, mask_miss, meta, debug 116 | 117 | def read_data_new(self, dataset, images, masks, key, config): 118 | 119 | entry = dataset[key] 120 | 121 | assert 'meta' in entry.attrs, "No 'meta' attribute in .h5 file. Did you generate .h5 with new code?" 
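        # New-style .h5 layout: the 'dataset' group stores per-sample JSON
        # metadata, 'images' stores the (possibly encoded) image bytes, and the
        # optional 'masks' group stores encoded miss-masks; if the image has a
        # 4th channel it is treated as the miss-mask instead.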
122 | 123 | meta = json.loads(entry.value) 124 | debug = json.loads(entry.attrs['meta']) 125 | meta = config.convert(meta, self.global_config) 126 | 127 | img = images[meta['image']].value 128 | mask_miss = None 129 | 130 | if len(img.shape)==2 and img.shape[1]==1: 131 | img = cv2.imdecode(img, flags=-1) 132 | 133 | if img.shape[2]>3: 134 | mask_miss = img[:, :, 3] 135 | img = img[:, :, 0:3] 136 | 137 | if mask_miss is None: 138 | if masks is not None: 139 | mask_miss = masks[meta['image']].value 140 | if len(mask_miss.shape) == 2 and mask_miss.shape[1]==1: 141 | mask_miss = cv2.imdecode(mask_miss, flags = -1) 142 | 143 | if mask_miss is None: 144 | mask_miss = np.full((img.shape[0], img.shape[1]), fill_value=255, dtype=np.uint8) 145 | 146 | 147 | return img, mask_miss, meta, debug 148 | 149 | def __del__(self): 150 | 151 | if 'h5s' in vars(self): 152 | for h5 in self.h5s: 153 | h5.close() 154 | -------------------------------------------------------------------------------- /py_rmpe_server/py_rmpe_heatmapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | from math import sqrt, isnan 5 | 6 | class Heatmapper: 7 | 8 | def __init__(self, config): 9 | 10 | self.config = config 11 | sigma = config.transform_params.sigma 12 | thre = config.transform_params.paf_thre 13 | 14 | self.double_sigma2 = 2 * sigma * sigma 15 | self.thre = thre 16 | 17 | # cached common parameters which same for all iterations and all pictures 18 | 19 | stride = self.config.stride 20 | width = self.config.width//stride 21 | height = self.config.height//stride 22 | 23 | # this is coordinates of centers of bigger grid 24 | self.grid_x = np.arange(width)*stride + stride/2-0.5 25 | self.grid_y = np.arange(height)*stride + stride/2-0.5 26 | 27 | self.Y, self.X = np.mgrid[0:self.config.height:stride, 0:self.config.width:stride] 28 | 29 | # TODO: check it again 30 | # basically we should use center of grid, but in this place classic implementation uses left-top point. 31 | # self.X = self.X + stride / 2 - 0.5 32 | # self.Y = self.Y + stride / 2 - 0.5 33 | 34 | 35 | def create_heatmaps(self, joints, mask): 36 | 37 | heatmaps = np.zeros(self.config.parts_shape, dtype=np.float) 38 | 39 | self.put_joints(heatmaps, joints) 40 | sl = slice(self.config.heat_start, self.config.heat_start + self.config.heat_layers) 41 | heatmaps[:,:,self.config.bkg_start] = 1. 
- np.amax(heatmaps[:,:,sl], axis=2) 42 | 43 | self.put_limbs(heatmaps, joints) 44 | 45 | heatmaps *= mask 46 | 47 | return heatmaps 48 | 49 | 50 | def put_gaussian_maps(self, heatmaps, layer, joints): 51 | 52 | # actually exp(a+b) = exp(a)*exp(b), lets use it calculating 2d exponent, it could just be calculated by 53 | 54 | for i in range(joints.shape[0]): 55 | 56 | exp_x = np.exp(-(self.grid_x-joints[i,0])**2/self.double_sigma2) 57 | exp_y = np.exp(-(self.grid_y-joints[i,1])**2/self.double_sigma2) 58 | 59 | exp = np.outer(exp_y, exp_x) 60 | 61 | # note this is correct way of combination - min(sum(...),1.0) as was in C++ code is incorrect 62 | # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/118 63 | heatmaps[:, :, self.config.heat_start + layer] = np.maximum(heatmaps[:, :, self.config.heat_start + layer], exp) 64 | 65 | def put_joints(self, heatmaps, joints): 66 | 67 | for i in range(self.config.num_parts): 68 | visible = joints[:,i,2] < 2 69 | self.put_gaussian_maps(heatmaps, i, joints[visible, i, 0:2]) 70 | 71 | 72 | def put_vector_maps(self, heatmaps, layerX, layerY, joint_from, joint_to): 73 | 74 | count = np.zeros(heatmaps.shape[:-1], dtype=np.int) 75 | 76 | for i in range(joint_from.shape[0]): 77 | (x1, y1) = joint_from[i] 78 | (x2, y2) = joint_to[i] 79 | 80 | dx = x2-x1 81 | dy = y2-y1 82 | dnorm = sqrt(dx*dx + dy*dy) 83 | 84 | if dnorm==0: # we get nan here sometimes, it's kills NN 85 | # TODO: handle it better. probably we should add zero paf, centered paf, or skip this completely 86 | print("Parts are too close to each other. Length is zero. Skipping") 87 | continue 88 | 89 | dx = dx / dnorm 90 | dy = dy / dnorm 91 | 92 | assert not isnan(dx) and not isnan(dy), "dnorm is zero, wtf" 93 | 94 | min_sx, max_sx = (x1, x2) if x1 < x2 else (x2, x1) 95 | min_sy, max_sy = (y1, y2) if y1 < y2 else (y2, y1) 96 | 97 | min_sx = int(round((min_sx - self.thre) / self.config.stride)) 98 | min_sy = int(round((min_sy - self.thre) / self.config.stride)) 99 | max_sx = int(round((max_sx + self.thre) / self.config.stride)) 100 | max_sy = int(round((max_sy + self.thre) / self.config.stride)) 101 | 102 | # check PAF off screen. do not really need to do it with max>grid size 103 | if max_sy < 0: 104 | continue 105 | 106 | if max_sx < 0: 107 | continue 108 | 109 | if min_sx < 0: 110 | min_sx = 0 111 | 112 | if min_sy < 0: 113 | min_sy = 0 114 | 115 | #TODO: check it again 116 | slice_x = slice(min_sx, max_sx) # + 1 this mask is not only speed up but crops paf really. 
This copied from original code 117 | slice_y = slice(min_sy, max_sy) # + 1 int g_y = min_y; g_y < max_y; g_y++ -- note strict < 118 | 119 | dist = distances(self.X[slice_y,slice_x], self.Y[slice_y,slice_x], x1, y1, x2, y2) 120 | dist = dist <= self.thre 121 | 122 | # TODO: averaging by pafs mentioned in the paper but never worked in C++ augmentation code 123 | heatmaps[slice_y, slice_x, layerX][dist] = (dist * dx)[dist] # += dist * dx 124 | heatmaps[slice_y, slice_x, layerY][dist] = (dist * dy)[dist] # += dist * dy 125 | count[slice_y, slice_x][dist] += 1 126 | 127 | # TODO: averaging by pafs mentioned in the paper but never worked in C++ augmentation code 128 | # heatmaps[:, :, layerX][count > 0] /= count[count > 0] 129 | # heatmaps[:, :, layerY][count > 0] /= count[count > 0] 130 | 131 | def put_limbs(self, heatmaps, joints): 132 | 133 | for (i,(fr,to)) in enumerate(self.config.limbs_conn): 134 | 135 | visible_from = joints[:,fr,2] < 2 136 | visible_to = joints[:,to, 2] < 2 137 | visible = visible_from & visible_to 138 | 139 | layerX, layerY = (self.config.paf_start + i*2, self.config.paf_start + i*2 + 1) 140 | self.put_vector_maps(heatmaps, layerX, layerY, joints[visible, fr, 0:2], joints[visible, to, 0:2]) 141 | 142 | 143 | 144 | #parallel calculation distance from any number of points of arbitrary shape(X, Y), to line defined by segment (x1,y1) -> (x2, y2) 145 | 146 | def distances(X, Y, x1, y1, x2, y2): 147 | 148 | # classic formula is: 149 | # d = (x2-x1)*(y1-y)-(x1-x)*(y2-y1)/sqrt((x2-x1)**2 + (y2-y1)**2) 150 | 151 | xD = (x2-x1) 152 | yD = (y2-y1) 153 | norm2 = sqrt(xD**2 + yD**2) 154 | dist = xD*(y1-Y)-(x1-X)*yD 155 | dist /= norm2 156 | 157 | return np.abs(dist) 158 | 159 | def test(): 160 | 161 | hm = Heatmapper() 162 | d = distances(hm.X, hm.Y, 100, 100, 50, 150) 163 | print(d < 8.) 164 | 165 | if __name__ == "__main__": 166 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000) 167 | test() 168 | 169 | -------------------------------------------------------------------------------- /py_rmpe_server/py_rmpe_transformer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | from math import cos, sin, pi 5 | import cv2 6 | import random 7 | 8 | class AugmentSelection: 9 | 10 | def __init__(self, flip=False, degree = 0., crop = (0,0), scale = 1.): 11 | self.flip = flip 12 | self.degree = degree #rotate 13 | self.crop = crop #shift actually 14 | self.scale = scale 15 | 16 | @staticmethod 17 | def random(transform_params): 18 | flip = random.uniform(0.,1.) > transform_params.flip_prob 19 | degree = random.uniform(-1.,1.) * transform_params.max_rotate_degree 20 | scale = (transform_params.scale_max - transform_params.scale_min)*random.uniform(0.,1.)+transform_params.scale_min \ 21 | if random.uniform(0.,1.) > transform_params.scale_prob else 1. # TODO: see 'scale improbability' TODO above 22 | x_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max); 23 | y_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max); 24 | 25 | return AugmentSelection(flip, degree, (x_offset,y_offset), scale) 26 | 27 | @staticmethod 28 | def unrandom(): 29 | flip = False 30 | degree = 0. 31 | scale = 1. 
--------------------------------------------------------------------------------
/py_rmpe_server/py_rmpe_transformer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | from math import cos, sin, pi
5 | import cv2
6 | import random
7 | 
8 | class AugmentSelection:
9 | 
10 |     def __init__(self, flip=False, degree = 0., crop = (0,0), scale = 1.):
11 |         self.flip = flip
12 |         self.degree = degree  # rotation
13 |         self.crop = crop  # actually a shift
14 |         self.scale = scale
15 | 
16 |     @staticmethod
17 |     def random(transform_params):
18 |         flip = random.uniform(0.,1.) > transform_params.flip_prob
19 |         degree = random.uniform(-1.,1.) * transform_params.max_rotate_degree
20 |         scale = (transform_params.scale_max - transform_params.scale_min)*random.uniform(0.,1.)+transform_params.scale_min \
21 |             if random.uniform(0.,1.) > transform_params.scale_prob else 1.  # TODO: see 'scale improbability' TODO above
22 |         x_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max)
23 |         y_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max)
24 | 
25 |         return AugmentSelection(flip, degree, (x_offset,y_offset), scale)
26 | 
27 |     @staticmethod
28 |     def unrandom():
29 |         flip = False
30 |         degree = 0.
31 |         scale = 1.
32 |         x_offset = 0
33 |         y_offset = 0
34 | 
35 |         return AugmentSelection(flip, degree, (x_offset,y_offset), scale)
36 | 
37 |     def affine(self, center, scale_self, config):
38 | 
39 |         # the main idea: we do all image transformations with one affine matrix.
40 |         # this saves a lot of CPU and makes the code significantly shorter;
41 |         # the same affine matrix can be used to transform the joint coordinates afterwards
42 | 
43 | 
44 |         A = self.scale * cos(self.degree / 180. * pi )
45 |         B = self.scale * sin(self.degree / 180. * pi )
46 | 
47 |         scale_size = config.transform_params.target_dist / scale_self * self.scale
48 | 
49 |         (width, height) = center  # actually the (x, y) of the person center
50 |         center_x = width + self.crop[0]
51 |         center_y = height + self.crop[1]
52 | 
53 |         center2zero = np.array( [[ 1., 0., -center_x],
54 |                                  [ 0., 1., -center_y ],
55 |                                  [ 0., 0., 1. ]] )
56 | 
57 |         rotate = np.array( [[ A, B, 0 ],
58 |                             [ -B, A, 0 ],
59 |                             [ 0, 0, 1. ] ])
60 | 
61 |         scale = np.array( [[ scale_size, 0, 0 ],
62 |                            [ 0, scale_size, 0 ],
63 |                            [ 0, 0, 1. ] ])
64 | 
65 |         flip = np.array( [[ -1 if self.flip else 1., 0., 0. ],
66 |                           [ 0., 1., 0. ],
67 |                           [ 0., 0., 1. ]] )
68 | 
69 |         center2center = np.array( [[ 1., 0., config.width//2],
70 |                                    [ 0., 1., config.height//2 ],
71 |                                    [ 0., 0., 1. ]] )
72 | 
73 |         # the order of combination is reversed: matrices apply right-to-left, so center2zero acts first
74 |         combined = center2center.dot(flip).dot(scale).dot(rotate).dot(center2zero)
75 | 
76 |         return combined[0:2]
77 | 
78 | class Transformer:
79 | 
80 |     def __init__(self, config):
81 | 
82 |         self.config = config
83 | 
84 |     def transform(self, img, mask, meta, aug = None):
85 | 
86 |         if aug is None:
87 |             aug = AugmentSelection.random(self.config.transform_params)
88 | 
89 |         # warp picture and mask
90 |         M = aug.affine(meta['objpos'][0], meta['scale_provided'][0], self.config)
91 | 
92 |         # TODO: need to understand this; scale_provided[0] is the height of the main person divided by 368, calculated in generate_hdf5.py
93 |         # print(img.shape)
94 |         img = cv2.warpAffine(img, M, (self.config.height, self.config.width), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(127,127,127))
95 |         mask = cv2.warpAffine(mask, M, (self.config.height, self.config.width), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=255)
96 |         mask = cv2.resize(mask, self.config.mask_shape, interpolation=cv2.INTER_CUBIC)  # TODO: should be combined with the warp for speed
97 |         mask = mask.astype(float) / 255.
98 | 
99 |         # warp key points
100 |         # TODO: a joint could be cropped out by the augmentation, in which case we should mark it as invisible.
101 |         # update: maybe we don't actually need this; the original code removed parts sliced away by more than half, maybe we should keep them
102 |         original_points = meta['joints'].copy()
103 |         original_points[:,:,2]=1  # it is a hack: we reuse the 3rd column as the homogeneous coordinate so M's translation applies
104 |         converted_points = np.matmul(M, original_points.transpose([0,2,1])).transpose([0,2,1])
105 |         meta['joints'][:,:,0:2]=converted_points
106 | 
107 |         # we just made the image flip, i.e. the right leg just became the left leg, and vice versa
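# (editor's aside, not part of the original file) Why the swap below is needed: the flip matrix in
# affine() mirrors x, so a joint on the person's anatomical left ends up on the right side of the
# image; the coordinates are already correct, only the left/right *labels* must be exchanged. Note
# also the composition order above: matrices multiply right-to-left onto column vectors, so in
# center2center.dot(flip).dot(scale).dot(rotate).dot(center2zero) the crop center is moved to the
# origin first and shifted to the output center last. A tiny numeric check with assumed values:
#
#     import numpy as np
#     flip = np.array([[-1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
#     left_wrist = np.array([10., 5., 1.])   # homogeneous (x, y, 1)
#     print(flip @ left_wrist)               # [-10.  5.  1.] -- mirrored across x = 0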
108 | 
109 |         if aug.flip:
110 |             tmpLeft = meta['joints'][:, self.config.leftParts, :]
111 |             tmpRight = meta['joints'][:, self.config.rightParts, :]
112 |             meta['joints'][:, self.config.leftParts, :] = tmpRight
113 |             meta['joints'][:, self.config.rightParts, :] = tmpLeft
114 | 
115 | 
116 |         return img, mask, meta
117 | 
--------------------------------------------------------------------------------
/py_rmpe_server/rmpe_server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import numpy as np
4 | import zmq
5 | from multiprocessing import Process
6 | from time import time
7 | 
8 | sys.path.append("..")
9 | 
10 | from py_rmpe_data_iterator import RawDataIterator
11 | from config import COCOSourceConfig, MPIISourceConfig, GetConfig
12 | 
13 | 
14 | class Server:
15 | 
16 |     # these methods are all called in the parent process
17 | 
18 |     def __init__(self, global_config, configs, port, name, shuffle, augment):
19 | 
20 |         self.name = name
21 |         self.port = port
22 |         self.configs = configs
23 |         self.global_config = global_config
24 | 
25 |         self.shuffle = shuffle
26 |         self.augment = augment
27 | 
28 |         self.process = Process(target=Server.loop, args=(self,))
29 |         self.process.daemon = True
30 |         self.process.start()
31 | 
32 | 
33 |     def join(self):
34 | 
35 |         return self.process.join(10)
36 | 
37 |     # these methods are all called in the child process
38 | 
39 |     def init(self):
40 | 
41 |         self.context = zmq.Context()
42 |         self.socket = self.context.socket(zmq.PUSH)
43 |         self.socket.set_hwm(160)
44 |         self.socket.bind("tcp://*:%s" % self.port)
45 | 
46 |     @staticmethod
47 |     def loop(self):
48 | 
49 |         print("%s: Child process init... " % self.name)
50 |         self.init()
51 | 
52 |         iterator = RawDataIterator(self.global_config, self.configs, shuffle=self.shuffle, augment=self.augment)
53 | 
54 |         print("%s: Loop started... 
" % self.name) 55 | 56 | num = 0 57 | generation = 0 58 | cycle_start = time() 59 | 60 | while True: 61 | 62 | keys = iterator.num_keys() 63 | print("%s: generation %s, %d images " % (self.name, generation, keys)) 64 | 65 | start = time() 66 | for (image, mask, labels, keypoints, read_time, aug_time) in iterator.gen(timing=True): 67 | 68 | augment_time = time()-start 69 | 70 | headers = self.produce_headers(image, mask, labels, keypoints) 71 | self.socket.send_json(headers) 72 | self.socket.send(np.ascontiguousarray(image)) 73 | self.socket.send(np.ascontiguousarray(mask)) 74 | self.socket.send(np.ascontiguousarray(labels)) 75 | self.socket.send(np.ascontiguousarray(keypoints)) 76 | 77 | num += 1 78 | print("%s [%d/%d] read/decompress %0.2f ms, aug %0.2f ms (%0.2f im/s), send %0.2f s" % (self.name, num, keys, read_time*1000, aug_time*1000, 1./aug_time, time() - start - aug_time) ) 79 | start = time() 80 | 81 | def produce_headers(self, img, mask, labels, keypoints): 82 | 83 | header_data = {"descr": img.dtype.str, "shape": img.shape, "fortran_order": False, "normalized": True } 84 | header_mask = {"descr": mask.dtype.str, "shape": mask.shape, "fortran_order": False} 85 | header_label = {"descr": labels.dtype.str, "shape": labels.shape, "fortran_order": False} 86 | header_keypoints = {"descr": keypoints.dtype.str, "shape": keypoints.shape, "fortran_order": False} 87 | 88 | headers = [header_data, header_mask, header_label, header_keypoints] 89 | 90 | return headers 91 | 92 | 93 | def main(): 94 | 95 | train = Server(GetConfig("Canonical"), COCOSourceConfig("../dataset/coco_train_dataset.h5"), 5555, "Train", shuffle=True, augment=True) 96 | val = Server(GetConfig("Canonical"), COCOSourceConfig("../dataset/coco_val_dataset.h5"), 5556, "Val", shuffle=False, augment=False) 97 | 98 | processes = [train, val] 99 | 100 | while None in [p.process.exitcode for p in processes]: 101 | 102 | print("exitcodes", [p.process.exitcode for p in processes]) 103 | for p in processes: 104 | if p.process.exitcode is None: 105 | p.join() 106 | 107 | 108 | np.set_printoptions(precision=1, linewidth=100*3, suppress=True, threshold=100000) 109 | main() 110 | 111 | 112 | -------------------------------------------------------------------------------- /readme/5ep_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/5ep_result.png -------------------------------------------------------------------------------- /readme/dance.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/dance.gif -------------------------------------------------------------------------------- /readme/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/losses.png -------------------------------------------------------------------------------- /readme/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/result.png 
-------------------------------------------------------------------------------- /readme/tr_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/tr_results.png -------------------------------------------------------------------------------- /sample_images/ski.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/sample_images/ski.jpg -------------------------------------------------------------------------------- /testing/coco.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# This is actually dremovd@github code for calculating coco metric. " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import sys\n", 21 | "import pandas as pd" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import os\n", 33 | "\n", 34 | "os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'\n", 35 | "os.environ['CUDA_VISIBLE_DEVICES']='0'" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Create keras model and load weights" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stderr", 54 | "output_type": "stream", 55 | "text": [ 56 | "Using TensorFlow backend.\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "from model import get_testing_model\n", 62 | "model = get_testing_model()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "from coco_metric import per_image_scores, validation" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 6, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "training_dir = './training/'\n", 85 | "trained_models = [\n", 86 | " 'weights'\n", 87 | " #'weights-cpp-lr',\n", 88 | " #'weights-python-last',\n", 89 | "]\n", 90 | "optimal_epoch_loss = 'val_weight_stage6_L1_loss'" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "Model 'weights', optimal loss: 78.969 at epoch 36\n", 105 | "./training/weights/weights.0036.h5\n", 106 | "loading annotations into memory...\n" 107 | ] 108 | }, 109 | { 110 | "name": "stderr", 111 | "output_type": "stream", 112 | "text": [ 113 | "\r", 114 | " 0%| | 0/5000 [00:00= map_left, map >= map_right, map >= map_up, map >= map_down, map > threshold)) 89 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse 90 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 91 | id = range(peak_counter, peak_counter + len(peaks)) 92 | peaks_with_score_and_id = [peaks_with_score[i] + 
(id[i],) for i in range(len(id))] 93 | 94 | all_peaks.append(peaks_with_score_and_id) 95 | peak_counter += len(peaks) 96 | 97 | return all_peaks 98 | 99 | 100 | def find_connections(all_peaks, paf_avg, image_width, threshold): 101 | connection_all = [] 102 | special_k = [] 103 | mid_num = 10 104 | 105 | for k in range(len(mapIdx)): 106 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 107 | candA = all_peaks[limbSeq[k][0] - 1] 108 | candB = all_peaks[limbSeq[k][1] - 1] 109 | nA = len(candA) 110 | nB = len(candB) 111 | if (nA != 0 and nB != 0): 112 | connection_candidate = [] 113 | for i in range(nA): 114 | for j in range(nB): 115 | vec = np.subtract(candB[j][:2], candA[i][:2]) 116 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 117 | # failure case when 2 body parts overlaps 118 | if norm == 0: 119 | continue 120 | vec = np.divide(vec, norm) 121 | 122 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 123 | np.linspace(candA[i][1], candB[j][1], num=mid_num))) 124 | 125 | vec_x = np.array( 126 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 127 | for I in range(len(startend))]) 128 | vec_y = np.array( 129 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 130 | for I in range(len(startend))]) 131 | 132 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 133 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 134 | 0.5 * image_width / norm - 1, 0) 135 | criterion1 = len(np.nonzero(score_midpts > threshold)[0]) > 0.8 * len( 136 | score_midpts) 137 | criterion2 = score_with_dist_prior > 0 138 | if criterion1 and criterion2: 139 | connection_candidate.append([i, j, score_with_dist_prior, 140 | score_with_dist_prior + candA[i][2] + candB[j][2]]) 141 | 142 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 143 | connection = np.zeros((0, 5)) 144 | for c in range(len(connection_candidate)): 145 | i, j, s = connection_candidate[c][0:3] 146 | if (i not in connection[:, 3] and j not in connection[:, 4]): 147 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 148 | if (len(connection) >= min(nA, nB)): 149 | break 150 | 151 | connection_all.append(connection) 152 | else: 153 | special_k.append(k) 154 | connection_all.append([]) 155 | 156 | return connection_all, special_k 157 | 158 | 159 | def find_people(connection_all, special_k, all_peaks): 160 | # last number in each row is the total parts number of that person 161 | # the second last number in each row is the score of the overall configuration 162 | subset = -1 * np.ones((0, 20)) 163 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 164 | 165 | for k in range(len(mapIdx)): 166 | if k not in special_k: 167 | partAs = connection_all[k][:, 0] 168 | partBs = connection_all[k][:, 1] 169 | indexA, indexB = np.array(limbSeq[k]) - 1 170 | 171 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 172 | found = 0 173 | subset_idx = [-1, -1] 174 | for j in range(len(subset)): # 1:size(subset,1): 175 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: 176 | subset_idx[found] = j 177 | found += 1 178 | 179 | if found == 1: 180 | j = subset_idx[0] 181 | if (subset[j][indexB] != partBs[i]): 182 | subset[j][indexB] = partBs[i] 183 | subset[j][-1] += 1 184 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 185 | elif found == 2: # if found 2 and disjoint, merge them 186 | j1, j2 = subset_idx 187 | 
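# (editor's aside, not part of the original file) The merge test below: each subset row stores one
# candidate peak index per body part (-1 = empty) plus two bookkeeping columns (overall score, part
# count) which the [:-2] slice strips off. Two rows describe disjoint people exactly when no part
# slot is filled in both rows, i.e. membership never reaches 2:
#
#     import numpy as np
#     j1 = np.array([ 3., -1.,  7., -1.])    # toy rows, part slots only
#     j2 = np.array([-1.,  5., -1., -1.])
#     membership = (j1 >= 0).astype(int) + (j2 >= 0).astype(int)
#     print(len(np.nonzero(membership == 2)[0]) == 0)   # True -> safe to merge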
membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 188 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 189 | subset[j1][:-2] += (subset[j2][:-2] + 1) 190 | subset[j1][-2:] += subset[j2][-2:] 191 | subset[j1][-2] += connection_all[k][i][2] 192 | subset = np.delete(subset, j2, 0) 193 | else: # as like found == 1 194 | subset[j1][indexB] = partBs[i] 195 | subset[j1][-1] += 1 196 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 197 | 198 | # if find no partA in the subset, create a new subset 199 | elif not found and k < 17: 200 | row = -1 * np.ones(20) 201 | row[indexA] = partAs[i] 202 | row[indexB] = partBs[i] 203 | row[-1] = 2 204 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + \ 205 | connection_all[k][i][2] 206 | subset = np.vstack([subset, row]) 207 | 208 | # delete some rows of subset which has few parts occur 209 | deleteIdx = []; 210 | for i in range(len(subset)): 211 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 212 | deleteIdx.append(i) 213 | subset = np.delete(subset, deleteIdx, axis=0) 214 | return subset, candidate 215 | 216 | 217 | dt_gt_mapping = { 218 | 0: 0, 219 | 1: None, 220 | 2: 6, 221 | 3: 8, 222 | 4: 10, 223 | 5: 5, 224 | 6: 7, 225 | 7: 9, 226 | 8: 12, 227 | 9: 14, 228 | 10: 16, 229 | 11: 11, 230 | 12: 13, 231 | 13: 15, 232 | 14: 2, 233 | 15: 1, 234 | 16: 4, 235 | 17: 3, 236 | } 237 | 238 | 239 | def process(input_image, params, model, model_params): 240 | oriImg = cv2.imread(input_image) # B,G,R order 241 | heatmap_avg, paf_avg = predict(oriImg, model, model_params) 242 | 243 | all_peaks = find_peaks(heatmap_avg, params['thre1']) 244 | connection_all, special_k = find_connections(all_peaks, paf_avg, oriImg.shape[0], params['thre2']) 245 | subset, candidate = find_people(connection_all, special_k, all_peaks) 246 | 247 | keypoints = [] 248 | for s in subset: 249 | keypoint_indexes = s[:18] 250 | person_keypoint_coordinates = [] 251 | for index in keypoint_indexes: 252 | if index == -1: 253 | # "No candidate for keypoint" 254 | X, Y = 0, 0 255 | else: 256 | X, Y = candidate[index.astype(int)][:2] 257 | person_keypoint_coordinates.append((X, Y)) 258 | person_keypoint_coordinates_coco = [None] * 17 259 | 260 | for dt_index, gt_index in dt_gt_mapping.items(): 261 | if gt_index is None: 262 | continue 263 | person_keypoint_coordinates_coco[gt_index] = person_keypoint_coordinates[dt_index] 264 | 265 | keypoints.append((person_keypoint_coordinates_coco, 1 - 1.0 / s[18])) 266 | return keypoints 267 | 268 | 269 | def get_image_name(coco, image_id): 270 | return coco.imgs[image_id]['file_name'] 271 | 272 | 273 | def predict_many(coco, images_directory, validation_ids, params, model, model_params): 274 | assert (not set(validation_ids).difference(set(coco.getImgIds()))) 275 | 276 | keypoints = {} 277 | for image_id in tqdm.tqdm(validation_ids): 278 | image_name = get_image_name(coco, image_id) 279 | image_name = os.path.join(images_directory, image_name) 280 | keypoints[image_id] = process(image_name, dict(params), model, dict(model_params)) 281 | return keypoints 282 | 283 | 284 | def format_results(keypoints, resFile): 285 | format_keypoints = [] 286 | 287 | for image_id, people in keypoints.items(): 288 | for keypoint_list, score in people: 289 | format_keypoint_list = [] 290 | for x, y in keypoint_list: 291 | for v in [int(x), int(y), 1 if x > 0 or y > 0 else 0]: 292 | format_keypoint_list.append(v) 293 | 294 | format_keypoints.append({ 295 | "image_id": image_id, 296 | 
"category_id": 1, 297 | "keypoints": format_keypoint_list, 298 | "score": score, 299 | }) 300 | 301 | json.dump(format_keypoints, open(resFile, 'w')) 302 | 303 | 304 | def validation(model, dump_name, validation_ids=None, dataset='val2017'): 305 | annType = 'keypoints' 306 | prefix = 'person_keypoints' 307 | 308 | dataDir = 'dataset' 309 | annFile = '%s/annotations/%s_%s.json' % (dataDir, prefix, dataset) 310 | cocoGt = COCO(annFile) 311 | 312 | if validation_ids == None: 313 | validation_ids = cocoGt.getImgIds() 314 | 315 | resFile = '%s/results/%s_%s_%s100_results.json' 316 | resFile = resFile % (dataDir, prefix, dataset, dump_name) 317 | os.makedirs(os.path.dirname(resFile), exist_ok=True) 318 | 319 | keypoints = predict_many(cocoGt, os.path.join(dataDir, dataset), validation_ids, params, model, model_params) 320 | format_results(keypoints, resFile) 321 | 322 | cocoDt = cocoGt.loadRes(resFile) 323 | 324 | cocoEval = COCOeval(cocoGt, cocoDt, annType) 325 | 326 | cocoEval.params.imgIds = validation_ids 327 | cocoEval.evaluate() 328 | cocoEval.accumulate() 329 | cocoEval.summarize() 330 | return cocoEval 331 | 332 | 333 | def per_image_scores(eval_result): 334 | def convert_match_to_score(match): 335 | matches = match['gtMatches'][:, np.array(match['gtIgnore']) == 0] 336 | scores = { 337 | 'image_id': match['image_id'], 338 | 'gt_person_count': matches.shape[1], 339 | } 340 | 341 | for i in range(matches.shape[0]): 342 | okp_threshold = eval_result.params.iouThrs[i] 343 | scores['matched_%.2f' % okp_threshold] = sum(matches[i, :] != 0) 344 | scores['average'] = np.mean(np.sum(matches != 0, axis=1)) / scores['gt_person_count'] 345 | 346 | return scores 347 | 348 | evalImgs = eval_result.evalImgs 349 | scores = [convert_match_to_score(image_match) for image_match in evalImgs if image_match is not None] 350 | 351 | return pd.DataFrame(scores) 352 | -------------------------------------------------------------------------------- /testing/inhouse_metric.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | import cv2 5 | from time import sleep 6 | from scipy.ndimage.filters import gaussian_filter, maximum_filter 7 | 8 | # this is find peak function 9 | from scipy.optimize import linear_sum_assignment 10 | 11 | 12 | def find_peaks(layer, thre1=0.01): 13 | map_ori = cv2.resize(layer, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC) 14 | map = gaussian_filter(map_ori, sigma=3) 15 | peaks_binary = (map == maximum_filter(map, 3)) & (map > thre1) 16 | 17 | if np.count_nonzero(peaks_binary) > 50: 18 | return [] #safety valve from N^2 in next stages 19 | 20 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse 21 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 22 | 23 | return peaks_with_score 24 | 25 | 26 | def assign_peaks(layer_y, layer_gt): 27 | 28 | if len(layer_y) == 0 and len(layer_gt) == 0: 29 | return np.nan 30 | 31 | if len(layer_y) == 0 or len(layer_gt) == 0: 32 | return 400 33 | 34 | d = np.array(layer_y) 35 | t = np.array(layer_gt) 36 | 37 | dx = np.subtract.outer(d[:, 0], t[:, 0]) 38 | dy = np.subtract.outer(d[:, 1], t[:, 1]) 39 | distance = np.sqrt(dx ** 2 + dy ** 2) 40 | # print(distance) 41 | 42 | y, gt = linear_sum_assignment(distance) 43 | # print(np.array(list(zip(y,gt)))) 44 | 45 | dist = [distance[foo] for foo in zip(y, gt)] # TODO: use numpy 46 | # print(dist) 47 | 48 | dist += [400] * (len(layer_y) - len(y)) 49 | dist += [400] * 
(len(layer_gt) - len(gt)) 50 | 51 | dist = np.mean(dist) 52 | 53 | return dist 54 | 55 | 56 | def calc_batch_metrics(batch_no, gt, Y, heatmap_layers): 57 | 58 | MAE = Y - gt 59 | MAE = np.abs(MAE) 60 | MAE = np.mean(MAE, axis=(1, 2)) 61 | 62 | RMSE = (Y - gt) ** 2 63 | RMSE = np.mean(RMSE, axis=(1, 2)) 64 | RMSE = np.sqrt(RMSE) 65 | 66 | 67 | gt_parts = np.full((gt.shape[0], gt.shape[3]), np.nan) 68 | y_parts = np.full((gt.shape[0], gt.shape[3]), np.nan) 69 | y_dist = np.full((gt.shape[0], gt.shape[3]), np.nan) 70 | 71 | 72 | for n in range(gt.shape[0]): 73 | for l in heatmap_layers: 74 | y_peaks = find_peaks(Y[n, :, :, l]) 75 | y_parts[n, l] = len(y_peaks) 76 | gt_peaks = find_peaks(gt[n, :, :, l]) 77 | gt_parts[n, l] = len(gt_peaks) 78 | y_dist[n, l] = assign_peaks(y_peaks, gt_peaks) 79 | 80 | batch_index = np.full(fill_value=batch_no, shape=MAE.shape) 81 | item_index, layer_index = np.mgrid[0:MAE.shape[0], 0:MAE.shape[1]] 82 | 83 | metrics = pd.DataFrame({'batch': batch_index.ravel(), 84 | 'item': item_index.ravel(), 85 | 'layer': layer_index.ravel(), 86 | 'MAE': MAE.ravel(), 87 | 'RMSE': RMSE.ravel(), 88 | 'GT_PARTS': gt_parts.ravel(), 89 | 'Y_PARTS': y_parts.ravel(), 90 | 'DIST': y_dist.ravel() 91 | }, 92 | columns=('batch', 'item', 'layer', 'MAE', 'RMSE', 'GT_PARTS', 'Y_PARTS', 'DIST') 93 | ) 94 | 95 | return metrics 96 | 97 | -------------------------------------------------------------------------------- /testing/rmpe_server_comparator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | sys.path.append("..") 6 | 7 | from time import time 8 | from training.ds_generators import DataGeneratorClient 9 | from py_rmpe_config import RmpeGlobalConfig 10 | 11 | import numpy as np 12 | import pandas as pd 13 | import cv2 14 | 15 | servers = [('py-server', 'localhost', 5556), ('new-server', 'localhost', 5558)] 16 | clients = {} 17 | save_to = 'comparator-hdf5' # save new server to output, c++ server to original and compare images 18 | 19 | def cmp_pics(num, lhsd, rhsd, lhsn, rhsn): 20 | diff = lhsd.astype(float) - rhsd.astype(float) 21 | L1 = np.average(np.abs(diff)) 22 | L2 = np.sqrt(np.average(diff**2)) 23 | AC = np.average(lhsd==rhsd) 24 | 25 | print("Image: ", num, lhsd.shape, rhsd.shape, L1, L2, AC) 26 | 27 | diff = diff.transpose([1,2,0]) 28 | diff = np.abs(diff) 29 | diff = diff.astype(np.uint8) 30 | 31 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "image." + lhsn), lhsd.transpose([1,2,0])) 32 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "image." 
+ rhsn), rhsd.transpose([1,2,0])) 33 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "imagediff" ), diff) 34 | 35 | return (L1,L2,AC) 36 | 37 | def cmp_masks(num, lhsd, rhsd, lhsn, rhsn): 38 | diff = (lhsd.astype(float) - rhsd.astype(float))*255.0 39 | L1 = np.average(np.abs(diff)) 40 | L2 = np.sqrt(np.average(diff**2)) 41 | AC = np.average(lhsd == rhsd) 42 | 43 | print("Mask: ", num, lhsd.shape, rhsd.shape, L1, L2, AC) 44 | 45 | lhsd = lhsd.reshape((lhsd.shape[0], lhsd.shape[1], 1)) 46 | lhsd = (lhsd*255).astype(np.uint8) 47 | lhsd = cv2.resize(lhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 48 | lhsd = lhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 49 | 50 | rhsd = rhsd.reshape((rhsd.shape[0], rhsd.shape[1], 1)) 51 | rhsd = (rhsd*255).astype(np.uint8) 52 | rhsd = cv2.resize(rhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 53 | rhsd = rhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 54 | 55 | diff = np.abs(diff).reshape((diff.shape[0], diff.shape[1], 1)) 56 | diff = diff.astype(np.uint8) 57 | diff = cv2.resize(diff, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 58 | diff = diff.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 59 | 60 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "mask."+lhsn), lhsd) 61 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "mask."+rhsn), rhsd) 62 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "maskdiff"), diff) 63 | 64 | return (L1,L2,AC) 65 | 66 | def cmp_layers(num, lhsd_all, rhsd_all, lhsn, rhsn): 67 | 68 | result = [] 69 | 70 | L1T = 0 71 | L2T = 0 72 | ACT = 0 73 | 74 | for layer in range(RmpeGlobalConfig.num_layers): 75 | lhsd = lhsd_all[layer, :, :] 76 | rhsd = rhsd_all[layer, :, :] 77 | 78 | diff = (lhsd.astype(float) - rhsd.astype(float))*255.0 79 | L1 = np.average(np.abs(diff)) 80 | L2 = np.sqrt(np.average(diff**2)) 81 | AC = np.average(lhsd == rhsd) 82 | 83 | #print("Layers(%d): " % layer, num, lhsd.shape, rhsd.shape, L1, L2, AC) 84 | L1T += L1 85 | L2T += L2 86 | ACT += AC 87 | 88 | lhsd = lhsd.reshape((lhsd.shape[0], lhsd.shape[1], 1)) 89 | lhsd = (127+lhsd*128).astype(np.uint8) 90 | lhsd = cv2.resize(lhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 91 | lhsd = lhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 92 | 93 | rhsd = rhsd.reshape((rhsd.shape[0], rhsd.shape[1], 1)) 94 | rhsd = (127+rhsd*128).astype(np.uint8) 95 | rhsd = cv2.resize(rhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 96 | rhsd = rhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 97 | 98 | diff = np.abs(diff).reshape((diff.shape[0], diff.shape[1], 1)) 99 | diff = diff.astype(np.uint8) 100 | diff = cv2.resize(diff, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 101 | diff = diff.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 102 | 103 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "." + lhsn), lhsd) 104 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "." 
+ rhsn), rhsd) 105 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "diff"), diff) 106 | 107 | result += [L1, L2, AC] 108 | 109 | print("Layers: ", num, lhsd.shape, rhsd.shape, L1T/RmpeGlobalConfig.num_layers, L2T/RmpeGlobalConfig.num_layers, ACT/RmpeGlobalConfig.num_layers) 110 | 111 | return result 112 | 113 | def step(num, augs): 114 | 115 | all_res = [] 116 | 117 | os.makedirs(save_to+("/%5d" % num), exist_ok=True) 118 | 119 | for (i,lhs) in enumerate(augs): 120 | for (j,rhs) in enumerate(augs): 121 | if i < j: 122 | 123 | res = [] 124 | 125 | res += cmp_pics(num, augs[lhs][0], augs[rhs][0], lhs, rhs) 126 | res += cmp_masks(num, augs[lhs][1], augs[rhs][1], lhs, rhs) 127 | res += cmp_layers(num, augs[lhs][2], augs[rhs][2], lhs, rhs) 128 | 129 | all_res += [res] 130 | 131 | return all_res 132 | 133 | def main(servers, batch_size): 134 | 135 | for (name, host, port) in servers: 136 | clients[name] = DataGeneratorClient(port=port, host=host, hwm=1, batch_size=batch_size).gen_raw() 137 | 138 | res_all = [] 139 | 140 | for i in range(2645): #2645 141 | print(i) 142 | augs = dict([(name, next(value)) for (name, value) in clients.items()]) 143 | res = step(i, augs) 144 | res_all += res 145 | 146 | columns = ["ImageL1", "ImageL2", "ImageAC", "MaskL1", "MaskL2", "MaskAC"] 147 | for layer in range(RmpeGlobalConfig.num_layers): 148 | columns += ["Layer"+str(layer)+"L1", "Layer"+str(layer)+"L2", "Layer"+str(layer)+"AC"] 149 | 150 | res_all = np.array(res_all) 151 | print(res_all.shape) 152 | 153 | results = pd.DataFrame(res_all, columns=columns ) 154 | results.to_csv("weights.tsv", sep="\t") 155 | 156 | batch_size=20 157 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000) 158 | main(servers, batch_size) 159 | -------------------------------------------------------------------------------- /testing/rmpe_server_tester.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | sys.path.append("..") 6 | 7 | from time import time 8 | from training.ds_generators import DataGeneratorClient 9 | 10 | import cv2 11 | import numpy as np 12 | 13 | from py_rmpe_config import RmpeGlobalConfig 14 | 15 | mask_pattern = np.zeros((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 3), dtype=np.uint8) 16 | heat_pattern = np.zeros((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 3), dtype=np.uint8) 17 | mask_y, mask_x = np.mgrid[0:RmpeGlobalConfig.height, 0:RmpeGlobalConfig.width] 18 | grid = (mask_x//8 % 2) + (mask_y//8 % 2) 19 | 20 | mask_pattern[grid==1]=(255,255,255) 21 | mask_pattern[grid!=1]=(128,128,128) 22 | 23 | heat_pattern[...] 
= (0,0,255) 24 | 25 | save_to = 'old_val' # save new server to output, c++ server to original and compare images 26 | 27 | 28 | def save_images(num, image, mask, paf): 29 | 30 | image = image.transpose([1,2,0]) 31 | 32 | mask_img = mask.reshape((mask.shape[0], mask.shape[1], 1)) 33 | mask_img = (mask_img*255).astype(np.uint8) 34 | mask_img = cv2.resize(mask_img, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 35 | mask_img = mask_img.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 36 | 37 | masked_img = image.copy() 38 | masked_img = masked_img*(mask_img/255.0) + mask_pattern*(1.-mask_img/255.0) 39 | 40 | os.makedirs(save_to, exist_ok=True) 41 | 42 | #cv2.imwrite(save_to+"/%07d%s.png" % (num, ""), image) 43 | #cv2.imwrite(save_to+"/%07d%s.png" % (num, "mask"), mask_img) 44 | cv2.imwrite(save_to + "/%07d%s.png" % (num, "masked"), masked_img) 45 | 46 | parts = [] 47 | 48 | for i in range(RmpeGlobalConfig.num_parts_with_background): 49 | heated_image = image.copy() 50 | 51 | heat_img = paf[RmpeGlobalConfig.heat_start+i] 52 | 53 | heat_img = cv2.resize(heat_img, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST) 54 | heat_img = heat_img.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1)) 55 | 56 | heated_image = heated_image*(1-heat_img) + heat_pattern*heat_img 57 | 58 | parts += [heated_image] 59 | 60 | parts = np.vstack(parts) 61 | cv2.imwrite(save_to+"/%07d%s.png" % (num, "heat"), parts) 62 | 63 | 64 | pafs = [] 65 | stride = RmpeGlobalConfig.stride 66 | 67 | for i,(fr,to) in enumerate(RmpeGlobalConfig.limbs_conn): 68 | paffed_image = image.copy() 69 | 70 | pafX = paf[RmpeGlobalConfig.paf_start + i * 2] 71 | pafY = paf[RmpeGlobalConfig.paf_start + i * 2 + 1] 72 | 73 | for x in range(RmpeGlobalConfig.width//stride): 74 | for y in range(RmpeGlobalConfig.height//stride): 75 | X = pafX[y, x] 76 | Y = pafY[y, x] 77 | 78 | if X!=0 or Y!=0: 79 | cv2.arrowedLine(paffed_image, (x*stride,y*stride), (int(x*stride+X*stride),int(y*stride+Y*stride)), color=(0,0,255), thickness=1, tipLength=0.5) 80 | 81 | pafs += [paffed_image] 82 | 83 | 84 | pafs = np.vstack(pafs) 85 | cv2.imwrite(save_to+"/%07d%s.png" % (num, "paf"), pafs) 86 | 87 | 88 | 89 | def time_processed(client, batch_size): 90 | 91 | num = 0 92 | start = time() 93 | 94 | for x,y in client.gen(): 95 | num += 1 96 | elapsed = time() - start 97 | print(num*batch_size, num*batch_size/elapsed, [ i.shape for i in x ], [i.shape for i in y] ) 98 | 99 | def time_raw(client, save): 100 | 101 | num = 0 102 | start = time() 103 | 104 | for foo in client.gen_raw(): 105 | 106 | if len(foo) == 3: 107 | x, y, z = foo 108 | elif len(foo) == 4: 109 | x, y, z, k = foo 110 | else: 111 | raise NotImplementedError("Unknown number of tensors in proto %d" % len(foo)) 112 | 113 | num += 1 114 | elapsed = time() - start 115 | print(num, num/elapsed, x.shape, y.shape, z.shape ) 116 | 117 | if save: 118 | save_images(num, x, y, z) 119 | 120 | 121 | def main(type, batch_size, save): 122 | 123 | client = DataGeneratorClient(port=5556, host="localhost", hwm=1, batch_size=batch_size) 124 | 125 | if type=='processed': 126 | time_processed(client, batch_size) 127 | elif type=='raw': 128 | time_raw(client, save) 129 | else: 130 | assert False, "type should be 'processed' or 'raw' " 131 | 132 | 133 | assert len(sys.argv) >=2, "Usage: ./rmpe_dataset_server_tester [batch_size] [save]" 134 | batch_size=1 135 | save = False 136 | if 'save' in sys.argv: 137 | save=True 138 | sys.argv = [s for s 
in sys.argv if s!='save'] 139 | if len(sys.argv)==3: batch_size=int(sys.argv[2]) 140 | 141 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000) 142 | main(sys.argv[1], batch_size, save) 143 | -------------------------------------------------------------------------------- /training/coco_masks_hdf5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pycocotools.coco import COCO 4 | from scipy.spatial.distance import cdist 5 | import numpy as np 6 | import cv2 7 | import os 8 | import os.path 9 | import h5py 10 | import json 11 | 12 | dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'dataset')) 13 | 14 | tr_anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_train2017.json") 15 | tr_img_dir = os.path.join(dataset_dir, "train2017") 16 | 17 | val_anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_val2017.json") 18 | val_img_dir = os.path.join(dataset_dir, "val2017") 19 | 20 | datasets = [ 21 | (val_anno_path, val_img_dir, "COCO_val"), # it is important to have 'val' in validation dataset name, look for 'val' below 22 | (tr_anno_path, tr_img_dir, "COCO") 23 | ] 24 | 25 | 26 | tr_hdf5_path = os.path.join(dataset_dir, "coco_train_dataset.h5") 27 | val_hdf5_path = os.path.join(dataset_dir, "coco_val_dataset.h5") 28 | 29 | val_size = 2645 # size of validation set 30 | 31 | 32 | def make_mask(img_dir, img_id, img_anns, coco): 33 | 34 | img_path = os.path.join(img_dir, "%012d.jpg" % img_id) 35 | img = cv2.imread(img_path) 36 | h, w, c = img.shape 37 | 38 | mask_all = np.zeros((h, w), dtype=np.uint8) 39 | mask_miss = np.zeros((h, w), dtype=np.uint8) 40 | 41 | flag = 0 42 | for p in img_anns: 43 | seg = p["segmentation"] 44 | 45 | if p["iscrowd"] == 1: 46 | mask_crowd = coco.annToMask(p) 47 | temp = np.bitwise_and(mask_all, mask_crowd) 48 | mask_crowd = mask_crowd - temp 49 | flag += 1 50 | continue 51 | else: 52 | mask = coco.annToMask(p) 53 | 54 | mask_all = np.bitwise_or(mask, mask_all) 55 | 56 | if p["num_keypoints"] <= 0: 57 | mask_miss = np.bitwise_or(mask, mask_miss) 58 | 59 | if flag<1: 60 | mask_miss = np.logical_not(mask_miss) 61 | elif flag == 1: 62 | mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd)) 63 | mask_all = np.bitwise_or(mask_all, mask_crowd) 64 | else: 65 | raise Exception("crowd segments > 1") 66 | 67 | mask_miss = mask_miss.astype(np.uint8) 68 | mask_miss *= 255 69 | 70 | return img, mask_miss 71 | 72 | def process_image(image_rec, img_id, image_index, img_anns, dataset_type): 73 | 74 | print("Image ID: ", img_id) 75 | 76 | numPeople = len(img_anns) 77 | h, w = image_rec['height'], image_rec['width'] 78 | 79 | all_persons = [] 80 | 81 | for p in range(numPeople): 82 | 83 | pers = dict() 84 | 85 | person_center = [img_anns[p]["bbox"][0] + img_anns[p]["bbox"][2] / 2, 86 | img_anns[p]["bbox"][1] + img_anns[p]["bbox"][3] / 2] 87 | 88 | pers["objpos"] = person_center 89 | pers["bbox"] = img_anns[p]["bbox"] 90 | pers["segment_area"] = img_anns[p]["area"] 91 | pers["num_keypoints"] = img_anns[p]["num_keypoints"] 92 | 93 | anno = img_anns[p]["keypoints"] 94 | 95 | pers["joint"] = np.zeros((17, 3)) 96 | for part in range(17): 97 | pers["joint"][part, 0] = anno[part * 3] 98 | pers["joint"][part, 1] = anno[part * 3 + 1] 99 | 100 | # visible/invisible 101 | # COCO - Each keypoint has a 0-indexed location x,y and a visibility flag v defined as v=0: not labeled (in which case x=y=0), v=1: labeled but not visible, and 
v=2: labeled and visible.
102 |                 # OURS - 3: never labeled in this dataset, 2: not labeled for this person, 1: labeled and visible, 0: labeled but invisible
103 |                 if anno[part * 3 + 2] == 2:
104 |                     pers["joint"][part, 2] = 1
105 |                 elif anno[part * 3 + 2] == 1:
106 |                     pers["joint"][part, 2] = 0
107 |                 else:
108 |                     pers["joint"][part, 2] = 2
109 | 
110 |         pers["scale_provided"] = img_anns[p]["bbox"][3] / 368
111 | 
112 |         all_persons.append(pers)
113 | 
114 |     main_persons = []
115 |     prev_center = []
116 | 
117 | 
118 |     for pers in all_persons:
119 | 
120 |         # skip this person if the number of parts is too low or if the
121 |         # segmentation area is too small
122 |         if pers["num_keypoints"] < 5 or pers["segment_area"] < 32 * 32:
123 |             continue
124 | 
125 |         person_center = pers["objpos"]
126 | 
127 |         # skip this person if the distance to an existing person is too small
128 |         flag = 0
129 |         for pc in prev_center:
130 |             a = np.expand_dims(pc[:2], axis=0)
131 |             b = np.expand_dims(person_center, axis=0)
132 |             dist = cdist(a, b)[0]
133 |             if dist < pc[2] * 0.3:
134 |                 flag = 1
135 |                 break
136 | 
137 |         if flag == 1:
138 |             continue
139 | 
140 |         main_persons.append(pers)
141 |         prev_center.append(np.append(person_center, max(pers["bbox"][2], pers["bbox"][3])))
142 | 
143 | 
144 |     template = dict()
145 |     template["dataset"] = dataset_type
146 | 
147 |     if image_index < val_size and 'val' in dataset_type:
148 |         isValidation = 1
149 |     else:
150 |         isValidation = 0
151 | 
152 |     template["isValidation"] = isValidation
153 |     template["img_width"] = w
154 |     template["img_height"] = h
155 |     template["image_id"] = img_id
156 |     template["annolist_index"] = image_index
157 |     template["img_path"] = '%012d.jpg' % img_id
158 | 
159 |     for p, person in enumerate(main_persons):
160 | 
161 |         instance = template.copy()
162 | 
163 |         instance["objpos"] = [ main_persons[p]["objpos"] ]
164 |         instance["joints"] = [ main_persons[p]["joint"].tolist() ]
165 |         instance["scale_provided"] = [ main_persons[p]["scale_provided"] ]
166 | 
167 |         lenOthers = 0
168 | 
169 |         for ot, operson in enumerate(all_persons):
170 | 
171 |             if person is operson:
172 |                 assert "people_index" not in instance, "several main persons? 
couldn't be" 173 | instance["people_index"] = ot 174 | continue 175 | 176 | if operson["num_keypoints"] == 0: 177 | continue 178 | 179 | instance["joints"].append(all_persons[ot]["joint"].tolist()) 180 | instance["scale_provided"].append(all_persons[ot]["scale_provided"]) 181 | instance["objpos"].append(all_persons[ot]["objpos"]) 182 | 183 | lenOthers += 1 184 | 185 | assert "people_index" in instance, "No main person index" 186 | instance["numOtherPeople"] = lenOthers 187 | 188 | yield instance 189 | 190 | 191 | def writeImage(grp, img_grp, data, img, mask_miss, count, image_id, mask_grp=None): 192 | 193 | serializable_meta = data 194 | serializable_meta['count'] = count 195 | 196 | nop = data['numOtherPeople'] 197 | 198 | assert len(serializable_meta['joints']) == 1 + nop, [len(serializable_meta['joints']), 1 + nop] 199 | assert len(serializable_meta['scale_provided']) == 1 + nop, [len(serializable_meta['scale_provided']), 1 + nop] 200 | assert len(serializable_meta['objpos']) == 1 + nop, [len(serializable_meta['objpos']), 1 + nop] 201 | 202 | img_key = "%012d" % image_id 203 | if not img_key in img_grp: 204 | 205 | if mask_grp is None: 206 | img_and_mask = np.concatenate((img, mask_miss[..., None]), axis=2) 207 | img_ds = img_grp.create_dataset(img_key, data=img_and_mask, chunks=None) 208 | else: 209 | _, img_bin = cv2.imencode(".jpg", img) 210 | _, img_mask = cv2.imencode(".png", mask_miss) 211 | img_ds1 = img_grp.create_dataset(img_key, data=img_bin, chunks=None) 212 | img_ds2 = mask_grp.create_dataset(img_key, data=img_mask, chunks=None) 213 | 214 | 215 | key = '%07d' % count 216 | required = { 'image':img_key, 'joints': serializable_meta['joints'], 'objpos': serializable_meta['objpos'], 'scale_provided': serializable_meta['scale_provided'] } 217 | ds = grp.create_dataset(key, data=json.dumps(required), chunks=None) 218 | ds.attrs['meta'] = json.dumps(serializable_meta) 219 | 220 | print('Writing sample %d' % count) 221 | 222 | 223 | def process(): 224 | 225 | tr_h5 = h5py.File(tr_hdf5_path, 'w') 226 | tr_grp = tr_h5.create_group("dataset") 227 | tr_write_count = 0 228 | tr_grp_img = tr_h5.create_group("images") 229 | tr_grp_mask = tr_h5.create_group("masks") 230 | 231 | val_h5 = h5py.File(val_hdf5_path, 'w') 232 | val_grp = val_h5.create_group("dataset") 233 | val_write_count = 0 234 | val_grp_img = val_h5.create_group("images") 235 | val_grp_mask = val_h5.create_group("masks") 236 | 237 | for _, ds in enumerate(datasets): 238 | 239 | anno_path = ds[0] 240 | img_dir = ds[1] 241 | dataset_type = ds[2] 242 | 243 | coco = COCO(anno_path) 244 | ids = list(coco.imgs.keys()) 245 | 246 | for image_index, img_id in enumerate(ids): 247 | ann_ids = coco.getAnnIds(imgIds=img_id) 248 | img_anns = coco.loadAnns(ann_ids) 249 | image_rec = coco.imgs[img_id] 250 | 251 | img = None 252 | mask_miss = None 253 | cached_img_id = None 254 | 255 | for data in process_image(image_rec, img_id, image_index, img_anns, dataset_type): 256 | 257 | if cached_img_id!=data['image_id']: 258 | assert img_id == data['image_id'] 259 | cached_img_id = data['image_id'] 260 | img, mask_miss = make_mask(img_dir, cached_img_id, img_anns, coco) 261 | 262 | if data['isValidation']: 263 | writeImage(val_grp, val_grp_img, data, img, mask_miss, val_write_count, cached_img_id, val_grp_mask) 264 | val_write_count += 1 265 | else: 266 | writeImage(tr_grp, tr_grp_img, data, img, mask_miss, tr_write_count, cached_img_id, tr_grp_mask) 267 | tr_write_count += 1 268 | 269 | tr_h5.close() 270 | val_h5.close() 271 | 272 | if __name__ == 
'__main__': 273 | process() 274 | -------------------------------------------------------------------------------- /training/ds_generators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import zmq 3 | from ast import literal_eval as make_tuple 4 | from py_rmpe_server.py_rmpe_data_iterator import RawDataIterator 5 | from time import time 6 | 7 | import six 8 | if six.PY3: 9 | buffer_ = memoryview 10 | else: 11 | buffer_ = buffer # noqa 12 | 13 | 14 | class DataIteratorBase: 15 | 16 | def __init__(self, global_config, batch_size = 10): 17 | 18 | self.global_config = global_config 19 | self.batch_size = batch_size 20 | 21 | self.split_point = global_config.paf_layers 22 | self.vec_num = global_config.paf_layers 23 | self.heat_num = global_config.heat_layers + 1 24 | 25 | self.image_shape = (self.batch_size, self.global_config.width, self.global_config.height, 3) 26 | self.mask1_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.vec_num) 27 | self.mask2_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.heat_num) 28 | self.ypafs1_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.vec_num) 29 | self.yheat2_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.heat_num) 30 | 31 | #self.keypoints = [None]*self.batch_size # this is never passed to NN, will be accessed by accuracy calculation 32 | 33 | def restart(self): 34 | 35 | assert False, "Not implemented" # should restart connection, server should start new cycle on connection. 36 | 37 | def gen_raw(self): # this function used for test purposes in py_rmpe_server 38 | 39 | self.restart() 40 | 41 | while True: 42 | yield tuple(self._recv_arrays()) 43 | 44 | 45 | def gen(self): 46 | 47 | sample_idx = 0 48 | batches_x = np.empty(self.image_shape) 49 | batches_x1 = np.empty(self.mask1_shape) 50 | batches_x2 = np.empty(self.mask2_shape) 51 | batches_y1 = np.empty(self.ypafs1_shape) 52 | batches_y2 = np.empty(self.yheat2_shape) 53 | 54 | for foo in self.gen_raw(): 55 | 56 | if len(foo)==4: 57 | data_img, mask_img, label, kpts = foo 58 | else: 59 | data_img, mask_img, label = foo 60 | kpts = None 61 | 62 | batches_x[sample_idx] = data_img[np.newaxis, ...] 
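# (editor's aside, not part of the original file) split_point = paf_layers divides the stacked
# ground-truth channels: [:split_point] are the limb vector fields, [split_point:] are the part
# heatmaps plus one background layer. With a usual COCO-style configuration (assumed numbers:
# 38 paf layers, 18 heat layers) that is:
#
#     split_point = 38
#     # mask_img[..., :38]  -> masks for 19 limbs x (x, y) vector components
#     # mask_img[..., 38:]  -> masks for 18 part heatmaps + 1 background = 19 channels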
63 | 64 | batches_x1[sample_idx,:,:,:] = mask_img[ np.newaxis, :, :, :self.split_point ] 65 | batches_x2[sample_idx,:,:,:] = mask_img[ np.newaxis, :, :, self.split_point: ] 66 | 67 | batches_y1[sample_idx] = label[np.newaxis, :, :, :self.split_point ] 68 | batches_y2[sample_idx] = label[np.newaxis, :, :, self.split_point: ] 69 | 70 | #self.keypoints[sample_idx] = kpts 71 | 72 | sample_idx += 1 73 | 74 | if sample_idx == self.batch_size: 75 | sample_idx = 0 76 | 77 | if self.vec_num>0 and self.heat_num>0: 78 | yield [batches_x, batches_x1, batches_x2], \ 79 | [batches_y1, batches_y2, 80 | batches_y1, batches_y2, 81 | batches_y1, batches_y2, 82 | batches_y1, batches_y2, 83 | batches_y1, batches_y2, 84 | batches_y1, batches_y2] 85 | 86 | elif self.vec_num == 0 and self.heat_num > 0: 87 | 88 | yield [batches_x, batches_x2], \ 89 | [batches_y2, batches_y2, batches_y2, batches_y2, batches_y2, batches_y2] 90 | 91 | else: 92 | assert False, "Wtf or not implemented" 93 | 94 | # we should recreate this arrays because we in multiple threads, can't overwrite 95 | batches_x = np.empty(self.image_shape) 96 | batches_x1 = np.empty(self.mask1_shape) 97 | batches_x2 = np.empty(self.mask2_shape) 98 | batches_y1 = np.empty(self.ypafs1_shape) 99 | batches_y2 = np.empty(self.yheat2_shape) 100 | 101 | #self.keypoints = [None] * self.batch_size 102 | 103 | def keypoints(self): 104 | return self.keypoints 105 | 106 | def num_samples(self): 107 | assert False, "Not Implemented" 108 | 109 | 110 | class DataGeneratorClient(DataIteratorBase): 111 | 112 | def __init__(self, global_config, host, port, hwm=20, batch_size=10, limit=None): 113 | 114 | super(DataGeneratorClient, self).__init__(global_config, batch_size) 115 | 116 | self.limit = limit 117 | self.records = 0 118 | 119 | """ 120 | :param host: 121 | :param port: 122 | :param hwm:, optional 123 | The `ZeroMQ high-water mark (HWM) 124 | `_ on the 125 | sending socket. Increasing this increases the buffer, which can be 126 | useful if your data preprocessing times are very random. However, 127 | it will increase memory usage. There is no easy way to tell how 128 | many batches will actually be queued with a particular HWM. 129 | Defaults to 10. Be sure to set the corresponding HWM on the 130 | receiving end as well. 131 | :param batch_size: 132 | :param shuffle: 133 | :param seed: 134 | """ 135 | self.host = host 136 | self.port = port 137 | self.hwm = hwm 138 | self.socket = None 139 | 140 | context = zmq.Context() 141 | self.socket = context.socket(zmq.PULL) 142 | self.socket.set_hwm(self.hwm) 143 | self.socket.connect("tcp://{}:{}".format(self.host, self.port)) 144 | 145 | 146 | def _recv_arrays(self): 147 | """Receive a list of NumPy arrays. 148 | Parameters 149 | ---------- 150 | socket : :class:`zmq.Socket` 151 | The socket to receive the arrays on. 152 | Returns 153 | ------- 154 | list 155 | A list of :class:`numpy.ndarray` objects. 156 | Raises 157 | ------ 158 | StopIteration 159 | If the first JSON object received contains the key `stop`, 160 | signifying that the server has finished a single epoch. 
161 | """ 162 | 163 | if self.limit is not None and self.records > self.limit: 164 | raise StopIteration 165 | 166 | headers = self.socket.recv_json() 167 | if 'stop' in headers: 168 | raise StopIteration 169 | arrays = [] 170 | 171 | for header in headers: 172 | data = self.socket.recv() 173 | buf = buffer_(data) 174 | array = np.frombuffer(buf, dtype=np.dtype(header['descr'])) 175 | array.shape = make_tuple(header['shape']) if isinstance(header['shape'], str) else header['shape'] 176 | # this need for comparability with C++ code, for some reasons it is string here, not tuple 177 | 178 | if header['fortran_order']: 179 | array.shape = header['shape'][::-1] 180 | array = array.transpose() 181 | arrays.append(array) 182 | 183 | self.records += 1 184 | return arrays 185 | 186 | 187 | class DataIterator(DataIteratorBase): 188 | 189 | def __init__(self, global_config, config, shuffle=True, augment=True, batch_size=10, limit=None): 190 | 191 | super(DataIterator, self).__init__(global_config, batch_size) 192 | 193 | self.limit = limit 194 | self.records = 0 195 | self.global_config = global_config 196 | self.config = config 197 | self.shuffle = shuffle 198 | self.augment = augment 199 | 200 | self.raw_data_iterator = RawDataIterator(self.global_config, self.config, shuffle=self.shuffle, augment=self.augment) 201 | self.generator = None 202 | 203 | def restart(self): 204 | 205 | self.records = 0 206 | self.generator = self.raw_data_iterator.gen() 207 | 208 | def num_samples(self): 209 | return self.raw_data_iterator.num_keys() 210 | 211 | def _recv_arrays(self): 212 | 213 | while True: 214 | 215 | if self.limit is not None and self.records > self.limit: 216 | raise StopIteration("Limit Reached") 217 | 218 | tpl = next(self.generator, None) 219 | if tpl is not None: 220 | self.records += 1 221 | return tpl 222 | 223 | raise StopIteration("Limited and reached cycle") 224 | 225 | 226 | -------------------------------------------------------------------------------- /training/optimizers.py: -------------------------------------------------------------------------------- 1 | from keras.optimizers import Optimizer 2 | from keras import backend as K 3 | from keras.legacy import interfaces 4 | 5 | 6 | class MultiSGD(Optimizer): 7 | """ 8 | Modified SGD with added support for learning multiplier for kernels and biases 9 | as suggested in: https://github.com/fchollet/keras/issues/5920 10 | 11 | Stochastic gradient descent optimizer. 12 | Includes support for momentum, 13 | learning rate decay, and Nesterov momentum. 14 | # Arguments 15 | lr: float >= 0. Learning rate. 16 | momentum: float >= 0. Parameter updates momentum. 17 | decay: float >= 0. Learning rate decay over each update. 18 | nesterov: boolean. Whether to apply Nesterov momentum. 
19 | """ 20 | 21 | def __init__(self, lr=0.01, momentum=0., decay=0., 22 | nesterov=False, lr_mult=None, **kwargs): 23 | super(MultiSGD, self).__init__(**kwargs) 24 | with K.name_scope(self.__class__.__name__): 25 | self.iterations = K.variable(0, dtype='int64', name='iterations') 26 | self.lr = K.variable(lr, name='lr') 27 | self.momentum = K.variable(momentum, name='momentum') 28 | self.decay = K.variable(decay, name='decay') 29 | self.initial_decay = decay 30 | self.nesterov = nesterov 31 | self.lr_mult = lr_mult 32 | 33 | @interfaces.legacy_get_updates_support 34 | def get_updates(self, loss, params): 35 | grads = self.get_gradients(loss, params) 36 | self.updates = [K.update_add(self.iterations, 1)] 37 | 38 | lr = self.lr 39 | if self.initial_decay > 0: 40 | lr *= (1. / (1. + self.decay * K.cast(self.iterations, 41 | K.dtype(self.decay)))) 42 | # momentum 43 | shapes = [K.int_shape(p) for p in params] 44 | moments = [K.zeros(shape) for shape in shapes] 45 | self.weights = [self.iterations] + moments 46 | for p, g, m in zip(params, grads, moments): 47 | 48 | if p.name in self.lr_mult: 49 | multiplied_lr = lr * self.lr_mult[p.name] 50 | else: 51 | multiplied_lr = lr 52 | 53 | v = self.momentum * m - multiplied_lr * g # velocity 54 | self.updates.append(K.update(m, v)) 55 | 56 | if self.nesterov: 57 | new_p = p + self.momentum * v - multiplied_lr * g 58 | else: 59 | new_p = p + v 60 | 61 | # Apply constraints. 62 | if getattr(p, 'constraint', None) is not None: 63 | new_p = p.constraint(new_p) 64 | 65 | self.updates.append(K.update(p, new_p)) 66 | return self.updates 67 | 68 | def get_config(self): 69 | config = {'lr': float(K.get_value(self.lr)), 70 | 'momentum': float(K.get_value(self.momentum)), 71 | 'decay': float(K.get_value(self.decay)), 72 | 'nesterov': self.nesterov} 73 | base_config = super(MultiSGD, self).get_config() 74 | return dict(list(base_config.items()) + list(config.items())) 75 | -------------------------------------------------------------------------------- /training/train_common.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | sys.path.append("..") 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from model import get_training_model, get_lrmult 10 | from training.optimizers import MultiSGD 11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint, CSVLogger, TensorBoard, TerminateOnNaN 12 | from keras.applications.vgg19 import VGG19 13 | import keras.backend as K 14 | 15 | from glob import glob 16 | from config import GetConfig 17 | import h5py 18 | from testing.inhouse_metric import calc_batch_metrics 19 | from time import time 20 | 21 | base_lr = 2e-5 22 | momentum = 0.9 23 | weight_decay = 5e-4 24 | lr_policy = "step" 25 | gamma = 0.333 26 | stepsize = 121746 * 17 # in original code each epoch is 121746 and step change is on 17th epoch 27 | max_iter = 200 28 | 29 | def get_last_epoch_and_weights_file(WEIGHT_DIR, WEIGHTS_SAVE, epoch): 30 | 31 | os.makedirs(WEIGHT_DIR, exist_ok=True) 32 | 33 | if epoch is not None and epoch != '': #override 34 | return int(epoch), WEIGHT_DIR + '/' + WEIGHTS_SAVE.format(epoch=epoch) 35 | 36 | files = [file for file in glob(WEIGHT_DIR + '/weights.*.h5')] 37 | files = [file.split('/')[-1] for file in files] 38 | epochs = [file.split('.')[1] for file in files if file] 39 | epochs = [int(epoch) for epoch in epochs if epoch.isdigit() ] 40 | if len(epochs) == 0: 41 | if 'weights.best.h5' in files: 42 | return -1, WEIGHT_DIR + 
'/weights.best.h5' 43 | else: 44 | ep = max([int(epoch) for epoch in epochs]) 45 | return ep, WEIGHT_DIR + '/' + WEIGHTS_SAVE.format(epoch=ep) 46 | return None, None 47 | 48 | 49 | # save names will be looking like 50 | # training/canonical/exp1 51 | # training/canonical_exp1.csv 52 | # training/canonical/exp2 53 | # training/canonical_exp2.csv 54 | 55 | def prepare(config, config_name, exp_id, train_samples, val_samples, batch_size, epoch=None ): 56 | 57 | metrics_id = config_name + "_" + exp_id if exp_id is not None else config_name 58 | weights_id = config_name + "/" + exp_id if exp_id is not None else config_name 59 | 60 | WEIGHT_DIR = "./" + weights_id 61 | WEIGHTS_SAVE = 'weights.{epoch:04d}.h5' 62 | 63 | TRAINING_LOG = "./" + metrics_id + ".csv" 64 | LOGS_DIR = "./logs" 65 | 66 | model = get_training_model(weight_decay, np_branch1=config.paf_layers, np_branch2=config.heat_layers+1) 67 | lr_mult = get_lrmult(model) 68 | 69 | # load previous weights or vgg19 if this is the first run 70 | last_epoch, wfile = get_last_epoch_and_weights_file(WEIGHT_DIR, WEIGHTS_SAVE, epoch) 71 | print("last_epoch:",last_epoch) 72 | 73 | if wfile is not None: 74 | print("Loading %s ..." % wfile) 75 | 76 | model.load_weights(wfile) 77 | 78 | else: 79 | print("Loading vgg19 weights...") 80 | 81 | vgg_model = VGG19(include_top=False, weights='imagenet') 82 | 83 | from_vgg = dict() 84 | from_vgg['conv1_1'] = 'block1_conv1' 85 | from_vgg['conv1_2'] = 'block1_conv2' 86 | from_vgg['conv2_1'] = 'block2_conv1' 87 | from_vgg['conv2_2'] = 'block2_conv2' 88 | from_vgg['conv3_1'] = 'block3_conv1' 89 | from_vgg['conv3_2'] = 'block3_conv2' 90 | from_vgg['conv3_3'] = 'block3_conv3' 91 | from_vgg['conv3_4'] = 'block3_conv4' 92 | from_vgg['conv4_1'] = 'block4_conv1' 93 | from_vgg['conv4_2'] = 'block4_conv2' 94 | 95 | for layer in model.layers: 96 | if layer.name in from_vgg: 97 | vgg_layer_name = from_vgg[layer.name] 98 | layer.set_weights(vgg_model.get_layer(vgg_layer_name).get_weights()) 99 | print("Loaded VGG19 layer: " + vgg_layer_name) 100 | 101 | last_epoch = 0 102 | 103 | # euclidean loss as implemented in caffe https://github.com/BVLC/caffe/blob/master/src/caffe/layers/euclidean_loss_layer.cpp 104 | def eucl_loss(x, y): 105 | l = K.sum(K.square(x - y)) / batch_size / 2 106 | return l 107 | 108 | # learning rate schedule - equivalent of caffe lr_policy = "step" 109 | iterations_per_epoch = train_samples // batch_size 110 | 111 | def step_decay(epoch): 112 | steps = epoch * iterations_per_epoch * batch_size 113 | lrate = base_lr * math.pow(gamma, math.floor(steps/stepsize)) 114 | print("Epoch:", epoch, "Learning rate:", lrate) 115 | return lrate 116 | 117 | print("Weight decay policy...") 118 | for i in range(1,100,5): step_decay(i) 119 | 120 | # configure callbacks 121 | lrate = LearningRateScheduler(step_decay) 122 | checkpoint = ModelCheckpoint(WEIGHT_DIR + '/' + WEIGHTS_SAVE, monitor='loss', verbose=0, save_best_only=False, save_weights_only=True, mode='min', period=1) 123 | csv_logger = CSVLogger(TRAINING_LOG, append=True) 124 | tb = TensorBoard(log_dir=LOGS_DIR, histogram_freq=0, write_graph=True, write_images=False) 125 | tnan = TerminateOnNaN() 126 | #coco_eval = CocoEval(train_client, val_client) 127 | 128 | callbacks_list = [lrate, checkpoint, csv_logger, tb, tnan] 129 | 130 | # sgd optimizer with lr multipliers 131 | multisgd = MultiSGD(lr=base_lr, momentum=momentum, decay=0.0, nesterov=False, lr_mult=lr_mult) 132 | 133 | # start training 134 | 135 | model.compile(loss=eucl_loss, optimizer=multisgd) 136 | 
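# (editor's aside, not part of the original file) A quick sanity check of the step_decay schedule
# configured above, with a hypothetical batch_size=10 and train_samples=121746: steps are counted
# in samples seen, and the learning rate drops by gamma ~= 1/3 every stepsize samples:
#
#     import math
#     base_lr, gamma, stepsize = 2e-5, 0.333, 121746 * 17
#     iterations_per_epoch = 121746 // 10
#     for epoch in (1, 18, 35):
#         steps = epoch * iterations_per_epoch * 10
#         print(epoch, base_lr * math.pow(gamma, math.floor(steps / stepsize)))
#     # -> 2e-05 at epoch 1, ~6.7e-06 at epoch 18, ~2.2e-06 at epoch 35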
137 |     return model, iterations_per_epoch, val_samples//batch_size, last_epoch, metrics_id, callbacks_list
138 | 
139 | 
140 | 
141 | 
142 | def train(config, model, train_client, val_client, iterations_per_epoch, validation_steps, metrics_id, last_epoch, use_client_gen, callbacks_list):
143 | 
144 |     for epoch in range(last_epoch, max_iter):
145 | 
146 |         train_di = train_client.gen()
147 | 
148 |         # train for one epoch
149 |         model.fit_generator(train_di,
150 |                             steps_per_epoch=iterations_per_epoch,
151 |                             epochs=epoch+1,
152 |                             callbacks=callbacks_list,
153 |                             use_multiprocessing=False,  # TODO: with True, two threads touch the same generator and hang the program
154 |                             initial_epoch=epoch
155 |                             )
156 | 
157 |         validate(config, model, val_client, validation_steps, metrics_id, epoch+1)
158 | 
159 | 
160 | def validate(config, model, val_client, validation_steps, metrics_id, epoch):
161 | 
162 |     val_di = val_client.gen()
163 |     from keras.utils import GeneratorEnqueuer
164 | 
165 |     val_thre = GeneratorEnqueuer(val_di)
166 |     val_thre.start()
167 | 
168 |     model_metrics = []
169 |     inhouse_metrics = []
170 | 
171 |     for i in range(validation_steps):
172 | 
173 |         X, GT = next(val_thre.get())
174 | 
175 |         Y = model.predict(X)
176 | 
177 |         model_losses = [(np.sum((gt - y) ** 2) / gt.shape[0] / 2) for gt, y in zip(GT, Y)]
178 |         mm = sum(model_losses)
179 | 
180 |         if config.paf_layers > 0 and config.heat_layers > 0:
181 |             GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
182 |             YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
183 |             mm6l1 = model_losses[-2]
184 |             mm6l2 = model_losses[-1]
185 |         elif config.paf_layers == 0 and config.heat_layers > 0:
186 |             GTL6 = GT[-1]
187 |             YL6 = Y[-1]
188 |             mm6l1 = float('nan')  # no PAF stage in this config; nan keeps the %0.4f print below working
189 |             mm6l2 = model_losses[-1]
190 |         else:
191 |             assert False, "Unsupported combination of paf_layers/heat_layers: not implemented"
192 | 
193 |         m = calc_batch_metrics(i, GTL6, YL6, range(config.heat_start, config.bkg_start))
194 |         inhouse_metrics += [m]
195 | 
196 |         model_metrics += [(i, mm, mm6l1, mm6l2, m["MAE"].sum()/GTL6.shape[0], m["RMSE"].sum()/GTL6.shape[0], m["DIST"].mean())]
197 |         print("Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f" % model_metrics[-1])
198 | 
199 |     inhouse_metrics = pd.concat(inhouse_metrics)
200 |     inhouse_metrics['epoch'] = epoch
201 |     inhouse_metrics.to_csv("logs/val_scores.%s.%04d.txt" % (metrics_id, epoch), sep="\t")
202 | 
203 |     model_metrics = pd.DataFrame(model_metrics, columns=("batch", "loss", "stage6l1", "stage6l2", "mae", "rmse", "dist"))
204 |     model_metrics['epoch'] = epoch
205 |     del model_metrics['batch']
206 |     model_metrics = model_metrics.groupby('epoch').mean()
207 |     with open('%s.val.tsv' % metrics_id, 'a') as f:
208 |         model_metrics.to_csv(f, header=(epoch == 1), sep="\t", float_format='%.4f')
209 | 
210 |     val_thre.stop()
211 | 
212 | def save_network_input_output(model, val_client, validation_steps, metrics_id, batch_size, epoch=None):
213 | 
214 |     val_di = val_client.gen()
215 | 
216 |     if epoch is not None:
217 |         filename = "nn_io.%s.%04d.h5" % (metrics_id, epoch)
218 |     else:
219 |         filename = "nn_gt.%s.h5" % metrics_id
220 | 
221 |     h5 = h5py.File(filename, 'w')
222 | 
223 |     for i in range(validation_steps):
224 |         X, Y = next(val_di)
225 | 
226 |         grp = h5.create_group("%06d" % i)
227 | 
228 |         for n, v in enumerate(X):
229 |             grp['x%02d' % n] = v
230 | 
231 |         for n, v in enumerate(Y):
232 |             grp['gt%02d' % n] = v
233 | 
234 |         if model is not None:
235 | 
236 |             Yp = model.predict(X, batch_size=batch_size)
237 | 
238 |             for n, v in enumerate(Yp):
239 |                 grp['y%02d' % n] = v
240 | 
241 |         print(i)
242 | 
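# --- illustrative note, not part of the original file ---
# the resulting HDF5 layout, assuming for illustration a model with 2 input tensors
# and 6 output tensors, looks like:
#     /000000/x00, /000000/x01          network inputs for validation batch 0
#     /000000/gt00 ... /000000/gt05     ground-truth outputs
#     /000000/y00 ... /000000/y05       predictions (written only when model is not None)
#     /000001/...                       next batch, and so on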
243 |     h5.close()
244 | 
245 | def test_augmentation_speed(train_client):
246 | 
247 |     train_di = train_client.gen()
248 | 
249 |     start = time()
250 |     batch = 0
251 | 
252 |     for X, Y in train_di:
253 | 
254 |         batch += 1
255 |         print("batches per second ", batch/(time()-start))
256 | 
--------------------------------------------------------------------------------
/training/train_pose.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("..")
3 | 
4 | from training.train_common import prepare, train, validate, save_network_input_output, test_augmentation_speed
5 | from training.ds_generators import DataGeneratorClient, DataIterator
6 | from config import COCOSourceConfig, GetConfig
7 | 
8 | use_client_gen = False
9 | batch_size = 10
10 | 
11 | task = sys.argv[1] if len(sys.argv) > 1 else "train"
12 | config_name = sys.argv[2] if len(sys.argv) > 2 else "Canonical"
13 | experiment_name = sys.argv[3] if len(sys.argv) > 3 else None
14 | if experiment_name == '': experiment_name = None
15 | epoch = int(sys.argv[4]) if len(sys.argv) > 4 and sys.argv[4] != '' else None
16 | 
17 | config = GetConfig(config_name)
18 | 
19 | train_client = DataIterator(config, COCOSourceConfig("../dataset/coco_train_dataset.h5"), shuffle=True,
20 |                             augment=True, batch_size=batch_size)
21 | val_client = DataIterator(config, COCOSourceConfig("../dataset/coco_val_dataset.h5"), shuffle=False, augment=False,
22 |                           batch_size=batch_size)
23 | 
24 | train_samples = train_client.num_samples()
25 | val_samples = val_client.num_samples()
26 | 
27 | model, iterations_per_epoch, validation_steps, epoch, metrics_id, callbacks_list = \
28 |     prepare(config=config, config_name=config_name, exp_id=experiment_name, train_samples=train_samples, val_samples=val_samples, batch_size=batch_size, epoch=epoch)
29 | 
30 | 
31 | if task == "train":
32 |     train(config, model, train_client, val_client, iterations_per_epoch, validation_steps, metrics_id, epoch, use_client_gen, callbacks_list)
33 | 
34 | elif task == "validate":
35 |     validate(config, model, val_client, validation_steps, metrics_id, epoch)
36 | 
37 | elif task == "save_network_input_output":
38 |     save_network_input_output(model, val_client, validation_steps, metrics_id, batch_size, epoch)
39 | 
40 | elif task == "save_network_input":
41 |     save_network_input_output(None, val_client, validation_steps, metrics_id, batch_size)
42 | 
43 | elif task == "test_augmentation_speed":
44 |     test_augmentation_speed(train_client)
45 | 
--------------------------------------------------------------------------------
/trash/lmdb.parse.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "# This code compares pictures in the original project's LMDB with our HDF5 dataset\n",
10 |     "# we actually train/test on different pictures, which does not allow us to compare losses directly"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "code",
15 |    "execution_count": 1,
16 |    "metadata": {
17 |     "collapsed": false
18 |    },
19 |    "outputs": [],
20 |    "source": [
21 |     "import lmdb\n",
22 |     "import caffe\n",
23 |     "import struct\n",
24 |     "import numpy as np\n",
25 |     "import hashlib\n",
26 |     "import h5py\n",
27 |     "import json"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 102,
33 |    "metadata": {
34 |     "collapsed": true
35 |    },
36 |    "outputs": [],
37 |    "source": [
38 |     "db = 
lmdb.open(\"/home/anatolix/iidf-data/Realtime_Pose_Estimation_LMDB\", readonly=True )" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 170, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def process(key, array):\n", 50 | " metadata=array[3]\n", 51 | " \n", 52 | " #0 dataset name\n", 53 | " dataset = struct.unpack('@10s',metadata[0].tobytes()[:10])[0]\n", 54 | " dataset = dataset.partition(b'\\x00')[0]\n", 55 | " \n", 56 | " #1 image height width\n", 57 | " height, width = struct.unpack('ff',metadata[1].tobytes()[:8])\n", 58 | " \n", 59 | " #2 validation, numother, people_index, 4*annolist_index, write_count\n", 60 | " validation, numother, people_index, annolist_index, write_count, total_count = struct.unpack('0 else None } )\n", 130 | " \n", 131 | " assert annolist.frame_sec.shape==(1,0) or annolist.frame_sec.shape==(1,1)\n", 132 | " frame_sec = annolist.frame_sec[0,0] if annolist.frame_sec.shape==(1,1) else None\n", 133 | " \n", 134 | " assert annolist.vididx.shape==(1,0) or annolist.vididx.shape==(1,1)\n", 135 | " vididx = annolist.vididx[0,0] if annolist.vididx.shape==(1,1) else None\n", 136 | " \n", 137 | " assert keypoints.img_train.shape[0]==1\n", 138 | " img_train = keypoints.img_train[0,n]\n", 139 | "\n", 140 | " assert keypoints.version.shape==(1,)\n", 141 | " version = keypoints.version[0]\n", 142 | " \n", 143 | " single_person = []\n", 144 | " assert keypoints.single_person[n,0].shape[1]==1\n", 145 | " for i in range(keypoints.single_person[n,0].shape[0]):\n", 146 | " single_person.append(keypoints.single_person[n,0][i,0])\n", 147 | " \n", 148 | " assert keypoints.act[n,0].act_id.shape==(1,1)\n", 149 | " act_id = keypoints.act[n,0].act_id[0,0]\n", 150 | " \n", 151 | " assert keypoints.act[n,0].act_name.shape==(0,) or keypoints.act[n,0].act_name.shape==(1,) \n", 152 | " act_name = keypoints.act[n,0].act_name[0] if keypoints.act[n,0].act_name.shape==(1,) else None\n", 153 | " \n", 154 | " assert keypoints.act[n,0].cat_name.shape==(0,) or keypoints.act[n,0].cat_name.shape==(1,) \n", 155 | " cat_name = keypoints.act[n,0].cat_name[0] if keypoints.act[n,0].cat_name.shape==(1,) else None\n", 156 | " \n", 157 | " video_name = None\n", 158 | " if vididx is not None:\n", 159 | " video_name = \"https://www.youtube.com/watch?v=\" + keypoints.video_list[0,vididx-1][0] \n", 160 | " \n", 161 | " \n", 162 | " return { 'image':image, 'annorects':annorects, 'img_train':img_train, 'version':version, 'single_person':single_person, 'act':{'act_id':act_id, 'act_name':act_name, 'cat_name':cat_name}, 'video_name':video_name, 'vididx':vididx, 'frame_sec':frame_sec } \n", 163 | " \n", 164 | " \n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 281, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n", 179 | " 'annorects': [{'annopoints': None,\n", 180 | " 'head': None,\n", 181 | " 'objpos': {'x': 601, 'y': 380},\n", 182 | " 'scale': 3.8807339512004684}],\n", 183 | " 'frame_sec': None,\n", 184 | " 'image': '037454012.jpg',\n", 185 | " 'img_train': 0,\n", 186 | " 'single_person': [1],\n", 187 | " 'version': '12',\n", 188 | " 'video_name': None,\n", 189 | " 'vididx': None}\n", 190 | "\n", 191 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n", 192 | " 'annorects': [{'annopoints': None,\n", 193 | " 'head': None,\n", 194 | " 
'objpos': {'x': 881, 'y': 394},\n", 195 | " 'scale': 8.0781661285219926},\n", 196 | " {'annopoints': None,\n", 197 | " 'head': None,\n", 198 | " 'objpos': {'x': 338, 'y': 210},\n", 199 | " 'scale': 8.9041293791139395}],\n", 200 | " 'frame_sec': None,\n", 201 | " 'image': '095071431.jpg',\n", 202 | " 'img_train': 0,\n", 203 | " 'single_person': [],\n", 204 | " 'version': '12',\n", 205 | " 'video_name': None,\n", 206 | " 'vididx': None}\n", 207 | "\n", 208 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n", 209 | " 'annorects': [{'annopoints': None,\n", 210 | " 'head': None,\n", 211 | " 'objpos': {'x': 619, 'y': 350},\n", 212 | " 'scale': 4.3266615305567875}],\n", 213 | " 'frame_sec': None,\n", 214 | " 'image': '073199394.jpg',\n", 215 | " 'img_train': 0,\n", 216 | " 'single_person': [1],\n", 217 | " 'version': '12',\n", 218 | " 'video_name': None,\n", 219 | " 'vididx': None}\n", 220 | "\n", 221 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n", 222 | " 'annorects': [{'annopoints': None,\n", 223 | " 'head': None,\n", 224 | " 'objpos': {'x': 684, 'y': 309},\n", 225 | " 'scale': 4.9284804960555526}],\n", 226 | " 'frame_sec': None,\n", 227 | " 'image': '059865848.jpg',\n", 228 | " 'img_train': 0,\n", 229 | " 'single_person': [1],\n", 230 | " 'version': '12',\n", 231 | " 'video_name': None,\n", 232 | " 'vididx': None}\n", 233 | "\n", 234 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 235 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 0, 'x': 610, 'y': 187},\n", 236 | " {'id': 7, 'is_visible': 1, 'x': 647, 'y': 176},\n", 237 | " {'id': 8,\n", 238 | " 'is_visible': None,\n", 239 | " 'x': 637.02009999999996,\n", 240 | " 'y': 189.81829999999999},\n", 241 | " {'id': 9,\n", 242 | " 'is_visible': None,\n", 243 | " 'x': 695.97990000000004,\n", 244 | " 'y': 108.18170000000001},\n", 245 | " {'id': 0, 'is_visible': 1, 'x': 620, 'y': 394},\n", 246 | " {'id': 1, 'is_visible': 1, 'x': 616, 'y': 269},\n", 247 | " {'id': 2, 'is_visible': 1, 'x': 573, 'y': 185},\n", 248 | " {'id': 3, 'is_visible': 0, 'x': 647, 'y': 188},\n", 249 | " {'id': 4, 'is_visible': 1, 'x': 661, 'y': 221},\n", 250 | " {'id': 5, 'is_visible': 1, 'x': 656, 'y': 231},\n", 251 | " {'id': 10, 'is_visible': 1, 'x': 606, 'y': 217},\n", 252 | " {'id': 11, 'is_visible': 1, 'x': 553, 'y': 161},\n", 253 | " {'id': 12, 'is_visible': 1, 'x': 601, 'y': 167},\n", 254 | " {'id': 13, 'is_visible': 1, 'x': 692, 'y': 185},\n", 255 | " {'id': 14, 'is_visible': 1, 'x': 693, 'y': 240},\n", 256 | " {'id': 15, 'is_visible': 1, 'x': 688, 'y': 313}],\n", 257 | " 'head': {'x1': 627, 'x2': 706, 'y1': 100, 'y2': 198},\n", 258 | " 'objpos': {'x': 594, 'y': 257},\n", 259 | " 'scale': 3.0210461764097549},\n", 260 | " {'annopoints': [{'id': 6, 'is_visible': 0, 'x': 979, 'y': 221},\n", 261 | " {'id': 7, 'is_visible': 0, 'x': 906, 'y': 190},\n", 262 | " {'id': 8,\n", 263 | " 'is_visible': None,\n", 264 | " 'x': 912.49149999999997,\n", 265 | " 'y': 190.65860000000001},\n", 266 | " {'id': 9,\n", 267 | " 'is_visible': None,\n", 268 | " 'x': 830.50850000000003,\n", 269 | " 'y': 182.34139999999999},\n", 270 | " {'id': 0, 'is_visible': 1, 'x': 895, 'y': 293},\n", 271 | " {'id': 1, 'is_visible': 1, 'x': 910, 'y': 279},\n", 272 | " {'id': 2, 'is_visible': 0, 'x': 945, 'y': 223},\n", 273 | " {'id': 3, 'is_visible': 1, 'x': 1012, 'y': 218},\n", 274 | " {'id': 4, 'is_visible': 1, 'x': 961, 'y': 315},\n", 275 | " {'id': 5, 'is_visible': 1, 'x': 960, 'y': 403},\n", 276 | " {'id': 10, 'is_visible': 1, 'x': 871, 'y': 
304},\n", 277 | " {'id': 11, 'is_visible': 1, 'x': 883, 'y': 229},\n", 278 | " {'id': 12, 'is_visible': 0, 'x': 888, 'y': 174},\n", 279 | " {'id': 13, 'is_visible': 1, 'x': 924, 'y': 206},\n", 280 | " {'id': 14, 'is_visible': 1, 'x': 1013, 'y': 203},\n", 281 | " {'id': 15, 'is_visible': 1, 'x': 955, 'y': 263}],\n", 282 | " 'head': {'x1': 841, 'x2': 902, 'y1': 145, 'y2': 228},\n", 283 | " 'objpos': {'x': 952, 'y': 222},\n", 284 | " 'scale': 2.4721165021090732}],\n", 285 | " 'frame_sec': 11,\n", 286 | " 'image': '015601864.jpg',\n", 287 | " 'img_train': 1,\n", 288 | " 'single_person': [1, 2],\n", 289 | " 'version': '12',\n", 290 | " 'video_name': 'https://www.youtube.com/watch?v=aAOusnrSsHI',\n", 291 | " 'vididx': 1660}\n", 292 | "\n", 293 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 294 | " 'annorects': [{'annopoints': [{'id': 2, 'is_visible': '1', 'x': 806, 'y': 543},\n", 295 | " {'id': 3, 'is_visible': '1', 'x': 720, 'y': 593},\n", 296 | " {'id': 6, 'is_visible': '1', 'x': 763, 'y': 568},\n", 297 | " {'id': 7, 'is_visible': '0', 'x': 683, 'y': 290},\n", 298 | " {'id': 8,\n", 299 | " 'is_visible': None,\n", 300 | " 'x': 682,\n", 301 | " 'y': 256},\n", 302 | " {'id': 9, 'is_visible': None, 'x': 676, 'y': 68},\n", 303 | " {'id': 10,\n", 304 | " 'is_visible': '1',\n", 305 | " 'x': 563,\n", 306 | " 'y': 296},\n", 307 | " {'id': 11,\n", 308 | " 'is_visible': '1',\n", 309 | " 'x': 555,\n", 310 | " 'y': 410},\n", 311 | " {'id': 12,\n", 312 | " 'is_visible': '0',\n", 313 | " 'x': 647,\n", 314 | " 'y': 281},\n", 315 | " {'id': 13,\n", 316 | " 'is_visible': '1',\n", 317 | " 'x': 719,\n", 318 | " 'y': 299},\n", 319 | " {'id': 14,\n", 320 | " 'is_visible': '1',\n", 321 | " 'x': 711,\n", 322 | " 'y': 516},\n", 323 | " {'id': 15,\n", 324 | " 'is_visible': '1',\n", 325 | " 'x': 545,\n", 326 | " 'y': 466}],\n", 327 | " 'head': {'x1': 607, 'x2': 752, 'y1': 70, 'y2': 255},\n", 328 | " 'objpos': {'x': 619, 'y': 329},\n", 329 | " 'scale': 5.6412764513007154},\n", 330 | " {'annopoints': [{'id': 2, 'is_visible': '1', 'x': 987, 'y': 607},\n", 331 | " {'id': 3,\n", 332 | " 'is_visible': '1',\n", 333 | " 'x': 1194,\n", 334 | " 'y': 571},\n", 335 | " {'id': 6,\n", 336 | " 'is_visible': '1',\n", 337 | " 'x': 1091,\n", 338 | " 'y': 589},\n", 339 | " {'id': 7,\n", 340 | " 'is_visible': '1',\n", 341 | " 'x': 1038,\n", 342 | " 'y': 292},\n", 343 | " {'id': 8,\n", 344 | " 'is_visible': None,\n", 345 | " 'x': 1025,\n", 346 | " 'y': 261},\n", 347 | " {'id': 9, 'is_visible': None, 'x': 947, 'y': 74},\n", 348 | " {'id': 10,\n", 349 | " 'is_visible': '0',\n", 350 | " 'x': 914,\n", 351 | " 'y': 539},\n", 352 | " {'id': 11,\n", 353 | " 'is_visible': '1',\n", 354 | " 'x': 955,\n", 355 | " 'y': 470},\n", 356 | " {'id': 12,\n", 357 | " 'is_visible': '1',\n", 358 | " 'x': 931,\n", 359 | " 'y': 315},\n", 360 | " {'id': 13,\n", 361 | " 'is_visible': '1',\n", 362 | " 'x': 1145,\n", 363 | " 'y': 269},\n", 364 | " {'id': 14,\n", 365 | " 'is_visible': '1',\n", 366 | " 'x': 1226,\n", 367 | " 'y': 475},\n", 368 | " {'id': 15,\n", 369 | " 'is_visible': '1',\n", 370 | " 'x': 1096,\n", 371 | " 'y': 433}],\n", 372 | " 'head': {'x1': 903, 'x2': 1070, 'y1': 73, 'y2': 263},\n", 373 | " 'objpos': {'x': 1010, 'y': 412},\n", 374 | " 'scale': 6.0710513092873786},\n", 375 | " {'annopoints': [{'id': 2, 'is_visible': '1', 'x': 228, 'y': 537},\n", 376 | " {'id': 3, 'is_visible': '1', 'x': 74, 'y': 536},\n", 377 | " {'id': 6, 'is_visible': '1', 'x': 151, 'y': 537},\n", 378 | " {'id': 7, 'is_visible': '1', 'x': 129, 'y': 
251},\n", 379 | " {'id': 8,\n", 380 | " 'is_visible': None,\n", 381 | " 'x': 123,\n", 382 | " 'y': 218},\n", 383 | " {'id': 9, 'is_visible': None, 'x': 89, 'y': 31},\n", 384 | " {'id': 10,\n", 385 | " 'is_visible': '0',\n", 386 | " 'x': 220,\n", 387 | " 'y': 373},\n", 388 | " {'id': 11,\n", 389 | " 'is_visible': '1',\n", 390 | " 'x': 297,\n", 391 | " 'y': 456},\n", 392 | " {'id': 12,\n", 393 | " 'is_visible': '1',\n", 394 | " 'x': 232,\n", 395 | " 'y': 251},\n", 396 | " {'id': 13, 'is_visible': '1', 'x': 26, 'y': 251},\n", 397 | " {'id': 14,\n", 398 | " 'is_visible': '1',\n", 399 | " 'x': 26,\n", 400 | " 'y': 423}],\n", 401 | " 'head': {'x1': 27, 'x2': 186, 'y1': 36, 'y2': 214},\n", 402 | " 'objpos': {'x': 133, 'y': 315},\n", 403 | " 'scale': 5.7281620088820802}],\n", 404 | " 'frame_sec': 84,\n", 405 | " 'image': '015599452.jpg',\n", 406 | " 'img_train': 1,\n", 407 | " 'single_person': [3],\n", 408 | " 'version': '12',\n", 409 | " 'video_name': 'https://www.youtube.com/watch?v=aAOusnrSsHI',\n", 410 | " 'vididx': 1660}\n", 411 | "\n", 412 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 413 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 1, 'x': 974, 'y': 446},\n", 414 | " {'id': 7, 'is_visible': 1, 'x': 985, 'y': 253},\n", 415 | " {'id': 8,\n", 416 | " 'is_visible': None,\n", 417 | " 'x': 982.75909999999999,\n", 418 | " 'y': 235.96940000000001},\n", 419 | " {'id': 9,\n", 420 | " 'is_visible': None,\n", 421 | " 'x': 962.24090000000001,\n", 422 | " 'y': 80.030600000000007},\n", 423 | " {'id': 0, 'is_visible': 1, 'x': 804, 'y': 711},\n", 424 | " {'id': 1, 'is_visible': 1, 'x': 816, 'y': 510},\n", 425 | " {'id': 2, 'is_visible': 1, 'x': 908, 'y': 438},\n", 426 | " {'id': 3, 'is_visible': 1, 'x': 1040, 'y': 454},\n", 427 | " {'id': 4, 'is_visible': 1, 'x': 906, 'y': 528},\n", 428 | " {'id': 5, 'is_visible': 1, 'x': 883, 'y': 707},\n", 429 | " {'id': 10, 'is_visible': 1, 'x': 869, 'y': 214},\n", 430 | " {'id': 11, 'is_visible': 1, 'x': 798, 'y': 340},\n", 431 | " {'id': 12, 'is_visible': 1, 'x': 902, 'y': 253},\n", 432 | " {'id': 13, 'is_visible': 1, 'x': 1067, 'y': 253},\n", 433 | " {'id': 14, 'is_visible': 1, 'x': 1167, 'y': 353},\n", 434 | " {'id': 15,\n", 435 | " 'is_visible': 1,\n", 436 | " 'x': 1142,\n", 437 | " 'y': 478}],\n", 438 | " 'head': {'x1': 914, 'x2': 1031, 'y1': 79, 'y2': 237},\n", 439 | " 'objpos': {'x': 966, 'y': 340},\n", 440 | " 'scale': 4.7184878933827941},\n", 441 | " {'annopoints': [{'id': 6, 'is_visible': 1, 'x': 471, 'y': 512},\n", 442 | " {'id': 7, 'is_visible': 1, 'x': 463, 'y': 268},\n", 443 | " {'id': 8,\n", 444 | " 'is_visible': None,\n", 445 | " 'x': 472.46640000000002,\n", 446 | " 'y': 220.85730000000001},\n", 447 | " {'id': 9,\n", 448 | " 'is_visible': None,\n", 449 | " 'x': 503.53359999999998,\n", 450 | " 'y': 66.142700000000005},\n", 451 | " {'id': 0, 'is_visible': 1, 'x': 667, 'y': 633},\n", 452 | " {'id': 1, 'is_visible': 1, 'x': 675, 'y': 462},\n", 453 | " {'id': 2, 'is_visible': 1, 'x': 567, 'y': 519},\n", 454 | " {'id': 3, 'is_visible': 1, 'x': 375, 'y': 504},\n", 455 | " {'id': 4, 'is_visible': 0, 'x': 543, 'y': 476},\n", 456 | " {'id': 5, 'is_visible': 0, 'x': 532, 'y': 651},\n", 457 | " {'id': 10, 'is_visible': 1, 'x': 702, 'y': 267},\n", 458 | " {'id': 11, 'is_visible': 1, 'x': 721, 'y': 386},\n", 459 | " {'id': 12, 'is_visible': 1, 'x': 584, 'y': 256},\n", 460 | " {'id': 13, 'is_visible': 1, 'x': 341, 'y': 280},\n", 461 | " {'id': 14, 'is_visible': 1, 'x': 310, 'y': 432},\n", 462 | " {'id': 15, 'is_visible': 1, 
'x': 372, 'y': 496}],\n", 463 | " 'head': {'x1': 427, 'x2': 549, 'y1': 66, 'y2': 221},\n", 464 | " 'objpos': {'x': 489, 'y': 383},\n", 465 | " 'scale': 4.734087451663731}],\n", 466 | " 'frame_sec': 135,\n", 467 | " 'image': '005808361.jpg',\n", 468 | " 'img_train': 1,\n", 469 | " 'single_person': [],\n", 470 | " 'version': '12',\n", 471 | " 'video_name': 'https://www.youtube.com/watch?v=s1tES1dQA74',\n", 472 | " 'vididx': 2462}\n", 473 | "\n", 474 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 475 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 0, 'x': 248, 'y': 341},\n", 476 | " {'id': 7, 'is_visible': 1, 'x': 279, 'y': 263},\n", 477 | " {'id': 8,\n", 478 | " 'is_visible': None,\n", 479 | " 'x': 277.02100000000002,\n", 480 | " 'y': 268.77859999999998},\n", 481 | " {'id': 9,\n", 482 | " 'is_visible': None,\n", 483 | " 'x': 305.97899999999998,\n", 484 | " 'y': 184.22139999999999},\n", 485 | " {'id': 0, 'is_visible': 1, 'x': 301, 'y': 461},\n", 486 | " {'id': 1, 'is_visible': 1, 'x': 305, 'y': 375},\n", 487 | " {'id': 2, 'is_visible': 1, 'x': 201, 'y': 340},\n", 488 | " {'id': 3, 'is_visible': 0, 'x': 294, 'y': 342},\n", 489 | " {'id': 4, 'is_visible': 0, 'x': 335, 'y': 370},\n", 490 | " {'id': 5, 'is_visible': 1, 'x': 331, 'y': 455},\n", 491 | " {'id': 10, 'is_visible': 1, 'x': 328, 'y': 354},\n", 492 | " {'id': 11, 'is_visible': 1, 'x': 260, 'y': 335},\n", 493 | " {'id': 12, 'is_visible': 1, 'x': 244, 'y': 261},\n", 494 | " {'id': 13, 'is_visible': 1, 'x': 314, 'y': 264},\n", 495 | " {'id': 14, 'is_visible': 1, 'x': 327, 'y': 320},\n", 496 | " {'id': 15, 'is_visible': 0, 'x': 362, 'y': 346}],\n", 497 | " 'head': {'x1': 252, 'x2': 331, 'y1': 187, 'y2': 266},\n", 498 | " 'objpos': {'x': 291, 'y': 353},\n", 499 | " 'scale': 2.681348914259388},\n", 500 | " {'annopoints': [{'id': 6, 'is_visible': 0, 'x': 399, 'y': 374},\n", 501 | " {'id': 7, 'is_visible': 1, 'x': 498, 'y': 317},\n", 502 | " {'id': 8,\n", 503 | " 'is_visible': None,\n", 504 | " 'x': 504.59530000000001,\n", 505 | " 'y': 315.17579999999998},\n", 506 | " {'id': 9,\n", 507 | " 'is_visible': None,\n", 508 | " 'x': 585.40470000000005,\n", 509 | " 'y': 292.82420000000002},\n", 510 | " {'id': 0, 'is_visible': 1, 'x': 515, 'y': 512},\n", 511 | " {'id': 1, 'is_visible': 1, 'x': 514, 'y': 420},\n", 512 | " {'id': 2, 'is_visible': 1, 'x': 406, 'y': 388},\n", 513 | " {'id': 3, 'is_visible': 0, 'x': 392, 'y': 360},\n", 514 | " {'id': 4, 'is_visible': 0, 'x': 493, 'y': 434},\n", 515 | " {'id': 5, 'is_visible': 1, 'x': 518, 'y': 504},\n", 516 | " {'id': 10, 'is_visible': 1, 'x': 628, 'y': 426},\n", 517 | " {'id': 11, 'is_visible': 1, 'x': 551, 'y': 398},\n", 518 | " {'id': 12, 'is_visible': 1, 'x': 501, 'y': 351},\n", 519 | " {'id': 13, 'is_visible': 1, 'x': 495, 'y': 282},\n", 520 | " {'id': 14, 'is_visible': 1, 'x': 425, 'y': 301},\n", 521 | " {'id': 15, 'is_visible': 1, 'x': 483, 'y': 334}],\n", 522 | " 'head': {'x1': 510, 'x2': 580, 'y1': 265, 'y2': 343},\n", 523 | " 'objpos': {'x': 472, 'y': 377},\n", 524 | " 'scale': 2.5153099212621886}],\n", 525 | " 'frame_sec': 240,\n", 526 | " 'image': '086617615.jpg',\n", 527 | " 'img_train': 1,\n", 528 | " 'single_person': [],\n", 529 | " 'version': '12',\n", 530 | " 'video_name': 'https://www.youtube.com/watch?v=s1tES1dQA74',\n", 531 | " 'vididx': 2462}\n", 532 | "\n", 533 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 534 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 1, 'x': 904, 'y': 237},\n", 535 | " {'id': 7, 
'is_visible': 1, 'x': 858, 'y': 135},\n", 536 | " {'id': 8,\n", 537 | " 'is_visible': None,\n", 538 | " 'x': 871.18769999999995,\n", 539 | " 'y': 180.42439999999999},\n", 540 | " {'id': 9,\n", 541 | " 'is_visible': None,\n", 542 | " 'x': 835.81230000000005,\n", 543 | " 'y': 58.575600000000001},\n", 544 | " {'id': 0, 'is_visible': 1, 'x': 980, 'y': 322},\n", 545 | " {'id': 1, 'is_visible': 0, 'x': 896, 'y': 318},\n", 546 | " {'id': 2, 'is_visible': 1, 'x': 865, 'y': 248},\n", 547 | " {'id': 3, 'is_visible': 1, 'x': 943, 'y': 226},\n", 548 | " {'id': 4, 'is_visible': 1, 'x': 948, 'y': 290},\n", 549 | " {'id': 5, 'is_visible': 1, 'x': 881, 'y': 349},\n", 550 | " {'id': 10, 'is_visible': 1, 'x': 772, 'y': 294},\n", 551 | " {'id': 11, 'is_visible': 1, 'x': 754, 'y': 247},\n", 552 | " {'id': 12, 'is_visible': 1, 'x': 792, 'y': 147},\n", 553 | " {'id': 13, 'is_visible': 1, 'x': 923, 'y': 123},\n", 554 | " {'id': 14, 'is_visible': 0, 'x': 995, 'y': 163},\n", 555 | " {'id': 15, 'is_visible': 0, 'x': 961, 'y': 223}],\n", 556 | " 'head': {'x1': 806, 'x2': 901, 'y1': 56, 'y2': 183},\n", 557 | " 'objpos': {'x': 897, 'y': 171},\n", 558 | " 'scale': 3.8064030264805111}],\n", 559 | " 'frame_sec': 6,\n", 560 | " 'image': '060111501.jpg',\n", 561 | " 'img_train': 1,\n", 562 | " 'single_person': [1],\n", 563 | " 'version': '12',\n", 564 | " 'video_name': 'https://www.youtube.com/watch?v=0skrJnNN3_I',\n", 565 | " 'vididx': 89}\n", 566 | "\n", 567 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n", 568 | " 'annorects': [{'annopoints': [{'id': 0, 'is_visible': '1', 'x': 461, 'y': 398},\n", 569 | " {'id': 1, 'is_visible': '1', 'x': 509, 'y': 335},\n", 570 | " {'id': 2, 'is_visible': '1', 'x': 517, 'y': 218},\n", 571 | " {'id': 3, 'is_visible': '1', 'x': 570, 'y': 203},\n", 572 | " {'id': 5, 'is_visible': '0', 'x': 568, 'y': 309},\n", 573 | " {'id': 6, 'is_visible': '1', 'x': 544, 'y': 211},\n", 574 | " {'id': 7, 'is_visible': '1', 'x': 620, 'y': 273},\n", 575 | " {'id': 8,\n", 576 | " 'is_visible': None,\n", 577 | " 'x': 614,\n", 578 | " 'y': 267},\n", 579 | " {'id': 9,\n", 580 | " 'is_visible': None,\n", 581 | " 'x': 668,\n", 582 | " 'y': 326},\n", 583 | " {'id': 10,\n", 584 | " 'is_visible': '1',\n", 585 | " 'x': 537,\n", 586 | " 'y': 288},\n", 587 | " {'id': 11,\n", 588 | " 'is_visible': '1',\n", 589 | " 'x': 503,\n", 590 | " 'y': 234},\n", 591 | " {'id': 12,\n", 592 | " 'is_visible': '1',\n", 593 | " 'x': 587,\n", 594 | " 'y': 280},\n", 595 | " {'id': 13,\n", 596 | " 'is_visible': '1',\n", 597 | " 'x': 652,\n", 598 | " 'y': 265},\n", 599 | " {'id': 14,\n", 600 | " 'is_visible': '1',\n", 601 | " 'x': 636,\n", 602 | " 'y': 356},\n", 603 | " {'id': 15,\n", 604 | " 'is_visible': '1',\n", 605 | " 'x': 621,\n", 606 | " 'y': 417}],\n", 607 | " 'head': {'x1': 609, 'x2': 674, 'y1': 259, 'y2': 335},\n", 608 | " 'objpos': {'x': 582, 'y': 268},\n", 609 | " 'scale': 2.4001199970001501},\n", 610 | " {'annopoints': [{'id': 0, 'is_visible': '1', 'x': 896, 'y': 436},\n", 611 | " {'id': 1, 'is_visible': '0', 'x': 875, 'y': 397},\n", 612 | " {'id': 2, 'is_visible': '1', 'x': 885, 'y': 295},\n", 613 | " {'id': 3, 'is_visible': '1', 'x': 852, 'y': 363},\n", 614 | " {'id': 4, 'is_visible': '1', 'x': 797, 'y': 442},\n", 615 | " {'id': 5, 'is_visible': '1', 'x': 823, 'y': 505},\n", 616 | " {'id': 6, 'is_visible': '1', 'x': 869, 'y': 329},\n", 617 | " {'id': 7, 'is_visible': '1', 'x': 737, 'y': 323},\n", 618 | " {'id': 8,\n", 619 | " 'is_visible': None,\n", 620 | " 'x': 719,\n", 621 | " 'y': 326},\n", 622 | " 
{'id': 9,\n", 623 | " 'is_visible': None,\n", 624 | " 'x': 648,\n", 625 | " 'y': 338},\n", 626 | " {'id': 10,\n", 627 | " 'is_visible': '1',\n", 628 | " 'x': 804,\n", 629 | " 'y': 305},\n", 630 | " {'id': 11,\n", 631 | " 'is_visible': '1',\n", 632 | " 'x': 804,\n", 633 | " 'y': 237},\n", 634 | " {'id': 12,\n", 635 | " 'is_visible': '1',\n", 636 | " 'x': 741,\n", 637 | " 'y': 285},\n", 638 | " {'id': 13,\n", 639 | " 'is_visible': '1',\n", 640 | " 'x': 732,\n", 641 | " 'y': 361},\n", 642 | " {'id': 14,\n", 643 | " 'is_visible': '1',\n", 644 | " 'x': 758,\n", 645 | " 'y': 411},\n", 646 | " {'id': 15,\n", 647 | " 'is_visible': '1',\n", 648 | " 'x': 757,\n", 649 | " 'y': 485}],\n", 650 | " 'head': {'x1': 652, 'x2': 716, 'y1': 301, 'y2': 364},\n", 651 | " 'objpos': {'x': 765, 'y': 394},\n", 652 | " 'scale': 2.1553282812601888}],\n", 653 | " 'frame_sec': 81,\n", 654 | " 'image': '070807258.jpg',\n", 655 | " 'img_train': 1,\n", 656 | " 'single_person': [],\n", 657 | " 'version': '12',\n", 658 | " 'video_name': 'https://www.youtube.com/watch?v=0skrJnNN3_I',\n", 659 | " 'vididx': 89}\n", 660 | "\n" 661 | ] 662 | } 663 | ], 664 | "source": [ 665 | "for n in range(10):\n", 666 | " pprint(load_anno(keypoints,n))\n", 667 | " print()" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 282, 673 | "metadata": { 674 | "collapsed": false 675 | }, 676 | "outputs": [ 677 | { 678 | "name": "stdout", 679 | "output_type": "stream", 680 | "text": [ 681 | "{'act': {'act_id': 487,\n", 682 | " 'act_name': 'sitting, arts and crafts, carving wood, weaving, spinning wool',\n", 683 | " 'cat_name': 'miscellaneous'},\n", 684 | " 'annorects': [{'annopoints': [{'id': 0,\n", 685 | " 'is_visible': '1',\n", 686 | " 'x': 435,\n", 687 | " 'y': 1066},\n", 688 | " {'id': 1, 'is_visible': '1', 'x': 181, 'y': 811},\n", 689 | " {'id': 2, 'is_visible': '1', 'x': 668, 'y': 845},\n", 690 | " {'id': 3, 'is_visible': '1', 'x': 879, 'y': 879},\n", 691 | " {'id': 4,\n", 692 | " 'is_visible': '1',\n", 693 | " 'x': 649,\n", 694 | " 'y': 1003},\n", 695 | " {'id': 5,\n", 696 | " 'is_visible': '1',\n", 697 | " 'x': 448,\n", 698 | " 'y': 1003},\n", 699 | " {'id': 6, 'is_visible': '1', 'x': 774, 'y': 862},\n", 700 | " {'id': 7, 'is_visible': '1', 'x': 817, 'y': 397},\n", 701 | " {'id': 8,\n", 702 | " 'is_visible': None,\n", 703 | " 'x': 830,\n", 704 | " 'y': 266},\n", 705 | " {'id': 9, 'is_visible': None, 'x': 857, 'y': -1},\n", 706 | " {'id': 10,\n", 707 | " 'is_visible': '1',\n", 708 | " 'x': 606,\n", 709 | " 'y': 655},\n", 710 | " {'id': 11,\n", 711 | " 'is_visible': '1',\n", 712 | " 'x': 562,\n", 713 | " 'y': 690},\n", 714 | " {'id': 12,\n", 715 | " 'is_visible': '1',\n", 716 | " 'x': 615,\n", 717 | " 'y': 395},\n", 718 | " {'id': 13,\n", 719 | " 'is_visible': '1',\n", 720 | " 'x': 1018,\n", 721 | " 'y': 398},\n", 722 | " {'id': 14,\n", 723 | " 'is_visible': '1',\n", 724 | " 'x': 1065,\n", 725 | " 'y': 718},\n", 726 | " {'id': 15,\n", 727 | " 'is_visible': '1',\n", 728 | " 'x': 885,\n", 729 | " 'y': 590}],\n", 730 | " 'head': {'x1': 740, 'x2': 948, 'y1': 0, 'y2': 265},\n", 731 | " 'objpos': {'x': 814, 'y': 758},\n", 732 | " 'scale': 8.0851508334724329}],\n", 733 | " 'frame_sec': 150,\n", 734 | " 'image': '058019490.jpg',\n", 735 | " 'img_train': 1,\n", 736 | " 'single_person': [1],\n", 737 | " 'version': '12',\n", 738 | " 'video_name': 'https://www.youtube.com/watch?v=UQYoVlVX68w',\n", 739 | " 'vididx': 1367}\n" 740 | ] 741 | } 742 | ], 743 | "source": [ 744 | "pprint(load_anno(keypoints,18099))\n", 745 | "\n" 746 | ] 
747 |   },
748 |   {
749 |    "cell_type": "code",
750 |    "execution_count": 221,
751 |    "metadata": {
752 |     "collapsed": false
753 |    },
754 |    "outputs": [],
755 |    "source": [
756 |     "\n",
757 |     "n=5\n"
758 |    ]
759 |   },
760 |   {
761 |    "cell_type": "code",
762 |    "execution_count": 271,
763 |    "metadata": {
764 |     "collapsed": false
765 |    },
766 |    "outputs": [
767 |     {
768 |      "data": {
769 |       "text/plain": [
770 |        "array(['-08Vnk8XONY'],\n",
771 |        "      dtype='= 1, 'octave should be >= 1'
25 |     # return starting_range, ending_range, octave
26 | 
27 | def getJetColor(v, vmin, vmax):  # map v in [vmin, vmax] to a BGR "jet" color; the ramps assume the 0..1 range that colorize() passes in
28 |     c = np.zeros(3)
29 |     if (v < vmin):
30 |         v = vmin
31 |     if (v > vmax):
32 |         v = vmax
33 |     dv = vmax - vmin
34 |     if (v < (vmin + 0.125 * dv)):
35 |         c[0] = 256 * (0.5 + (v * 4)) #B: 0.5 ~ 1
36 |     elif (v < (vmin + 0.375 * dv)):
37 |         c[0] = 255
38 |         c[1] = 256 * (v - 0.125) * 4 #G: 0 ~ 1
39 |     elif (v < (vmin + 0.625 * dv)):
40 |         c[0] = 256 * (-4 * v + 2.5) #B: 1 ~ 0
41 |         c[1] = 255
42 |         c[2] = 256 * (4 * (v - 0.375)) #R: 0 ~ 1
43 |     elif (v < (vmin + 0.875 * dv)):
44 |         c[1] = 256 * (-4 * v + 3.5) #G: 1 ~ 0
45 |         c[2] = 255
46 |     else:
47 |         c[2] = 256 * (-4 * v + 4.5) #R: 1 ~ 0.5
48 |     return c
49 | 
50 | def colorize(gray_img):  # apply the jet colormap pixel by pixel to a grayscale image with values in [0, 1]
51 |     out = np.zeros(gray_img.shape + (3,))
52 |     for y in range(out.shape[0]):
53 |         for x in range(out.shape[1]):
54 |             out[y,x,:] = getJetColor(gray_img[y,x], 0, 1)
55 |     return out
56 | 
57 | def padRightDownCorner(img, stride, padValue):  # pad bottom/right so h and w become multiples of stride; returns the padded image and [up, left, down, right] pad sizes
58 |     h = img.shape[0]
59 |     w = img.shape[1]
60 | 
61 |     pad = 4 * [None]
62 |     pad[0] = 0 # up
63 |     pad[1] = 0 # left
64 |     pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down
65 |     pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right
66 | 
67 |     img_padded = img
68 |     pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1))
69 |     img_padded = np.concatenate((pad_up, img_padded), axis=0)
70 |     pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1))
71 |     img_padded = np.concatenate((pad_left, img_padded), axis=1)
72 |     pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1))
73 |     img_padded = np.concatenate((img_padded, pad_down), axis=0)
74 |     pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1))
75 |     img_padded = np.concatenate((img_padded, pad_right), axis=1)
76 | 
77 |     return img_padded, pad
78 | 
--------------------------------------------------------------------------------
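A minimal usage sketch for util.py's padRightDownCorner, added for illustration (not a file from the repository; the image size, stride, and pad value below are made up):

import numpy as np
from util import padRightDownCorner

# a hypothetical 3-channel image whose sides are not multiples of the stride
img = np.zeros((37, 50, 3), dtype=np.float32)

img_padded, pad = padRightDownCorner(img, stride=8, padValue=128)

# each dimension grows to the next multiple of 8 (37 -> 40, 50 -> 56),
# and pad records the added amounts as [up, left, down, right]
assert img_padded.shape == (40, 56, 3)
assert pad == [0, 0, 3, 6]

# after running the network on img_padded, predictions are cropped back to the
# original size the same way (shown here on the padded image itself)
restored = img_padded[:img_padded.shape[0] - pad[2], :img_padded.shape[1] - pad[3], :]
assert restored.shape == img.shape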