├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── config.py
├── dataset
│   ├── coco
│   │   └── INFO
│   └── get_dataset.sh
├── demo.ipynb
├── demo_camera.py
├── demo_image.py
├── model.py
├── model
│   ├── caffe
│   │   ├── _trained_COCO
│   │   │   └── pose_deploy.prototxt
│   │   ├── _trained_MPI
│   │   │   └── pose_deploy.prototxt
│   │   └── layers
│   │       └── INFO
│   ├── caffe_to_keras.py
│   ├── dump_caffe_layers.py
│   ├── get_caffe_model.sh
│   └── get_keras_model.sh
├── py_rmpe_server
│   ├── py_rmpe_data_iterator.py
│   ├── py_rmpe_heatmapper.py
│   ├── py_rmpe_transformer.py
│   └── rmpe_server.py
├── readme
│   ├── 5ep_result.png
│   ├── dance.gif
│   ├── losses.png
│   ├── result.png
│   └── tr_results.png
├── sample_images
│   └── ski.jpg
├── testing
│   ├── coco.ipynb
│   ├── coco_metric.py
│   ├── inhouse_metric.py
│   ├── inspect_dataset.ipynb
│   ├── inspect_nn_input_output.ipynb
│   ├── rmpe_server_comparator.py
│   └── rmpe_server_tester.py
├── training
│   ├── coco_masks_hdf5.py
│   ├── ds_generators.py
│   ├── optimizers.py
│   ├── train_common.py
│   └── train_pose.py
├── trash
│   ├── lmdb.parse.ipynb
│   └── matload_test.ipynb
└── util.py
/.gitignore:
--------------------------------------------------------------------------------
1 | model/**/*.caffemodel
2 | model/**/*.h5
3 | model/**/*.npy
4 | dataset/train2017
5 | dataset/val2017
6 | dataset/test2017
7 | dataset/trainmask2017
8 | dataset/valmask2017
9 | dataset/*.h5
10 | dataset/annotations
11 | dataset/coco
12 | .idea
13 | __pycache__
14 | .ipynb_checkpoints
15 | logs/
16 | training.csv
17 | weights.best.h5
18 | .Rproj.user
19 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "addins"]
2 | path = addins
3 | url = git@github.com:anatolix/multipose_addins.git
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | SOFTWARE LICENSE AGREEMENT
2 | ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
3 |
4 | BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
5 |
6 | This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Michal Faber (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
7 |
8 | RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
9 | Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
10 | non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
11 |
12 | CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
13 |
14 | COPYRIGHT: The Software is owned by Licensor and is protected by United
15 | States copyright laws and applicable international treaties and/or conventions.
16 |
17 | PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
18 |
19 | DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
20 |
21 | BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
22 |
23 | USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein.
24 |
25 | You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
26 |
27 | ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
28 |
29 | TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
30 |
31 | The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
32 |
33 | FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
34 |
35 | DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
36 |
37 | SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
38 |
39 | EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
40 |
41 | EXPORT REGULATION: Licensee agrees to comply with any and all applicable
42 | U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
43 |
44 | SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
45 |
46 | NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
47 |
48 | GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania.
49 |
50 | ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
51 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### About this fork
2 |
3 | This fork contains a **pure Python version** of [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation). It was initially forked from [Michal Faber's fork](https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation); all credit for porting the original work to Keras goes to him.
4 |
5 | In this fork I've reimplemented image augmentation in pure Python. It is significantly shorter (**285** lines vs **1202** lines in Michal Faber's C++ **rmpe_server**, and far fewer than in the original work).
6 |
7 | Despite being written in Python, this code is **significantly faster** than the original implementation (140 images/s vs 30 images/s for the C++ code on my machine). This is not especially useful in practice, since most people don't have 5 GPUs, but it proves the point that Python programs can be fast. The trick is combining all affine transformations into one matrix and calling a single **warpAffine**, plus vectorized numpy computation of the PAFs and heatmaps.
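The idea, as a minimal sketch (the function and variable names below are made up for illustration; the real implementation is `AugmentSelection.affine()` and the transformer in `py_rmpe_server/py_rmpe_transformer.py`):

```python
import numpy as np
import cv2

def combined_affine(center, scale, degrees, shift, out_size=368):
    # compose translate-to-origin -> rotate+scale -> translate-to-output-center
    # into a single 2x3 matrix, so the image is interpolated only once
    cx, cy = center
    t1 = np.array([[1., 0., -cx], [0., 1., -cy], [0., 0., 1.]])
    a = scale * np.cos(np.radians(degrees))
    b = scale * np.sin(np.radians(degrees))
    rs = np.array([[a, b, 0.], [-b, a, 0.], [0., 0., 1.]])
    t2 = np.array([[1., 0., out_size / 2. + shift[0]],
                   [0., 1., out_size / 2. + shift[1]],
                   [0., 0., 1.]])
    return (t2 @ rs @ t1)[0:2]   # cv2.warpAffine expects a 2x3 matrix

img = np.zeros((480, 640, 3), np.uint8)   # dummy image stand-in
M = combined_affine(center=(320, 240), scale=0.8, degrees=15., shift=(5, -3))
warped = cv2.warpAffine(img, M, (368, 368), flags=cv2.INTER_CUBIC)
# joint coordinates can be mapped with the same matrix: np.dot(M, [x, y, 1.])
```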
8 |
9 |
10 | It can be run as an iterator inside **train_pose.py** (the default), or as a separate **./rmpe_server.py** process.
11 |
12 | #### Current status
13 | - [x] image augmentation: rotate, shift, scale, crop, flip (implemented as single affine transform, i.e. much faster)
14 | - [x] mask calculation: rotate, shift, scale, crop, flip
15 | - [x] joint heatmaps
16 | - [x] limbs part affinity fields
17 | - [x] quality is the same as the original work and a bit better than Michal's version.
18 |
19 | #### Current work
20 | - [ ] Ability to easily modify the config and train different models. See the addins submodule for a head-detector example and examples of how to add new datasets (MPII, Brainwash).
21 |
22 |
23 | # Realtime Multi-Person Pose Estimation
24 | This is a Keras version of the [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) project.
25 |
26 | ## Introduction
27 | Code repo for reproducing the [2017 CVPR](https://arxiv.org/abs/1611.08050) paper using Keras.
28 |
29 | ## Results
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | ## Contents
43 | 1. [Converting caffe model](#converting-caffe-model-to-keras-model)
44 | 2. [Testing](#testing-steps)
45 | 3. [Training](#training-steps)
46 |
47 | ## Require
48 | 1. [Keras](https://keras.io/)
49 | 2. [Caffe - docker](https://hub.docker.com/r/bvlc/caffe/) - required only if you would like to convert the caffe model to a keras model. You
50 |    don't have to compile/install caffe on your local machine.
51 |
52 | ## Converting Caffe model to Keras model
53 | The authors of the [original implementation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation) released an already trained caffe model
54 | which you can use to extract the weights data.
55 |
56 | - Download caffe model `cd model; sh get_caffe_model.sh`
57 | - Dump caffe layers to numpy data `cd ..; docker run -v [absolute path to your keras_Realtime_Multi-Person_Pose_Estimation folder]:/workspace -it bvlc/caffe:cpu python dump_caffe_layers.py`
58 | Note that docker accepts only absolute paths so you have to set the full path to the folder containing this project.
59 | - Convert caffe model (from numpy data) to keras model `python caffe_to_keras.py`
60 |
61 | ## Testing steps
62 | - Convert the caffe model to a keras model, or download the already converted keras model https://www.dropbox.com/s/llpxd14is7gyj0z/model.h5
63 | - Run the notebook `demo.ipynb`.
64 | - `python demo_image.py --image sample_images/ski.jpg` to run the picture demo. Result will be stored in the file result.png. You can use
65 | any image file as an input.
66 | - `python demo_camera.py` to run the web demo.
67 |
68 | ## Training steps
69 |
70 | **UPDATE 26/10/2017**
71 |
72 | **Fixed a problem with the training procedure.
73 | Here are my results after training for 5 epochs = 25000 iterations (1 epoch is ~5000 batches).
74 | The loss values are quite similar to those in the original training - [output.txt](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/blob/master/training/example_loss/output.txt)**
75 |
76 |
77 |
78 |
79 |
80 | **Results of running `python demo_image.py --image sample_images/ski.jpg --model training/weights.best.h5` with a model trained for only 25000 iterations. Not too bad! Training on my single 1070 GPU took around 10 hours.**
81 |
82 |
83 |
84 |
85 |
86 | **UPDATE 22/10/2017:**
87 |
88 | **Augmented samples are fetched from the [server](https://github.com/michalfaber/rmpe_dataset_server). The network never sees the same image twice,
89 | which was a problem in the previous approach (the rmpe_dataset_transformer tool).
90 | This allows you to run augmentation locally or on a separate node.
91 | You can start 2 instances, one serving the training set and a second one serving the validation set (on different ports if running locally).**
92 |
93 | - Install gsutil `curl https://sdk.cloud.google.com | bash`. This is a really helpful tool for downloading large datasets.
94 | - Download the data set (~25 GB) `cd dataset; sh get_dataset.sh`.
95 | - Download the [COCO official toolbox](https://github.com/pdollar/coco) into `dataset/coco/`.
96 | - `cd coco/PythonAPI; sudo python setup.py install` to install pycocotools.
97 | - Go to the "training" folder `cd ../../../training`.
98 | - Generate masks `python generate_masks.py`. Note: set the parameter "mode" in generate_masks.py (validation or training)
99 | - Create the intermediate dataset `python generate_hdf5.py`. This tool creates a dataset in hdf5 format. The structure of this dataset is very similar to the
100 |   original lmdb dataset, where a sample is represented as an array: 5 x width x height (3 channels for the image, 1 channel for metadata, 1 channel for miss masks).
101 |   For the MPI dataset there are 6 channels, with an additional all-people mask. A minimal reading example is sketched after this list.
102 |   Note: set the parameters `datasets` and `val_size` in `generate_hdf5.py`
103 | - Download and compile the dataset server [rmpe_dataset_server](https://github.com/michalfaber/rmpe_dataset_server).
104 | This server generates augmented samples on the fly. Source samples are retrieved from previously generated hdf5 dataset file.
105 | - Start training data server in the first terminal session.
106 | `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/train_dataset.h5 5555`
107 | - Start validation data server in a second terminal session.
108 | `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/val_dataset.h5 5556`
109 | - Optionally you can verify the datasets `inspect_dataset.ipynb`
110 | - Set the correct number of samples in `train_pose.py` - the variables `train_samples = ???` and `val_samples = ???`.
111 |   These numbers are used by keras to determine how many samples are in 1 epoch.
112 | - Train the model in a third terminal `python train_pose.py`
113 |
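For reference, a minimal sketch of reading one sample back from the intermediate hdf5 dataset (it mirrors `read_data_old()` in `py_rmpe_server/py_rmpe_data_iterator.py` and assumes the `datum` group layout produced by this fork's tools):

```python
import json
import h5py

with h5py.File("dataset/train_dataset.h5", "r") as h5:
    key = list(h5["datum"].keys())[0]        # take any sample
    entry = h5["datum"][key]
    data = entry[()]                         # 5 channels x image size (6 for MPI)
    if data.shape[0] <= 6:                   # stored channels-first, flip to channels-last
        data = data.transpose([1, 2, 0])
    img = data[:, :, 0:3]                    # BGR image
    mask_miss = data[:, :, 4]                # miss mask
    meta = json.loads(entry.attrs["meta"])   # joints, objpos, scale_provided, ...
    print(img.shape, mask_miss.shape, sorted(meta.keys()))
```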
114 | ## Related repository
115 | - CVPR'16, [Convolutional Pose Machines](https://github.com/shihenw/convolutional-pose-machines-release).
116 | - CVPR'17, [Realtime Multi-Person Pose Estimation](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation).
117 |
118 | ## Citation
119 | Please cite the paper in your publications if it helps your research:
120 |
121 | @InProceedings{cao2017realtime,
122 | title = {Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields},
123 | author = {Zhe Cao and Tomas Simon and Shih-En Wei and Yaser Sheikh},
124 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
125 | year = {2017}
126 | }
127 |
128 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import numpy as np
4 |
5 |
6 | Configs = {}
7 |
8 | class CanonicalConfig:
9 |
10 | def __init__(self):
11 |
12 | self.width = 368
13 | self.height = 368
14 |
15 | self.stride = 8
16 |
17 | self.parts = ["nose", "neck", "Rsho", "Relb", "Rwri", "Lsho", "Lelb", "Lwri", "Rhip", "Rkne", "Rank", "Lhip", "Lkne", "Lank", "Reye", "Leye", "Rear", "Lear"]
18 | self.num_parts = len(self.parts)
19 | self.parts_dict = dict(zip(self.parts, range(self.num_parts)))
20 | self.parts += ["background"]
21 | self.num_parts_with_background = len(self.parts)
22 |
23 | leftParts, rightParts = CanonicalConfig.ltr_parts(self.parts_dict)
24 | self.leftParts = leftParts
25 | self.rightParts = rightParts
26 |
27 |
28 | # these numbers were probably copied from matlab; they are 1-based, not 0-based
29 | self.limb_from = ['neck', 'Rhip', 'Rkne', 'neck', 'Lhip', 'Lkne', 'neck', 'Rsho', 'Relb', 'Rsho', 'neck', 'Lsho', 'Lelb', 'Lsho',
30 | 'neck', 'nose', 'nose', 'Reye', 'Leye']
31 | self.limb_to = ['Rhip', 'Rkne', 'Rank', 'Lhip', 'Lkne', 'Lank', 'Rsho', 'Relb', 'Rwri', 'Rear', 'Lsho', 'Lelb', 'Lwri', 'Lear',
32 | 'nose', 'Reye', 'Leye', 'Rear', 'Lear']
33 |
34 | self.limb_from = [ self.parts_dict[n] for n in self.limb_from ]
35 | self.limb_to = [ self.parts_dict[n] for n in self.limb_to ]
36 |
37 | assert self.limb_from == [x-1 for x in [2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16]]
38 | assert self.limb_to == [x-1 for x in [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18]]
39 |
40 | self.limbs_conn = list(zip(self.limb_from, self.limb_to))
41 |
42 | self.paf_layers = 2*len(self.limbs_conn)
43 | self.heat_layers = self.num_parts
44 | self.num_layers = self.paf_layers + self.heat_layers + 1
45 |
46 | self.paf_start = 0
47 | self.heat_start = self.paf_layers
48 | self.bkg_start = self.paf_layers + self.heat_layers
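        # resulting layout for this canonical (COCO-derived) config, for reference:
        #   PAF channels:      0..37  (2 * 19 limbs = 38)
        #   heatmap channels: 38..55  (18 parts)
        #   background:           56
        #   num_layers = 57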
49 |
50 | #self.data_shape = (self.height, self.width, 3) # 368, 368, 3
51 | self.mask_shape = (self.height//self.stride, self.width//self.stride) # 46, 46
52 | self.parts_shape = (self.height//self.stride, self.width//self.stride, self.num_layers) # 46, 46, 57
53 |
54 | class TransformationParams:
55 |
56 | def __init__(self):
57 | self.target_dist = 0.6
58 | self.scale_prob = 1  # TODO: this is actually scale improbability, i.e. 1 = off, 0 = always; not sure if it is a bug or not
59 | self.scale_min = 0.5
60 | self.scale_max = 1.1
61 | self.max_rotate_degree = 40.
62 | self.center_perterb_max = 40.
63 | self.flip_prob = 0.5
64 | self.sigma = 7.
65 | self.paf_thre = 8.  # this is 1.0 * stride in the original implementation
66 |
67 | self.transform_params = TransformationParams()
68 |
69 | @staticmethod
70 | def ltr_parts(parts_dict):
71 | # when we flip the image, left parts become right parts and vice versa. These are the lists of parts to swap with each other.
72 | leftParts = [ parts_dict[p] for p in ["Lsho", "Lelb", "Lwri", "Lhip", "Lkne", "Lank", "Leye", "Lear"] ]
73 | rightParts = [ parts_dict[p] for p in ["Rsho", "Relb", "Rwri", "Rhip", "Rkne", "Rank", "Reye", "Rear"] ]
74 | return leftParts,rightParts
75 |
76 |
77 |
78 | class COCOSourceConfig:
79 |
80 |
81 | def __init__(self, hdf5_source):
82 |
83 | self.hdf5_source = hdf5_source
84 | self.parts = ['nose', 'Leye', 'Reye', 'Lear', 'Rear', 'Lsho', 'Rsho', 'Lelb',
85 | 'Relb', 'Lwri', 'Rwri', 'Lhip', 'Rhip', 'Lkne', 'Rkne', 'Lank',
86 | 'Rank']
87 |
88 | self.num_parts = len(self.parts)
89 |
90 | # for COCO the neck is calculated as the mean of the 2 shoulders.
91 | self.parts_dict = dict(zip(self.parts, range(self.num_parts)))
92 |
93 | def convert(self, meta, global_config):
94 |
95 | joints = np.array(meta['joints'])
96 |
97 | assert joints.shape[1] == len(self.parts)
98 |
99 | result = np.zeros((joints.shape[0], global_config.num_parts, 3), dtype=np.float)
100 | result[:,:,2]=3.  # our convention: 3 - never marked up in this dataset, 2 - not marked up for this person, 1 - marked and visible, 0 - marked but invisible
101 |
102 | for p in self.parts:
103 | coco_id = self.parts_dict[p]
104 |
105 | if p in global_config.parts_dict:
106 | global_id = global_config.parts_dict[p]
107 | assert global_id!=1, "neck shouldn't be known yet"
108 | result[:,global_id,:]=joints[:,coco_id,:]
109 |
110 | if 'neck' in global_config.parts_dict:
111 | neckG = global_config.parts_dict['neck']
112 | RshoC = self.parts_dict['Rsho']
113 | LshoC = self.parts_dict['Lsho']
114 |
115 | # there is no neck in the coco database, we calculate it as the average of the shoulders
116 | # TODO: we use 0 - hidden, 1 - visible, 2 - absent; these are not raw coco values, they have already been processed by generate_hdf5
117 | both_shoulders_known = (joints[:, LshoC, 2]<2) & (joints[:, RshoC, 2] < 2)
118 |
119 | result[~both_shoulders_known, neckG, 2] = 2. # otherwise they will be 3. aka 'never marked in this dataset'
120 |
121 | result[both_shoulders_known, neckG, 0:2] = (joints[both_shoulders_known, RshoC, 0:2] +
122 | joints[both_shoulders_known, LshoC, 0:2]) / 2
123 | result[both_shoulders_known, neckG, 2] = np.minimum(joints[both_shoulders_known, RshoC, 2],
124 | joints[both_shoulders_known, LshoC, 2])
125 |
126 | meta['joints'] = result
127 |
128 | return meta
129 |
130 | def convert_mask(self, mask, global_config, joints = None):
131 |
132 | mask = np.repeat(mask[:,:,np.newaxis], global_config.num_layers, axis=2)
133 | return mask
134 |
135 | def source(self):
136 |
137 | return self.hdf5_source
138 |
139 |
140 |
141 | # more information on keypoints mapping is here
142 | # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/7
143 |
144 |
145 | Configs["Canonical"] = CanonicalConfig
146 |
147 |
148 | def GetConfig(config_name):
149 |
150 | config = Configs[config_name]()
151 |
152 | dct = config.parts[:]
153 | dct = [None]*(config.num_layers-len(dct)) + dct
154 |
155 | for (i,(fr,to)) in enumerate(config.limbs_conn):
156 | name = "%s->%s" % (config.parts[fr], config.parts[to])
157 | print(i, name)
158 | x = i*2
159 | y = i*2+1
160 |
161 | assert dct[x] is None
162 | dct[x] = name + ":x"
163 | assert dct[y] is None
164 | dct[y] = name + ":y"
165 |
166 | from pprint import pprint
167 | pprint(dict(zip(range(len(dct)), dct)))
168 |
169 | return config
170 |
171 | if __name__ == "__main__":
172 |
173 | # test it
174 | foo = GetConfig("Canonical")
175 | print(foo.paf_layers, foo.heat_layers)
176 |
177 |
178 |
--------------------------------------------------------------------------------
/dataset/coco/INFO:
--------------------------------------------------------------------------------
1 | Folder for https://github.com/pdollar/coco files
--------------------------------------------------------------------------------
/dataset/get_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Install gsutil which provides tools for efficiently accessing datasets
4 | # without unzipping large files.
5 | # Install gsutil via:curl https://sdk.cloud.google.com | bash
6 |
7 | mkdir train2017
8 | mkdir val2017
9 | mkdir test2017
10 | mkdir annotations
11 |
12 | echo "Downloading train2017..."
13 | gsutil -m rsync gs://images.cocodataset.org/train2017 train2017
14 |
15 | echo "Downloading val2017..."
16 | gsutil -m rsync gs://images.cocodataset.org/val2017 val2017
17 |
18 | echo "Downloading test2017..."
19 | gsutil -m rsync gs://images.cocodataset.org/test2017 test2017
20 |
21 | echo "Downloading annotations..."
22 | gsutil -m rsync gs://images.cocodataset.org/annotations annotations
23 |
24 |
--------------------------------------------------------------------------------
/demo_camera.py:
--------------------------------------------------------------------------------
1 | # TODO
--------------------------------------------------------------------------------
/demo_image.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import math
4 | import time
5 | import numpy as np
6 | import util
7 | from config_reader import config_reader
8 | from scipy.ndimage.filters import gaussian_filter
9 | from model import get_testing_model
10 |
11 |
12 | # find connection in the specified sequence, center 29 is in the position 15
13 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
14 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
15 | [1, 16], [16, 18], [3, 17], [6, 18]]
16 |
17 | # the middle joints heatmap correspondence
18 | mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
19 | [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
20 | [55, 56], [37, 38], [45, 46]]
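# mapIdx follows the original 57-channel output layout in which the 38 PAF channels
# occupy indices 19..56; subtracting 19 (see score_mid below) maps them into the
# 38-channel paf_avg array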
21 |
22 | # visualize
23 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
24 | [0, 255, 0], \
25 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255],
26 | [85, 0, 255], \
27 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
28 |
29 |
30 | def process (input_image, params, model_params):
31 |
32 | oriImg = cv2.imread(input_image) # B,G,R order
33 | multiplier = [x * model_params['boxsize'] / oriImg.shape[0] for x in params['scale_search']]
34 |
35 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
36 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
37 |
38 | for m in range(len(multiplier)):
39 | scale = multiplier[m]
40 |
41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'],
43 | model_params['padValue'])
44 |
45 | input_img = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,0,1,2)) # required shape (1, width, height, channels)
46 |
47 | output_blobs = model.predict(input_img)
48 |
49 | # extract outputs, resize, and remove padding
50 | heatmap = np.squeeze(output_blobs[1]) # output 1 is heatmaps
51 | heatmap = cv2.resize(heatmap, (0, 0), fx=model_params['stride'], fy=model_params['stride'],
52 | interpolation=cv2.INTER_CUBIC)
53 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3],
54 | :]
55 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
56 |
57 | paf = np.squeeze(output_blobs[0]) # output 0 is PAFs
58 | paf = cv2.resize(paf, (0, 0), fx=model_params['stride'], fy=model_params['stride'],
59 | interpolation=cv2.INTER_CUBIC)
60 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
61 | paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
62 |
63 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
64 | paf_avg = paf_avg + paf / len(multiplier)
65 |
66 | all_peaks = []
67 | peak_counter = 0
68 |
69 | for part in range(18):
70 | map_ori = heatmap_avg[:, :, part]
71 | map = gaussian_filter(map_ori, sigma=3)
72 |
73 | map_left = np.zeros(map.shape)
74 | map_left[1:, :] = map[:-1, :]
75 | map_right = np.zeros(map.shape)
76 | map_right[:-1, :] = map[1:, :]
77 | map_up = np.zeros(map.shape)
78 | map_up[:, 1:] = map[:, :-1]
79 | map_down = np.zeros(map.shape)
80 | map_down[:, :-1] = map[:, 1:]
81 |
82 | peaks_binary = np.logical_and.reduce(
83 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > params['thre1']))
84 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse
85 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
86 | id = range(peak_counter, peak_counter + len(peaks))
87 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
88 |
89 | all_peaks.append(peaks_with_score_and_id)
90 | peak_counter += len(peaks)
91 |
92 | connection_all = []
93 | special_k = []
94 | mid_num = 10
95 |
96 | for k in range(len(mapIdx)):
97 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
98 | candA = all_peaks[limbSeq[k][0] - 1]
99 | candB = all_peaks[limbSeq[k][1] - 1]
100 | nA = len(candA)
101 | nB = len(candB)
102 | indexA, indexB = limbSeq[k]
103 | if (nA != 0 and nB != 0):
104 | connection_candidate = []
105 | for i in range(nA):
106 | for j in range(nB):
107 | vec = np.subtract(candB[j][:2], candA[i][:2])
108 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
109 | # failure case when 2 body parts overlap
110 | if norm == 0:
111 | continue
112 | vec = np.divide(vec, norm)
113 |
114 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
115 | np.linspace(candA[i][1], candB[j][1], num=mid_num)))
116 |
117 | vec_x = np.array(
118 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
119 | for I in range(len(startend))])
120 | vec_y = np.array(
121 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
122 | for I in range(len(startend))])
123 |
124 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
125 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
126 | 0.5 * oriImg.shape[0] / norm - 1, 0)
127 | criterion1 = len(np.nonzero(score_midpts > params['thre2'])[0]) > 0.8 * len(
128 | score_midpts)
129 | criterion2 = score_with_dist_prior > 0
130 | if criterion1 and criterion2:
131 | connection_candidate.append([i, j, score_with_dist_prior,
132 | score_with_dist_prior + candA[i][2] + candB[j][2]])
133 |
134 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
135 | connection = np.zeros((0, 5))
136 | for c in range(len(connection_candidate)):
137 | i, j, s = connection_candidate[c][0:3]
138 | if (i not in connection[:, 3] and j not in connection[:, 4]):
139 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
140 | if (len(connection) >= min(nA, nB)):
141 | break
142 |
143 | connection_all.append(connection)
144 | else:
145 | special_k.append(k)
146 | connection_all.append([])
147 |
148 | # last number in each row is the total parts number of that person
149 | # the second last number in each row is the score of the overall configuration
150 | subset = -1 * np.ones((0, 20))
151 | candidate = np.array([item for sublist in all_peaks for item in sublist])
152 |
153 | for k in range(len(mapIdx)):
154 | if k not in special_k:
155 | partAs = connection_all[k][:, 0]
156 | partBs = connection_all[k][:, 1]
157 | indexA, indexB = np.array(limbSeq[k]) - 1
158 |
159 | for i in range(len(connection_all[k])): # = 1:size(temp,1)
160 | found = 0
161 | subset_idx = [-1, -1]
162 | for j in range(len(subset)): # 1:size(subset,1):
163 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
164 | subset_idx[found] = j
165 | found += 1
166 |
167 | if found == 1:
168 | j = subset_idx[0]
169 | if (subset[j][indexB] != partBs[i]):
170 | subset[j][indexB] = partBs[i]
171 | subset[j][-1] += 1
172 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
173 | elif found == 2: # if found 2 and disjoint, merge them
174 | j1, j2 = subset_idx
175 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
176 | if len(np.nonzero(membership == 2)[0]) == 0: # merge
177 | subset[j1][:-2] += (subset[j2][:-2] + 1)
178 | subset[j1][-2:] += subset[j2][-2:]
179 | subset[j1][-2] += connection_all[k][i][2]
180 | subset = np.delete(subset, j2, 0)
181 | else: # as like found == 1
182 | subset[j1][indexB] = partBs[i]
183 | subset[j1][-1] += 1
184 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
185 |
186 | # if find no partA in the subset, create a new subset
187 | elif not found and k < 17:
188 | row = -1 * np.ones(20)
189 | row[indexA] = partAs[i]
190 | row[indexB] = partBs[i]
191 | row[-1] = 2
192 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + \
193 | connection_all[k][i][2]
194 | subset = np.vstack([subset, row])
195 |
196 | # delete rows of subset which have too few parts
197 | deleteIdx = []
198 | for i in range(len(subset)):
199 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
200 | deleteIdx.append(i)
201 | subset = np.delete(subset, deleteIdx, axis=0)
202 |
203 | canvas = cv2.imread(input_image) # B,G,R order
204 | for i in range(18):
205 | for j in range(len(all_peaks[i])):
206 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)
207 |
208 | stickwidth = 4
209 |
210 | for i in range(17):
211 | for n in range(len(subset)):
212 | index = subset[n][np.array(limbSeq[i]) - 1]
213 | if -1 in index:
214 | continue
215 | cur_canvas = canvas.copy()
216 | Y = candidate[index.astype(int), 0]
217 | X = candidate[index.astype(int), 1]
218 | mX = np.mean(X)
219 | mY = np.mean(Y)
220 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
221 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
222 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0,
223 | 360, 1)
224 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
225 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
226 |
227 | return canvas
228 |
229 | if __name__ == '__main__':
230 | parser = argparse.ArgumentParser()
231 | parser.add_argument('--image', type=str, required=True, help='input image')
232 | parser.add_argument('--output', type=str, default='result.png', help='output image')
233 | parser.add_argument('--model', type=str, default='model/keras/model.h5', help='path to the weights file')
234 |
235 | args = parser.parse_args()
236 | input_image = args.image
237 | output = args.output
238 | keras_weights_file = args.model
239 |
240 | tic = time.time()
241 | print('start processing...')
242 |
243 | # load model
244 |
245 | # authors of original model don't use
246 | # vgg normalization (subtracting mean) on input images
247 | model = get_testing_model()
248 | model.load_weights(keras_weights_file)
249 |
250 | # load config
251 | params, model_params = config_reader()
252 |
253 | # generate image with body parts
254 | canvas = process(input_image, params, model_params)
255 |
256 | toc = time.time()
257 | print ('processing time is %.5f' % (toc - tic))
258 |
259 | cv2.imwrite(output, canvas)
260 |
261 | cv2.destroyAllWindows()
262 |
263 |
264 |
265 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | from keras.models import Model
2 | from keras.layers.merge import Concatenate
3 | from keras.layers import Activation, Input, Lambda
4 | from keras.layers.convolutional import Conv2D
5 | from keras.layers.pooling import MaxPooling2D
6 | from keras.layers.merge import Multiply
7 | from keras.regularizers import l2
8 | from keras.initializers import random_normal, constant
9 |
10 | import re
11 |
12 |
13 | #stages = 6
14 | #np_branch1 = 38
15 | #np_branch2 = 19
16 |
17 | def relu(x): return Activation('relu')(x)
18 |
19 | def conv(x, nf, ks, name, weight_decay):
20 | kernel_reg = l2(weight_decay[0]) if weight_decay else None
21 | bias_reg = l2(weight_decay[1]) if weight_decay else None
22 |
23 | x = Conv2D(nf, (ks, ks), padding='same', name=name,
24 | kernel_regularizer=kernel_reg,
25 | bias_regularizer=bias_reg,
26 | kernel_initializer=random_normal(stddev=0.01),
27 | bias_initializer=constant(0.0))(x)
28 | return x
29 |
30 | def pooling(x, ks, st, name):
31 | x = MaxPooling2D((ks, ks), strides=(st, st), name=name)(x)
32 | return x
33 |
34 | def vgg_block(x, weight_decay):
35 | # Block 1
36 | x = conv(x, 64, 3, "conv1_1", (weight_decay, 0))
37 | x = relu(x)
38 | x = conv(x, 64, 3, "conv1_2", (weight_decay, 0))
39 | x = relu(x)
40 | x = pooling(x, 2, 2, "pool1_1")
41 |
42 | # Block 2
43 | x = conv(x, 128, 3, "conv2_1", (weight_decay, 0))
44 | x = relu(x)
45 | x = conv(x, 128, 3, "conv2_2", (weight_decay, 0))
46 | x = relu(x)
47 | x = pooling(x, 2, 2, "pool2_1")
48 |
49 | # Block 3
50 | x = conv(x, 256, 3, "conv3_1", (weight_decay, 0))
51 | x = relu(x)
52 | x = conv(x, 256, 3, "conv3_2", (weight_decay, 0))
53 | x = relu(x)
54 | x = conv(x, 256, 3, "conv3_3", (weight_decay, 0))
55 | x = relu(x)
56 | x = conv(x, 256, 3, "conv3_4", (weight_decay, 0))
57 | x = relu(x)
58 | x = pooling(x, 2, 2, "pool3_1")
59 |
60 | # Block 4
61 | x = conv(x, 512, 3, "conv4_1", (weight_decay, 0))
62 | x = relu(x)
63 | x = conv(x, 512, 3, "conv4_2", (weight_decay, 0))
64 | x = relu(x)
65 |
66 | # Additional non vgg layers
67 | x = conv(x, 256, 3, "conv4_3_CPM", (weight_decay, 0))
68 | x = relu(x)
69 | x = conv(x, 128, 3, "conv4_4_CPM", (weight_decay, 0))
70 | x = relu(x)
71 |
72 | return x
73 |
74 |
75 | def stage1_block(x, num_p, branch, weight_decay):
76 | # Block 1
77 | x = conv(x, 128, 3, "Mconv1_stage1_L%d" % branch, (weight_decay, 0))
78 | x = relu(x)
79 | x = conv(x, 128, 3, "Mconv2_stage1_L%d" % branch, (weight_decay, 0))
80 | x = relu(x)
81 | x = conv(x, 128, 3, "Mconv3_stage1_L%d" % branch, (weight_decay, 0))
82 | x = relu(x)
83 | x = conv(x, 512, 1, "Mconv4_stage1_L%d" % branch, (weight_decay, 0))
84 | x = relu(x)
85 | x = conv(x, num_p, 1, "Mconv5_stage1_L%d" % branch, (weight_decay, 0))
86 |
87 | return x
88 |
89 |
90 | def stageT_block(x, num_p, stage, branch, weight_decay):
91 | # Block 1
92 | x = conv(x, 128, 7, "Mconv1_stage%d_L%d" % (stage, branch), (weight_decay, 0))
93 | x = relu(x)
94 | x = conv(x, 128, 7, "Mconv2_stage%d_L%d" % (stage, branch), (weight_decay, 0))
95 | x = relu(x)
96 | x = conv(x, 128, 7, "Mconv3_stage%d_L%d" % (stage, branch), (weight_decay, 0))
97 | x = relu(x)
98 | x = conv(x, 128, 7, "Mconv4_stage%d_L%d" % (stage, branch), (weight_decay, 0))
99 | x = relu(x)
100 | x = conv(x, 128, 7, "Mconv5_stage%d_L%d" % (stage, branch), (weight_decay, 0))
101 | x = relu(x)
102 | x = conv(x, 128, 1, "Mconv6_stage%d_L%d" % (stage, branch), (weight_decay, 0))
103 | x = relu(x)
104 | x = conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch), (weight_decay, 0))
105 |
106 | return x
107 |
108 |
109 | def apply_mask(x, mask1, mask2, num_p, stage, branch, np_branch1, np_branch2):
110 | w_name = "weight_stage%d_L%d" % (stage, branch)
111 |
112 | # TODO: we have the branch number here, why do we select by channel count instead?
113 | assert np_branch1 != np_branch2  # we select branches by the number of channels; if they accidentally became equal it would be a disaster
114 |
115 | if num_p == np_branch1:
116 | w = Multiply(name=w_name)([x, mask1]) # vec_weight
117 | elif num_p == np_branch2:
118 | w = Multiply(name=w_name)([x, mask2]) # vec_heat
119 | else:
120 | assert False, "wrong number of layers num_p=%d " % num_p
121 | return w
122 |
123 |
124 | def get_training_model(weight_decay, np_branch1, np_branch2, stages = 6, gpus = None):
125 |
126 | img_input_shape = (None, None, 3)
127 | vec_input_shape = (None, None, np_branch1)
128 | heat_input_shape = (None, None, np_branch2)
129 |
130 | inputs = []
131 | outputs = []
132 |
133 | img_input = Input(shape=img_input_shape)
134 | vec_weight_input = Input(shape=vec_input_shape)
135 | heat_weight_input = Input(shape=heat_input_shape)
136 |
137 | inputs.append(img_input)
138 | if np_branch1 > 0:
139 | inputs.append(vec_weight_input)
140 |
141 | if np_branch2 > 0:
142 | inputs.append(heat_weight_input)
143 |
144 | #img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
145 | img_normalized = img_input # will be done on augmentation stage
146 |
147 | # VGG
148 | stage0_out = vgg_block(img_normalized, weight_decay)
149 |
150 | # stage 1 - branch 1 (PAF)
151 | new_x = []
152 | if np_branch1 > 0:
153 | stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, weight_decay)
154 | w1 = apply_mask(stage1_branch1_out, vec_weight_input, heat_weight_input, np_branch1, 1, 1, np_branch1, np_branch2)
155 | outputs.append(w1)
156 | new_x.append(stage1_branch1_out)
157 |
158 | # stage 1 - branch 2 (confidence maps)
159 |
160 | if np_branch2 > 0:
161 | stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, weight_decay)
162 | w2 = apply_mask(stage1_branch2_out, vec_weight_input, heat_weight_input, np_branch2, 1, 2, np_branch1, np_branch2)
163 | outputs.append(w2)
164 | new_x.append(stage1_branch2_out)
165 |
166 | new_x.append(stage0_out)
167 |
168 | x = Concatenate()(new_x)
169 |
170 | # stage sn >= 2
171 | for sn in range(2, stages + 1):
172 |
173 | new_x = []
174 | # stage SN - branch 1 (PAF)
175 | if np_branch1 > 0:
176 | stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, weight_decay)
177 | w1 = apply_mask(stageT_branch1_out, vec_weight_input, heat_weight_input, np_branch1, sn, 1, np_branch1, np_branch2)
178 | outputs.append(w1)
179 | new_x.append(stageT_branch1_out)
180 |
181 | # stage SN - branch 2 (confidence maps)
182 | if np_branch2 > 0:
183 | stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, weight_decay)
184 | w2 = apply_mask(stageT_branch2_out, vec_weight_input, heat_weight_input, np_branch2, sn, 2, np_branch1, np_branch2)
185 | outputs.append(w2)
186 | new_x.append(stageT_branch2_out)
187 |
188 | new_x.append(stage0_out)
189 |
190 | if sn < stages:
191 | x = Concatenate()(new_x)
192 |
193 | model = Model(inputs=inputs, outputs=outputs)
194 | return model
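# Example (illustrative, not from the original file): with the canonical COCO config from
# config.py the branches have 38 PAF channels and 19 heatmap channels (18 parts + background):
#   model = get_training_model(weight_decay=5e-4, np_branch1=38, np_branch2=19)
# the weight_decay value above is only an assumed placeholder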
195 |
196 | def get_lrmult(model):
197 |
198 | # setup lr multipliers for conv layers
199 | lr_mult = dict()
200 |
201 | for layer in model.layers:
202 |
203 | if isinstance(layer, Conv2D):
204 |
205 | # stage = 1
206 | if re.match("Mconv\d_stage1.*", layer.name):
207 | kernel_name = layer.weights[0].name
208 | bias_name = layer.weights[1].name
209 | lr_mult[kernel_name] = 1
210 | lr_mult[bias_name] = 2
211 |
212 | # stage > 1
213 | elif re.match("Mconv\d_stage.*", layer.name):
214 | kernel_name = layer.weights[0].name
215 | bias_name = layer.weights[1].name
216 | lr_mult[kernel_name] = 4
217 | lr_mult[bias_name] = 8
218 |
219 | # vgg
220 | else:
221 | print("matched as vgg layer", layer.name)
222 | kernel_name = layer.weights[0].name
223 | bias_name = layer.weights[1].name
224 | lr_mult[kernel_name] = 1
225 | lr_mult[bias_name] = 2
226 |
227 | return lr_mult
228 |
229 |
230 | def get_testing_model(np_branch1, np_branch2, stages = 6):
231 |
232 | img_input_shape = (None, None, 3)
233 |
234 | img_input = Input(shape=img_input_shape)
235 |
236 | img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
237 |
238 | # VGG
239 | stage0_out = vgg_block(img_normalized, None)
240 |
241 | stages_out = []
242 |
243 | # stage 1 - branch 1 (PAF)
244 | if np_branch1 > 0:
245 | stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, None)
246 | stages_out.append(stage1_branch1_out)
247 |
248 | # stage 1 - branch 2 (confidence maps)
249 | if np_branch2 > 0:
250 | stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, None)
251 | stages_out.append(stage1_branch2_out)
252 |
253 | x = Concatenate()(stages_out + [stage0_out])
254 |
255 | # stage t >= 2
256 | stageT_branch1_out = None
257 | stageT_branch2_out = None
258 | for sn in range(2, stages + 1):
259 |
260 | stages_out = []
261 |
262 | if np_branch1 > 0:
263 | stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, None)
264 | stages_out.append(stageT_branch1_out)
265 | if np_branch2 > 0:
266 | stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, None)
267 | stages_out.append(stageT_branch2_out)
268 |
269 | if sn < stages:
270 | x = Concatenate()(stages_out + [stage0_out])
271 |
272 | model = Model(inputs=[img_input], outputs=[stageT_branch1_out, stageT_branch2_out])
273 |
274 | return model
--------------------------------------------------------------------------------
/model/caffe/layers/INFO:
--------------------------------------------------------------------------------
1 | Folder for layers extracted from the caffe model by the tool dump_caffe_layers.py
--------------------------------------------------------------------------------
/model/caffe_to_keras.py:
--------------------------------------------------------------------------------
1 | from model import get_testing_model
2 | import numpy as np
3 | import os
4 |
5 | CAFFE_LAYERS_DIR = "model/caffe/layers"
6 | KERAS_MODEL_FILE = "model/keras/model.h5"
7 |
8 | m = get_testing_model(np_branch1=38, np_branch2=19)  # COCO model: 38 PAF channels, 19 heatmap channels
9 |
10 | for layer in m.layers:
11 | layer_name = layer.name
12 | if (os.path.exists(os.path.join(CAFFE_LAYERS_DIR, "W_%s.npy" % layer_name))):
13 | w = np.load(os.path.join(CAFFE_LAYERS_DIR, "W_%s.npy" % layer_name))
14 | b = np.load(os.path.join(CAFFE_LAYERS_DIR, "b_%s.npy" % layer_name))
15 |
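    # caffe stores conv kernels as (out_channels, in_channels, kH, kW);
    # Keras Conv2D expects (kH, kW, in_channels, out_channels), hence the (2, 3, 1, 0) transpose below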
16 | w = np.transpose(w, (2, 3, 1, 0))
17 |
18 | layer_weights = [w, b]
19 | layer.set_weights(layer_weights)
20 |
21 | m.save_weights(KERAS_MODEL_FILE)
22 |
23 | print("Done !")
--------------------------------------------------------------------------------
/model/dump_caffe_layers.py:
--------------------------------------------------------------------------------
1 | #
2 | # Run this file from docker:
3 | #
4 | # docker run -v [absolute path to your keras_Realtime_Multi-Person_Pose_Estimation folder]:/workspace -it bvlc/caffe:cpu python dump_caffe_layers.py
5 | #
6 |
7 | from __future__ import division, print_function
8 | import caffe
9 | import numpy as np
10 | import os
11 |
12 | layers_output = 'model/caffe/layers'
13 | caffe_model = 'model/caffe/_trained_COCO/pose_iter_440000.caffemodel'
14 | caffe_proto = 'model/caffe/_trained_COCO/pose_deploy.prototxt'
15 |
16 | caffe.set_mode_cpu()
17 | net = caffe.Net(caffe_proto, caffe_model, caffe.TEST)
18 |
19 | # layer names and output shapes
20 | for layer_name, blob in net.blobs.items():
21 | print(layer_name, blob.data.shape)
22 |
23 | # write out weight matrices and bias vectors
24 | for k, v in net.params.items():
25 | print(k, v[0].data.shape, v[1].data.shape)
26 | np.save(os.path.join(layers_output, "W_{:s}.npy".format(k)), v[0].data)
27 | np.save(os.path.join(layers_output, "b_{:s}.npy".format(k)), v[1].data)
28 |
29 | print("Done !")
30 |
--------------------------------------------------------------------------------
/model/get_caffe_model.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget -nc --directory-prefix=./caffe/_trained_COCO/ http://posefs1.perception.cs.cmu.edu/Users/ZheCao/pose_iter_440000.caffemodel
4 | wget -nc --directory-prefix=./caffe/_trained_MPI/ http://posefs1.perception.cs.cmu.edu/Users/ZheCao/pose_iter_146000.caffemodel
5 |
--------------------------------------------------------------------------------
/model/get_keras_model.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget -nc --directory-prefix=./keras/ https://www.dropbox.com/s/llpxd14is7gyj0z/model.h5
--------------------------------------------------------------------------------
/py_rmpe_server/py_rmpe_data_iterator.py:
--------------------------------------------------------------------------------
1 |
2 | import h5py
3 | import random
4 | import json
5 | import numpy as np
6 | import cv2
7 |
8 | from py_rmpe_server.py_rmpe_transformer import Transformer, AugmentSelection
9 | from py_rmpe_server.py_rmpe_heatmapper import Heatmapper
10 |
11 | from time import time
12 |
13 | class RawDataIterator:
14 |
15 | def __init__(self, global_config, configs, shuffle = True, augment = True):
16 |
17 | self.global_config = global_config
18 |
19 | if not isinstance(configs, (list,tuple)):
20 | configs = [configs]
21 |
22 | self.h5files = [c.source() for c in configs]
23 | self.configs = configs
24 | self.h5s = [h5py.File(fname, "r") for fname in self.h5files]
25 | self.datums = [ h5['datum'] if 'datum' in h5 else (h5['dataset'], h5['images'], h5['masks'] if 'masks' in h5 else None) for h5 in self.h5s ]
26 |
27 | self.heatmapper = Heatmapper(global_config)
28 | self.transformer = Transformer(global_config)
29 | self.augment = augment
30 | self.shuffle = shuffle
31 |
32 | self.keys = []
33 |
34 | for n,d in enumerate(self.datums):
35 | if isinstance(d, (list, tuple)):
36 | k = list(d[0].keys())
37 | else:
38 | k = list(d.keys())
39 |
40 | print(len(k))
41 |
42 | self.keys += zip([n] * len(k), k)
43 |
44 | def gen(self, timing = False):
45 |
46 | if self.shuffle:
47 | random.shuffle(self.keys)
48 |
49 | for num, key in self.keys:
50 |
51 | read_start = time()
52 | image, mask, meta, debug = self.read_data(num, key)
53 |
54 | aug_start = time()
55 |
56 | # transform picture
57 | assert mask.dtype == np.uint8, mask.dtype
58 | image, mask, meta = self.transformer.transform(image, mask, meta, aug=None if self.augment else AugmentSelection.unrandom())
59 | assert mask.dtype == np.float, mask.dtype
60 |
61 | # we need layered mask on next stage
62 | mask = self.configs[num].convert_mask(mask, self.global_config, joints = meta['joints'])
63 |
64 | # create heatmaps and pafs
65 | labels = self.heatmapper.create_heatmaps(meta['joints'], mask)
66 |
67 | # normalize image to save gpu/cpu time for keras
68 | image = image/256.0 - 0.5
69 |
70 | if timing:
71 | yield image, mask, labels, meta['joints'], time()-read_start, time()-aug_start
72 | else:
73 | yield image, mask, labels, meta['joints']
74 |
75 | def num_keys(self):
76 |
77 | return len(self.keys)
78 |
79 | def read_data(self, num, key):
80 |
81 | config = self.configs[num]
82 | datum = self.datums[num]
83 | if isinstance(datum, (list, tuple)):
84 | dataset, images, masks = datum
85 | return self.read_data_new(dataset, images, masks, key, config)
86 | else:
87 | return self.read_data_old(datum, key, config)
88 |
89 |
90 | def read_data_old(self, datum, key, config):
91 |
92 | entry = datum[key]
93 |
94 | assert 'meta' in entry.attrs, "No 'meta' attribute in .h5 file. Did you generate .h5 with new code?"
95 |
96 | debug = json.loads(entry.attrs['meta'])
97 | meta = {}
98 | meta["objpos"]=debug["objpos"]
99 | meta["scale_provided"] = debug["scale_provided"]
100 | meta["joints"] = debug["joints"]
101 |
102 | meta = config.convert(meta, self.global_config)
103 | data = entry.value
104 |
105 | if data.shape[0] <= 6:
106 | # TODO: this is extra work, we should store the data in the correct (non-transposed) format
107 | # can't do that now because I still want storage compatibility
108 | # we need the image in the classical, non-transposed format in this program for warpAffine
109 | data = data.transpose([1,2,0])
110 |
111 | img = data[:,:,0:3]
112 | mask_miss = data[:,:,4]
113 | #mask = data[:,:,5]
114 |
115 | return img, mask_miss, meta, debug
116 |
117 | def read_data_new(self, dataset, images, masks, key, config):
118 |
119 | entry = dataset[key]
120 |
121 | assert 'meta' in entry.attrs, "No 'meta' attribute in .h5 file. Did you generate .h5 with new code?"
122 |
123 | meta = json.loads(entry.value)
124 | debug = json.loads(entry.attrs['meta'])
125 | meta = config.convert(meta, self.global_config)
126 |
127 | img = images[meta['image']].value
128 | mask_miss = None
129 |
130 | if len(img.shape)==2 and img.shape[1]==1:
131 | img = cv2.imdecode(img, flags=-1)
132 |
133 | if img.shape[2]>3:
134 | mask_miss = img[:, :, 3]
135 | img = img[:, :, 0:3]
136 |
137 | if mask_miss is None:
138 | if masks is not None:
139 | mask_miss = masks[meta['image']].value
140 | if len(mask_miss.shape) == 2 and mask_miss.shape[1]==1:
141 | mask_miss = cv2.imdecode(mask_miss, flags = -1)
142 |
143 | if mask_miss is None:
144 | mask_miss = np.full((img.shape[0], img.shape[1]), fill_value=255, dtype=np.uint8)
145 |
146 |
147 | return img, mask_miss, meta, debug
148 |
149 | def __del__(self):
150 |
151 | if 'h5s' in vars(self):
152 | for h5 in self.h5s:
153 | h5.close()
154 |
--------------------------------------------------------------------------------
/py_rmpe_server/py_rmpe_heatmapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import numpy as np
4 | from math import sqrt, isnan
5 |
6 | class Heatmapper:
7 |
8 | def __init__(self, config):
9 |
10 | self.config = config
11 | sigma = config.transform_params.sigma
12 | thre = config.transform_params.paf_thre
13 |
14 | self.double_sigma2 = 2 * sigma * sigma
15 | self.thre = thre
16 |
17 | # cached common parameters which are the same for all iterations and all pictures
18 |
19 | stride = self.config.stride
20 | width = self.config.width//stride
21 | height = self.config.height//stride
22 |
23 | # these are the coordinates of the centers of the coarser (stride) grid
24 | self.grid_x = np.arange(width)*stride + stride/2-0.5
25 | self.grid_y = np.arange(height)*stride + stride/2-0.5
26 |
27 | self.Y, self.X = np.mgrid[0:self.config.height:stride, 0:self.config.width:stride]
28 |
29 | # TODO: check it again
30 | # basically we should use center of grid, but in this place classic implementation uses left-top point.
31 | # self.X = self.X + stride / 2 - 0.5
32 | # self.Y = self.Y + stride / 2 - 0.5
33 |
34 |
35 | def create_heatmaps(self, joints, mask):
36 |
37 | heatmaps = np.zeros(self.config.parts_shape, dtype=np.float)
38 |
39 | self.put_joints(heatmaps, joints)
40 | sl = slice(self.config.heat_start, self.config.heat_start + self.config.heat_layers)
41 | heatmaps[:,:,self.config.bkg_start] = 1. - np.amax(heatmaps[:,:,sl], axis=2)
42 |
43 | self.put_limbs(heatmaps, joints)
44 |
45 | heatmaps *= mask
46 |
47 | return heatmaps
48 |
49 |
50 | def put_gaussian_maps(self, heatmaps, layer, joints):
51 |
52 | # since exp(a+b) = exp(a)*exp(b), the 2d exponent can be computed from two 1d exponents and combined below
53 |
54 | for i in range(joints.shape[0]):
55 |
56 | exp_x = np.exp(-(self.grid_x-joints[i,0])**2/self.double_sigma2)
57 | exp_y = np.exp(-(self.grid_y-joints[i,1])**2/self.double_sigma2)
58 |
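            # the outer product of the two 1D exponents below gives the full 2D Gaussian
            # exp(-((x - jx)**2 + (y - jy)**2) / (2 * sigma**2)) evaluated on the stride grid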
59 | exp = np.outer(exp_y, exp_x)
60 |
61 | # note: this is the correct way to combine maps - min(sum(...), 1.0), as in the C++ code, is incorrect
62 | # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/118
63 | heatmaps[:, :, self.config.heat_start + layer] = np.maximum(heatmaps[:, :, self.config.heat_start + layer], exp)
64 |
65 | def put_joints(self, heatmaps, joints):
66 |
67 | for i in range(self.config.num_parts):
68 | visible = joints[:,i,2] < 2
69 | self.put_gaussian_maps(heatmaps, i, joints[visible, i, 0:2])
70 |
71 |
72 | def put_vector_maps(self, heatmaps, layerX, layerY, joint_from, joint_to):
73 |
74 | count = np.zeros(heatmaps.shape[:-1], dtype=np.int)
75 |
76 | for i in range(joint_from.shape[0]):
77 | (x1, y1) = joint_from[i]
78 | (x2, y2) = joint_to[i]
79 |
80 | dx = x2-x1
81 | dy = y2-y1
82 | dnorm = sqrt(dx*dx + dy*dy)
83 |
84 | if dnorm==0:  # we get nan here sometimes, it kills the NN
85 | # TODO: handle it better. probably we should add a zero paf, a centered paf, or skip this pair completely
86 | print("Parts are too close to each other. Length is zero. Skipping")
87 | continue
88 |
89 | dx = dx / dnorm
90 | dy = dy / dnorm
91 |
92 | assert not isnan(dx) and not isnan(dy), "dnorm is zero, wtf"
93 |
94 | min_sx, max_sx = (x1, x2) if x1 < x2 else (x2, x1)
95 | min_sy, max_sy = (y1, y2) if y1 < y2 else (y2, y1)
96 |
97 | min_sx = int(round((min_sx - self.thre) / self.config.stride))
98 | min_sy = int(round((min_sy - self.thre) / self.config.stride))
99 | max_sx = int(round((max_sx + self.thre) / self.config.stride))
100 | max_sy = int(round((max_sy + self.thre) / self.config.stride))
101 |
102 | # check PAF off screen. do not really need to do it with max>grid size
103 | if max_sy < 0:
104 | continue
105 |
106 | if max_sx < 0:
107 | continue
108 |
109 | if min_sx < 0:
110 | min_sx = 0
111 |
112 | if min_sy < 0:
113 | min_sy = 0
114 |
115 | #TODO: check it again
116 | slice_x = slice(min_sx, max_sx)  # + 1? this mask not only speeds things up but actually crops the paf; copied from the original code
117 | slice_y = slice(min_sy, max_sy)  # + 1? original C++: int g_y = min_y; g_y < max_y; g_y++ -- note the strict <
118 |
119 | dist = distances(self.X[slice_y,slice_x], self.Y[slice_y,slice_x], x1, y1, x2, y2)
120 | dist = dist <= self.thre
121 |
122 | # TODO: averaging by pafs mentioned in the paper but never worked in C++ augmentation code
123 | heatmaps[slice_y, slice_x, layerX][dist] = (dist * dx)[dist] # += dist * dx
124 | heatmaps[slice_y, slice_x, layerY][dist] = (dist * dy)[dist] # += dist * dy
125 | count[slice_y, slice_x][dist] += 1
126 |
127 | # TODO: averaging by pafs mentioned in the paper but never worked in C++ augmentation code
128 | # heatmaps[:, :, layerX][count > 0] /= count[count > 0]
129 | # heatmaps[:, :, layerY][count > 0] /= count[count > 0]
130 |
131 | def put_limbs(self, heatmaps, joints):
132 |
133 | for (i,(fr,to)) in enumerate(self.config.limbs_conn):
134 |
135 | visible_from = joints[:,fr,2] < 2
136 | visible_to = joints[:,to, 2] < 2
137 | visible = visible_from & visible_to
138 |
139 | layerX, layerY = (self.config.paf_start + i*2, self.config.paf_start + i*2 + 1)
140 | self.put_vector_maps(heatmaps, layerX, layerY, joints[visible, fr, 0:2], joints[visible, to, 0:2])
141 |
142 |
143 |
144 | # vectorised calculation of the distance from points (arrays X, Y of arbitrary shape) to the line defined by the segment (x1,y1) -> (x2,y2)
145 |
146 | def distances(X, Y, x1, y1, x2, y2):
147 |
148 | # classic point-to-line distance formula:
149 | # d = ((x2-x1)*(y1-Y) - (x1-X)*(y2-y1)) / sqrt((x2-x1)**2 + (y2-y1)**2)
150 |
151 | xD = (x2-x1)
152 | yD = (y2-y1)
153 | norm2 = sqrt(xD**2 + yD**2)
154 | dist = xD*(y1-Y)-(x1-X)*yD
155 | dist /= norm2
156 |
157 | return np.abs(dist)
158 |
159 | def test():
160 |
161 | hm = Heatmapper()
162 | d = distances(hm.X, hm.Y, 100, 100, 50, 150)
163 | print(d < 8.)
164 |
165 | if __name__ == "__main__":
166 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000)
167 | test()
168 |
169 |
--------------------------------------------------------------------------------
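The separable-exponent trick noted in put_gaussian_maps above can be checked in isolation: exp(-((x-jx)^2+(y-jy)^2)/(2*sigma^2)) factors into a product of two 1D exponents, so the whole 2D map is just an outer product. A minimal sketch, with a made-up grid, stride and sigma rather than the repository's config values:

    import numpy as np

    # toy grid in image coordinates (values assumed for illustration only)
    stride, width, height, sigma = 8, 368, 368, 7.0
    grid_x = np.arange(width // stride) * stride + stride / 2 - 0.5
    grid_y = np.arange(height // stride) * stride + stride / 2 - 0.5
    double_sigma2 = 2 * sigma * sigma

    jx, jy = 100.0, 150.0  # one joint position in pixels

    # separable form: two 1D exponents combined with an outer product
    exp_x = np.exp(-(grid_x - jx) ** 2 / double_sigma2)
    exp_y = np.exp(-(grid_y - jy) ** 2 / double_sigma2)
    heat_fast = np.outer(exp_y, exp_x)

    # direct 2D evaluation for comparison
    X, Y = np.meshgrid(grid_x, grid_y)
    heat_direct = np.exp(-((X - jx) ** 2 + (Y - jy) ** 2) / double_sigma2)

    assert np.allclose(heat_fast, heat_direct)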
/py_rmpe_server/py_rmpe_transformer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import numpy as np
4 | from math import cos, sin, pi
5 | import cv2
6 | import random
7 |
8 | class AugmentSelection:
9 |
10 | def __init__(self, flip=False, degree = 0., crop = (0,0), scale = 1.):
11 | self.flip = flip
12 | self.degree = degree #rotate
13 | self.crop = crop #shift actually
14 | self.scale = scale
15 |
16 | @staticmethod
17 | def random(transform_params):
18 | flip = random.uniform(0.,1.) > transform_params.flip_prob
19 | degree = random.uniform(-1.,1.) * transform_params.max_rotate_degree
20 | scale = (transform_params.scale_max - transform_params.scale_min)*random.uniform(0.,1.)+transform_params.scale_min \
21 | if random.uniform(0.,1.) > transform_params.scale_prob else 1. # TODO: see 'scale improbability' TODO above
22 | x_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max)
23 | y_offset = int(random.uniform(-1.,1.) * transform_params.center_perterb_max)
24 |
25 | return AugmentSelection(flip, degree, (x_offset,y_offset), scale)
26 |
27 | @staticmethod
28 | def unrandom():
29 | flip = False
30 | degree = 0.
31 | scale = 1.
32 | x_offset = 0
33 | y_offset = 0
34 |
35 | return AugmentSelection(flip, degree, (x_offset,y_offset), scale)
36 |
37 | def affine(self, center, scale_self, config):
38 |
39 | # the main idea: we do all image transformations with one affine matrix.
40 | # this saves a lot of CPU and makes the code significantly shorter;
41 | # the same affine matrix can then be used to transform the joint coordinates.
42 |
43 |
44 | A = self.scale * cos(self.degree / 180. * pi )
45 | B = self.scale * sin(self.degree / 180. * pi )
46 |
47 | scale_size = config.transform_params.target_dist / scale_self * self.scale
48 |
49 | (center_x, center_y) = center  # centre of the main person (objpos)
50 | center_x += self.crop[0]
51 | center_y += self.crop[1]
52 |
53 | center2zero = np.array( [[ 1., 0., -center_x],
54 | [ 0., 1., -center_y ],
55 | [ 0., 0., 1. ]] )
56 |
57 | rotate = np.array( [[ A, B, 0 ],
58 | [ -B, A, 0 ],
59 | [ 0, 0, 1. ] ])
60 |
61 | scale = np.array( [[ scale_size, 0, 0 ],
62 | [ 0, scale_size, 0 ],
63 | [ 0, 0, 1. ] ])
64 |
65 | flip = np.array( [[ -1 if self.flip else 1., 0., 0. ],
66 | [ 0., 1., 0. ],
67 | [ 0., 0., 1. ]] )
68 |
69 | center2center = np.array( [[ 1., 0., config.width//2],
70 | [ 0., 1., config.height//2 ],
71 | [ 0., 0., 1. ]] )
72 |
73 | # order of combination is reversed
74 | combined = center2center.dot(flip).dot(scale).dot(rotate).dot(center2zero)
75 |
76 | return combined[0:2]
77 |
78 | class Transformer:
79 |
80 | def __init__(self, config):
81 |
82 | self.config = config
83 |
84 | def transform(self, img, mask, meta, aug = None):
85 |
86 | if aug is None:
87 | aug = AugmentSelection.random(self.config.transform_params)
88 |
89 | # warp picture and mask
90 | M = aug.affine(meta['objpos'][0], meta['scale_provided'][0], self.config)
91 |
92 | # TODO: need to understand this: scale_provided[0] is the height of the main person's bbox divided by 368, calculated in coco_masks_hdf5.py
93 | # print(img.shape)
94 | img = cv2.warpAffine(img, M, (self.config.height, self.config.width), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(127,127,127))
95 | mask = cv2.warpAffine(mask, M, (self.config.height, self.config.width), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=255)
96 | mask = cv2.resize(mask, self.config.mask_shape, interpolation=cv2.INTER_CUBIC) # TODO: should be combined with warp for speed
97 | mask = mask.astype(np.float) / 255.
98 |
99 | # warp key points
100 | #TODO: a joint could be cropped out by augmentation; in that case we should mark it as invisible.
101 | #update: maybe we don't need this: the original code removed parts shifted more than half outside the crop, maybe we should keep that behaviour
102 | original_points = meta['joints'].copy()
103 | original_points[:,:,2]=1 # we reuse the 3rd column in a completely different way here (as the homogeneous coordinate 1); it is a hack
104 | converted_points = np.matmul(M, original_points.transpose([0,2,1])).transpose([0,2,1])
105 | meta['joints'][:,:,0:2]=converted_points
106 |
107 | # if the image was flipped, the right leg has just become the left leg and vice versa, so swap the left/right part indices
108 |
109 | if aug.flip:
110 | tmpLeft = meta['joints'][:, self.config.leftParts, :]
111 | tmpRight = meta['joints'][:, self.config.rightParts, :]
112 | meta['joints'][:, self.config.leftParts, :] = tmpRight
113 | meta['joints'][:, self.config.rightParts, :] = tmpLeft
114 |
115 |
116 | return img, mask, meta
117 |
118 |
--------------------------------------------------------------------------------
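AugmentSelection.affine above folds crop, rotation, scaling and flipping into a single 2x3 matrix that is applied once to the image and once to the keypoints. A minimal sketch of the same composition idea, using hypothetical sizes and augmentation values rather than the repository's config object:

    import numpy as np
    import cv2
    from math import cos, sin, pi

    h, w = 368, 368                    # assumed output size
    center = (200.0, 180.0)            # assumed centre of the main person in the source image
    degree, scale = 15.0, 1.2          # assumed augmentation parameters

    A = scale * cos(degree / 180. * pi)
    B = scale * sin(degree / 180. * pi)

    center2zero = np.array([[1., 0., -center[0]], [0., 1., -center[1]], [0., 0., 1.]])
    rotate_scale = np.array([[A, B, 0.], [-B, A, 0.], [0., 0., 1.]])
    center2center = np.array([[1., 0., w // 2], [0., 1., h // 2], [0., 0., 1.]])

    # the rightmost matrix acts first, exactly as in affine() above
    M = (center2center @ rotate_scale @ center2zero)[0:2]

    img = np.full((300, 400, 3), 200, np.uint8)
    warped = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC,
                            borderMode=cv2.BORDER_CONSTANT, borderValue=(127, 127, 127))

    # keypoints go through the same matrix in homogeneous coordinates
    joints = np.array([[210., 190., 1.], [180., 170., 1.]])  # (x, y, 1) per joint
    warped_joints = joints @ M.T                             # resulting (x, y) pairs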
/py_rmpe_server/rmpe_server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import numpy as np
4 | import zmq
5 | from multiprocessing import Process
6 | from time import time
7 |
8 | sys.path.append("..")
9 |
10 | from py_rmpe_data_iterator import RawDataIterator
11 | from config import COCOSourceConfig, MPIISourceConfig, GetConfig
12 |
13 |
14 | class Server:
15 |
16 | # these methods are all called in the parent process
17 |
18 | def __init__(self, global_config, configs, port, name, shuffle, augment):
19 |
20 | self.name = name
21 | self.port = port
22 | self.configs = configs
23 | self.global_config = global_config
24 |
25 | self.shuffle = shuffle
26 | self.augment = augment
27 |
28 | self.process = Process(target=Server.loop, args=(self,))
29 | self.process.daemon = True
30 | self.process.start()
31 |
32 |
33 | def join(self):
34 |
35 | return self.process.join(10)
36 |
37 | # these methods are all called in the child process
38 |
39 | def init(self):
40 |
41 | self.context = zmq.Context()
42 | self.socket = self.context.socket(zmq.PUSH)
43 | self.socket.set_hwm(160)
44 | self.socket.bind("tcp://*:%s" % self.port)
45 |
46 | @staticmethod
47 | def loop(self):
48 |
49 | print("%s: Child process init... " % self.name)
50 | self.init()
51 |
52 | iterator = RawDataIterator(self.global_config, self.configs, shuffle=self.shuffle, augment=self.augment)
53 |
54 | print("%s: Loop started... " % self.name)
55 |
56 | num = 0
57 | generation = 0
58 | cycle_start = time()
59 |
60 | while True:
61 |
62 | keys = iterator.num_keys()
63 | print("%s: generation %s, %d images " % (self.name, generation, keys))
64 |
65 | start = time()
66 | for (image, mask, labels, keypoints, read_time, aug_time) in iterator.gen(timing=True):
67 |
68 | augment_time = time()-start
69 |
70 | headers = self.produce_headers(image, mask, labels, keypoints)
71 | self.socket.send_json(headers)
72 | self.socket.send(np.ascontiguousarray(image))
73 | self.socket.send(np.ascontiguousarray(mask))
74 | self.socket.send(np.ascontiguousarray(labels))
75 | self.socket.send(np.ascontiguousarray(keypoints))
76 |
77 | num += 1
78 | print("%s [%d/%d] read/decompress %0.2f ms, aug %0.2f ms (%0.2f im/s), send %0.2f s" % (self.name, num, keys, read_time*1000, aug_time*1000, 1./aug_time, time() - start - aug_time) )
79 | start = time()
80 |
81 | def produce_headers(self, img, mask, labels, keypoints):
82 |
83 | header_data = {"descr": img.dtype.str, "shape": img.shape, "fortran_order": False, "normalized": True }
84 | header_mask = {"descr": mask.dtype.str, "shape": mask.shape, "fortran_order": False}
85 | header_label = {"descr": labels.dtype.str, "shape": labels.shape, "fortran_order": False}
86 | header_keypoints = {"descr": keypoints.dtype.str, "shape": keypoints.shape, "fortran_order": False}
87 |
88 | headers = [header_data, header_mask, header_label, header_keypoints]
89 |
90 | return headers
91 |
92 |
93 | def main():
94 |
95 | train = Server(GetConfig("Canonical"), COCOSourceConfig("../dataset/coco_train_dataset.h5"), 5555, "Train", shuffle=True, augment=True)
96 | val = Server(GetConfig("Canonical"), COCOSourceConfig("../dataset/coco_val_dataset.h5"), 5556, "Val", shuffle=False, augment=False)
97 |
98 | processes = [train, val]
99 |
100 | while None in [p.process.exitcode for p in processes]:
101 |
102 | print("exitcodes", [p.process.exitcode for p in processes])
103 | for p in processes:
104 | if p.process.exitcode is None:
105 | p.join()
106 |
107 |
108 | np.set_printoptions(precision=1, linewidth=100*3, suppress=True, threshold=100000)
109 | main()
110 |
111 |
112 |
--------------------------------------------------------------------------------
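For each sample, Server.loop above pushes one JSON message (the header list built by produce_headers) followed by four raw buffers: image, mask, labels and keypoints. A minimal sketch of a compatible receiver, assuming that message order; the repository's actual consumer is DataGeneratorClient in training/ds_generators.py:

    import numpy as np
    import zmq

    context = zmq.Context()
    socket = context.socket(zmq.PULL)
    socket.set_hwm(160)
    socket.connect("tcp://localhost:5555")   # the train server port used in main() above

    headers = socket.recv_json()             # [header_data, header_mask, header_label, header_keypoints]
    arrays = []
    for header in headers:
        buf = socket.recv()
        arr = np.frombuffer(buf, dtype=np.dtype(header["descr"])).reshape(header["shape"])
        arrays.append(arr)

    image, mask, labels, keypoints = arrays
    print(image.shape, mask.shape, labels.shape, keypoints.shape)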
/readme/5ep_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/5ep_result.png
--------------------------------------------------------------------------------
/readme/dance.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/dance.gif
--------------------------------------------------------------------------------
/readme/losses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/losses.png
--------------------------------------------------------------------------------
/readme/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/result.png
--------------------------------------------------------------------------------
/readme/tr_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/readme/tr_results.png
--------------------------------------------------------------------------------
/sample_images/ski.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation/8ac3229444128ef6a64be35106d5111c7f0e8fb5/sample_images/ski.jpg
--------------------------------------------------------------------------------
/testing/coco.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# This is actually dremovd@github code for calculating coco metric. "
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {
16 | "collapsed": true
17 | },
18 | "outputs": [],
19 | "source": [
20 | "import sys\n",
21 | "import pandas as pd"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import os\n",
33 | "\n",
34 | "os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'\n",
35 | "os.environ['CUDA_VISIBLE_DEVICES']='0'"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "Create keras model and load weights"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 3,
48 | "metadata": {
49 | "collapsed": false
50 | },
51 | "outputs": [
52 | {
53 | "name": "stderr",
54 | "output_type": "stream",
55 | "text": [
56 | "Using TensorFlow backend.\n"
57 | ]
58 | }
59 | ],
60 | "source": [
61 | "from model import get_testing_model\n",
62 | "model = get_testing_model()"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 4,
68 | "metadata": {
69 | "collapsed": false
70 | },
71 | "outputs": [],
72 | "source": [
73 | "from coco_metric import per_image_scores, validation"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 6,
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "outputs": [],
83 | "source": [
84 | "training_dir = './training/'\n",
85 | "trained_models = [\n",
86 | " 'weights'\n",
87 | " #'weights-cpp-lr',\n",
88 | " #'weights-python-last',\n",
89 | "]\n",
90 | "optimal_epoch_loss = 'val_weight_stage6_L1_loss'"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 7,
96 | "metadata": {
97 | "collapsed": false
98 | },
99 | "outputs": [
100 | {
101 | "name": "stdout",
102 | "output_type": "stream",
103 | "text": [
104 | "Model 'weights', optimal loss: 78.969 at epoch 36\n",
105 | "./training/weights/weights.0036.h5\n",
106 | "loading annotations into memory...\n"
107 | ]
108 | },
109 | {
110 | "name": "stderr",
111 | "output_type": "stream",
112 | "text": [
113 | "\r",
114 | " 0%| | 0/5000 [00:00, ?it/s]"
115 | ]
116 | },
117 | {
118 | "name": "stdout",
119 | "output_type": "stream",
120 | "text": [
121 | "Done (t=0.27s)\n",
122 | "creating index...\n",
123 | "index created!\n"
124 | ]
125 | },
126 | {
127 | "name": "stderr",
128 | "output_type": "stream",
129 | "text": [
130 | "100%|██████████| 5000/5000 [1:45:30<00:00, 1.24s/it]\n"
131 | ]
132 | },
133 | {
134 | "name": "stdout",
135 | "output_type": "stream",
136 | "text": [
137 | "Loading and preparing results...\n",
138 | "DONE (t=0.40s)\n",
139 | "creating index...\n",
140 | "index created!\n",
141 | "Running per image evaluation...\n",
142 | "Evaluate annotation type *keypoints*\n",
143 | "DONE (t=3.18s).\n",
144 | "Accumulating evaluation results...\n",
145 | "DONE (t=0.07s).\n",
146 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.553\n",
147 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.769\n",
148 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.594\n",
149 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.526\n",
150 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.606\n",
151 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.590\n",
152 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.783\n",
153 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.626\n",
154 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.535\n",
155 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.671\n"
156 | ]
157 | }
158 | ],
159 | "source": [
160 | "from sklearn.externals import joblib\n",
161 | "\n",
162 | "for trained_model in trained_models:\n",
163 | " model_dir = os.path.join(training_dir, trained_model)\n",
164 | " training_log = pd.read_csv(os.path.join(model_dir, 'training.csv'))\n",
165 | " min_index = training_log[[optimal_epoch_loss]].idxmin()\n",
166 | " min_epoch, min_loss = training_log.loc[min_index][['epoch', optimal_epoch_loss]].values[0]\n",
167 | " print(\"Model '%s', optimal loss: %.3f at epoch %d\" % (trained_model, min_loss, min_epoch))\n",
168 | " \n",
169 | " epoch_weights_name = os.path.join(model_dir, 'weights.%04d.h5' % min_epoch)\n",
170 | " print(epoch_weights_name)\n",
171 | " model.load_weights(epoch_weights_name)\n",
172 | " eval_result = validation(model, dump_name = trained_model)\n",
173 | " joblib.dump(eval_result, 'metrics-raw-%s.dump' % trained_model)"
174 | ]
175 | },
176 | {
177 | "cell_type": "raw",
178 | "metadata": {},
179 | "source": []
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 8,
184 | "metadata": {
185 | "collapsed": false
186 | },
187 | "outputs": [
188 | {
189 | "name": "stdout",
190 | "output_type": "stream",
191 | "text": [
192 | "loading annotations into memory...\n"
193 | ]
194 | },
195 | {
196 | "name": "stderr",
197 | "output_type": "stream",
198 | "text": [
199 | "\r",
200 | " 0%| | 0/5000 [00:00, ?it/s]"
201 | ]
202 | },
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "Done (t=0.40s)\n",
208 | "creating index...\n",
209 | "index created!\n"
210 | ]
211 | },
212 | {
213 | "name": "stderr",
214 | "output_type": "stream",
215 | "text": [
216 | "100%|██████████| 5000/5000 [1:41:50<00:00, 1.25s/it]\n"
217 | ]
218 | },
219 | {
220 | "name": "stdout",
221 | "output_type": "stream",
222 | "text": [
223 | "Loading and preparing results...\n",
224 | "DONE (t=0.21s)\n",
225 | "creating index...\n",
226 | "index created!\n",
227 | "Running per image evaluation...\n",
228 | "Evaluate annotation type *keypoints*\n",
229 | "DONE (t=3.54s).\n",
230 | "Accumulating evaluation results...\n",
231 | "DONE (t=0.07s).\n",
232 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.578\n",
233 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.789\n",
234 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.632\n",
235 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.559\n",
236 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.613\n",
237 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.616\n",
238 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.808\n",
239 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.662\n",
240 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.569\n",
241 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.686\n"
242 | ]
243 | },
244 | {
245 | "data": {
246 | "text/plain": [
247 | "['metrics-raw-original.dump']"
248 | ]
249 | },
250 | "execution_count": 8,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "weights_path = \"model/keras/model.h5\" # orginal weights converted from caffe\n",
257 | "model.load_weights(weights_path)\n",
258 | "eval_result_original = validation(model, dump_name = 'original')\n",
259 | "joblib.dump(eval_result_original, 'metrics-raw-original.dump')"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 9,
265 | "metadata": {
266 | "collapsed": false
267 | },
268 | "outputs": [],
269 | "source": [
270 | "raw_eval_list = !ls metrics-raw*.dump"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 10,
276 | "metadata": {
277 | "collapsed": false
278 | },
279 | "outputs": [
280 | {
281 | "name": "stdout",
282 | "output_type": "stream",
283 | "text": [
284 | "\n",
285 | "metrics-raw-original.dump\n",
286 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.578\n",
287 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.789\n",
288 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.632\n",
289 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.559\n",
290 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.613\n",
291 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.616\n",
292 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.808\n",
293 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.662\n",
294 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.569\n",
295 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.686\n"
296 | ]
297 | },
298 | {
299 | "name": "stderr",
300 | "output_type": "stream",
301 | "text": [
302 | "/opt/home/anatolix/iidf/cv/keras_Realtime_Multi-Person_Pose_Estimation/coco_metric.py:344: RuntimeWarning: invalid value encountered in double_scalars\n",
303 | " scores['average'] = np.mean(np.sum(matches != 0, axis=1)) / scores['gt_person_count']\n"
304 | ]
305 | },
306 | {
307 | "name": "stdout",
308 | "output_type": "stream",
309 | "text": [
310 | "Average per-image score (not coco metric): 0.639\n",
311 | "\n",
312 | "metrics-raw-weights-cpp-lr.dump\n",
313 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.521\n",
314 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.730\n",
315 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.564\n",
316 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.495\n",
317 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.566\n",
318 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.556\n",
319 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.748\n",
320 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.594\n",
321 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.505\n",
322 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.631\n",
323 | "Average per-image score (not coco metric): 0.598\n",
324 | "\n",
325 | "metrics-raw-weights.dump\n",
326 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.553\n",
327 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.769\n",
328 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.594\n",
329 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.526\n",
330 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.606\n",
331 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.590\n",
332 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.783\n",
333 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.626\n",
334 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.535\n",
335 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.671\n",
336 | "Average per-image score (not coco metric): 0.614\n",
337 | "\n",
338 | "metrics-raw-weights-python-last.dump\n",
339 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.504\n",
340 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.720\n",
341 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.535\n",
342 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.462\n",
343 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.572\n",
344 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 20 ] = 0.538\n",
345 | " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets= 20 ] = 0.731\n",
346 | " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets= 20 ] = 0.570\n",
347 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.469\n",
348 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.635\n",
349 | "Average per-image score (not coco metric): 0.581\n"
350 | ]
351 | }
352 | ],
353 | "source": [
354 | "from sklearn.externals import joblib\n",
355 | "\n",
356 | "for raw_eval in raw_eval_list:\n",
357 | " eval_result = joblib.load(raw_eval)\n",
358 | " print(\"\\n\" + raw_eval)\n",
359 | " eval_result.summarize()\n",
360 | " scores = per_image_scores(eval_result)\n",
361 | " scores.to_csv('%s-scores.csv' % raw_eval)\n",
362 | " print(\"Average per-image score (not coco metric): %.3f\" % scores['average'].mean())"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": null,
368 | "metadata": {
369 | "collapsed": true
370 | },
371 | "outputs": [],
372 | "source": []
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {
378 | "collapsed": true
379 | },
380 | "outputs": [],
381 | "source": []
382 | }
383 | ],
384 | "metadata": {
385 | "anaconda-cloud": {},
386 | "kernelspec": {
387 | "display_name": "Python [conda env:gpu_tf_intel]",
388 | "language": "python",
389 | "name": "conda-env-gpu_tf_intel-py"
390 | },
391 | "language_info": {
392 | "codemirror_mode": {
393 | "name": "ipython",
394 | "version": 3
395 | },
396 | "file_extension": ".py",
397 | "mimetype": "text/x-python",
398 | "name": "python",
399 | "nbconvert_exporter": "python",
400 | "pygments_lexer": "ipython3",
401 | "version": "3.5.3"
402 | }
403 | },
404 | "nbformat": 4,
405 | "nbformat_minor": 0
406 | }
407 |
--------------------------------------------------------------------------------
/testing/coco_metric.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import math
4 |
5 | import pandas as pd
6 | import numpy as np
7 | from scipy.ndimage.filters import gaussian_filter
8 |
9 | import tqdm
10 | import cv2
11 |
12 | from pycocotools.coco import COCO
13 | from pycocotools.cocoeval import COCOeval
14 | import util
15 |
16 | from config_reader import config_reader
17 |
18 | params, model_params = config_reader()
19 | params['scale_search'] = list(params['scale_search'])
20 |
21 |
22 | # the middle joints heatmap correspondence
23 | mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
24 | [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
25 | [55, 56], [37, 38], [45, 46]]
26 |
27 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
28 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
29 | [1, 16], [16, 18], [3, 17], [6, 18]]
30 |
31 |
32 | def predict(image, model, model_params):
33 | # print (image.shape)
34 | heatmap_avg = np.zeros((image.shape[0], image.shape[1], 19))
35 | paf_avg = np.zeros((image.shape[0], image.shape[1], 38))
36 | multiplier = [x * model_params['boxsize'] / image.shape[0] for x in params['scale_search']]
37 | # print(multiplier)
38 | for m in range(len(multiplier)):
39 | scale = multiplier[m]
40 |
41 | imageToTest = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'],
43 | model_params['padValue'])
44 |
45 | input_img = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]),
46 | (3, 0, 1, 2)) # required shape (1, width, height, channels)
47 |
48 | output_blobs = model.predict(input_img)
49 |
50 | # extract outputs, resize, and remove padding
51 | heatmap = np.squeeze(output_blobs[1]) # output 1 is heatmaps
52 | heatmap = cv2.resize(heatmap, (0, 0), fx=model_params['stride'], fy=model_params['stride'],
53 | interpolation=cv2.INTER_CUBIC)
54 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3],
55 | :]
56 | heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)
57 |
58 | paf = np.squeeze(output_blobs[0]) # output 0 is PAFs
59 | paf = cv2.resize(paf, (0, 0), fx=model_params['stride'], fy=model_params['stride'],
60 | interpolation=cv2.INTER_CUBIC)
61 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
62 | paf = cv2.resize(paf, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)
63 |
64 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
65 | paf_avg = paf_avg + paf / len(multiplier)
66 |
67 | return heatmap_avg, paf_avg
68 |
69 |
70 | def find_peaks(heatmap_avg, threshold):
71 | all_peaks = []
72 | peak_counter = 0
73 |
74 | for part in range(18):
75 | map_ori = heatmap_avg[:, :, part]
76 | map = gaussian_filter(map_ori, sigma=3)
77 |
78 | map_left = np.zeros(map.shape)
79 | map_left[1:, :] = map[:-1, :]
80 | map_right = np.zeros(map.shape)
81 | map_right[:-1, :] = map[1:, :]
82 | map_up = np.zeros(map.shape)
83 | map_up[:, 1:] = map[:, :-1]
84 | map_down = np.zeros(map.shape)
85 | map_down[:, :-1] = map[:, 1:]
86 |
87 | peaks_binary = np.logical_and.reduce(
88 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > threshold))
89 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse
90 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
91 | id = range(peak_counter, peak_counter + len(peaks))
92 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
93 |
94 | all_peaks.append(peaks_with_score_and_id)
95 | peak_counter += len(peaks)
96 |
97 | return all_peaks
98 |
99 |
100 | def find_connections(all_peaks, paf_avg, image_width, threshold):
101 | connection_all = []
102 | special_k = []
103 | mid_num = 10
104 |
105 | for k in range(len(mapIdx)):
106 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
107 | candA = all_peaks[limbSeq[k][0] - 1]
108 | candB = all_peaks[limbSeq[k][1] - 1]
109 | nA = len(candA)
110 | nB = len(candB)
111 | if (nA != 0 and nB != 0):
112 | connection_candidate = []
113 | for i in range(nA):
114 | for j in range(nB):
115 | vec = np.subtract(candB[j][:2], candA[i][:2])
116 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
117 | # failure case when 2 body parts overlap
118 | if norm == 0:
119 | continue
120 | vec = np.divide(vec, norm)
121 |
122 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
123 | np.linspace(candA[i][1], candB[j][1], num=mid_num)))
124 |
125 | vec_x = np.array(
126 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
127 | for I in range(len(startend))])
128 | vec_y = np.array(
129 | [score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
130 | for I in range(len(startend))])
131 |
132 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
133 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
134 | 0.5 * image_width / norm - 1, 0)
135 | criterion1 = len(np.nonzero(score_midpts > threshold)[0]) > 0.8 * len(
136 | score_midpts)
137 | criterion2 = score_with_dist_prior > 0
138 | if criterion1 and criterion2:
139 | connection_candidate.append([i, j, score_with_dist_prior,
140 | score_with_dist_prior + candA[i][2] + candB[j][2]])
141 |
142 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
143 | connection = np.zeros((0, 5))
144 | for c in range(len(connection_candidate)):
145 | i, j, s = connection_candidate[c][0:3]
146 | if (i not in connection[:, 3] and j not in connection[:, 4]):
147 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
148 | if (len(connection) >= min(nA, nB)):
149 | break
150 |
151 | connection_all.append(connection)
152 | else:
153 | special_k.append(k)
154 | connection_all.append([])
155 |
156 | return connection_all, special_k
157 |
158 |
159 | def find_people(connection_all, special_k, all_peaks):
160 | # the last number in each row is the total number of parts for that person
161 | # the second-to-last number in each row is the score of the overall configuration
162 | subset = -1 * np.ones((0, 20))
163 | candidate = np.array([item for sublist in all_peaks for item in sublist])
164 |
165 | for k in range(len(mapIdx)):
166 | if k not in special_k:
167 | partAs = connection_all[k][:, 0]
168 | partBs = connection_all[k][:, 1]
169 | indexA, indexB = np.array(limbSeq[k]) - 1
170 |
171 | for i in range(len(connection_all[k])): # = 1:size(temp,1)
172 | found = 0
173 | subset_idx = [-1, -1]
174 | for j in range(len(subset)): # 1:size(subset,1):
175 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
176 | subset_idx[found] = j
177 | found += 1
178 |
179 | if found == 1:
180 | j = subset_idx[0]
181 | if (subset[j][indexB] != partBs[i]):
182 | subset[j][indexB] = partBs[i]
183 | subset[j][-1] += 1
184 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
185 | elif found == 2: # if found 2 and disjoint, merge them
186 | j1, j2 = subset_idx
187 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
188 | if len(np.nonzero(membership == 2)[0]) == 0: # merge
189 | subset[j1][:-2] += (subset[j2][:-2] + 1)
190 | subset[j1][-2:] += subset[j2][-2:]
191 | subset[j1][-2] += connection_all[k][i][2]
192 | subset = np.delete(subset, j2, 0)
193 | else: # as like found == 1
194 | subset[j1][indexB] = partBs[i]
195 | subset[j1][-1] += 1
196 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
197 |
198 | # if no partA was found in the subset, create a new subset
199 | elif not found and k < 17:
200 | row = -1 * np.ones(20)
201 | row[indexA] = partAs[i]
202 | row[indexB] = partBs[i]
203 | row[-1] = 2
204 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + \
205 | connection_all[k][i][2]
206 | subset = np.vstack([subset, row])
207 |
208 | # delete rows of subset that have too few parts or a too low average score
209 | deleteIdx = []
210 | for i in range(len(subset)):
211 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
212 | deleteIdx.append(i)
213 | subset = np.delete(subset, deleteIdx, axis=0)
214 | return subset, candidate
215 |
216 |
217 | dt_gt_mapping = {
218 | 0: 0,
219 | 1: None,
220 | 2: 6,
221 | 3: 8,
222 | 4: 10,
223 | 5: 5,
224 | 6: 7,
225 | 7: 9,
226 | 8: 12,
227 | 9: 14,
228 | 10: 16,
229 | 11: 11,
230 | 12: 13,
231 | 13: 15,
232 | 14: 2,
233 | 15: 1,
234 | 16: 4,
235 | 17: 3,
236 | }
237 |
238 |
239 | def process(input_image, params, model, model_params):
240 | oriImg = cv2.imread(input_image) # B,G,R order
241 | heatmap_avg, paf_avg = predict(oriImg, model, model_params)
242 |
243 | all_peaks = find_peaks(heatmap_avg, params['thre1'])
244 | connection_all, special_k = find_connections(all_peaks, paf_avg, oriImg.shape[0], params['thre2'])
245 | subset, candidate = find_people(connection_all, special_k, all_peaks)
246 |
247 | keypoints = []
248 | for s in subset:
249 | keypoint_indexes = s[:18]
250 | person_keypoint_coordinates = []
251 | for index in keypoint_indexes:
252 | if index == -1:
253 | # "No candidate for keypoint"
254 | X, Y = 0, 0
255 | else:
256 | X, Y = candidate[index.astype(int)][:2]
257 | person_keypoint_coordinates.append((X, Y))
258 | person_keypoint_coordinates_coco = [None] * 17
259 |
260 | for dt_index, gt_index in dt_gt_mapping.items():
261 | if gt_index is None:
262 | continue
263 | person_keypoint_coordinates_coco[gt_index] = person_keypoint_coordinates[dt_index]
264 |
265 | keypoints.append((person_keypoint_coordinates_coco, 1 - 1.0 / s[18]))
266 | return keypoints
267 |
268 |
269 | def get_image_name(coco, image_id):
270 | return coco.imgs[image_id]['file_name']
271 |
272 |
273 | def predict_many(coco, images_directory, validation_ids, params, model, model_params):
274 | assert (not set(validation_ids).difference(set(coco.getImgIds())))
275 |
276 | keypoints = {}
277 | for image_id in tqdm.tqdm(validation_ids):
278 | image_name = get_image_name(coco, image_id)
279 | image_name = os.path.join(images_directory, image_name)
280 | keypoints[image_id] = process(image_name, dict(params), model, dict(model_params))
281 | return keypoints
282 |
283 |
284 | def format_results(keypoints, resFile):
285 | format_keypoints = []
286 |
287 | for image_id, people in keypoints.items():
288 | for keypoint_list, score in people:
289 | format_keypoint_list = []
290 | for x, y in keypoint_list:
291 | for v in [int(x), int(y), 1 if x > 0 or y > 0 else 0]:
292 | format_keypoint_list.append(v)
293 |
294 | format_keypoints.append({
295 | "image_id": image_id,
296 | "category_id": 1,
297 | "keypoints": format_keypoint_list,
298 | "score": score,
299 | })
300 |
301 | json.dump(format_keypoints, open(resFile, 'w'))
302 |
303 |
304 | def validation(model, dump_name, validation_ids=None, dataset='val2017'):
305 | annType = 'keypoints'
306 | prefix = 'person_keypoints'
307 |
308 | dataDir = 'dataset'
309 | annFile = '%s/annotations/%s_%s.json' % (dataDir, prefix, dataset)
310 | cocoGt = COCO(annFile)
311 |
312 | if validation_ids is None:
313 | validation_ids = cocoGt.getImgIds()
314 |
315 | resFile = '%s/results/%s_%s_%s100_results.json'
316 | resFile = resFile % (dataDir, prefix, dataset, dump_name)
317 | os.makedirs(os.path.dirname(resFile), exist_ok=True)
318 |
319 | keypoints = predict_many(cocoGt, os.path.join(dataDir, dataset), validation_ids, params, model, model_params)
320 | format_results(keypoints, resFile)
321 |
322 | cocoDt = cocoGt.loadRes(resFile)
323 |
324 | cocoEval = COCOeval(cocoGt, cocoDt, annType)
325 |
326 | cocoEval.params.imgIds = validation_ids
327 | cocoEval.evaluate()
328 | cocoEval.accumulate()
329 | cocoEval.summarize()
330 | return cocoEval
331 |
332 |
333 | def per_image_scores(eval_result):
334 | def convert_match_to_score(match):
335 | matches = match['gtMatches'][:, np.array(match['gtIgnore']) == 0]
336 | scores = {
337 | 'image_id': match['image_id'],
338 | 'gt_person_count': matches.shape[1],
339 | }
340 |
341 | for i in range(matches.shape[0]):
342 | okp_threshold = eval_result.params.iouThrs[i]
343 | scores['matched_%.2f' % okp_threshold] = sum(matches[i, :] != 0)
344 | scores['average'] = np.mean(np.sum(matches != 0, axis=1)) / scores['gt_person_count']
345 |
346 | return scores
347 |
348 | evalImgs = eval_result.evalImgs
349 | scores = [convert_match_to_score(image_match) for image_match in evalImgs if image_match is not None]
350 |
351 | return pd.DataFrame(scores)
352 |
--------------------------------------------------------------------------------
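process() above returns 18 keypoints per person in the detector's own part order, and dt_gt_mapping reorders them into the 17-keypoint COCO layout before format_results writes the (x, y, v) triplets. A small sketch of that remapping on dummy coordinates (the coordinate values are illustrative only; the mapping table is the one defined above):

    # detector part index -> COCO keypoint index (index 1, the neck, has no COCO counterpart)
    dt_gt_mapping = {0: 0, 1: None, 2: 6, 3: 8, 4: 10, 5: 5, 6: 7, 7: 9, 8: 12,
                     9: 14, 10: 16, 11: 11, 12: 13, 13: 15, 14: 2, 15: 1, 16: 4, 17: 3}

    # 18 detected keypoints as (x, y); (0, 0) marks "no candidate found"
    person_keypoint_coordinates = [(float(i), float(i)) for i in range(18)]

    person_keypoint_coordinates_coco = [None] * 17
    for dt_index, gt_index in dt_gt_mapping.items():
        if gt_index is None:     # drop the neck
            continue
        person_keypoint_coordinates_coco[gt_index] = person_keypoint_coordinates[dt_index]

    # flatten to the COCO result format: x, y, v per keypoint (v=1 if the point was detected)
    flat = []
    for x, y in person_keypoint_coordinates_coco:
        flat += [int(x), int(y), 1 if x > 0 or y > 0 else 0]
    assert len(flat) == 17 * 3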
/testing/inhouse_metric.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import pandas as pd
4 | import cv2
5 | from time import sleep
6 | from scipy.ndimage.filters import gaussian_filter, maximum_filter
7 |
8 | # peak finding and matching utilities; linear_sum_assignment matches predicted peaks to ground-truth peaks
9 | from scipy.optimize import linear_sum_assignment
10 |
11 |
12 | def find_peaks(layer, thre1=0.01):
13 | map_ori = cv2.resize(layer, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
14 | map = gaussian_filter(map_ori, sigma=3)
15 | peaks_binary = (map == maximum_filter(map, 3)) & (map > thre1)
16 |
17 | if np.count_nonzero(peaks_binary) > 50:
18 | return [] # safety valve: avoid O(N^2) blow-up in the next stages
19 |
20 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse
21 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
22 |
23 | return peaks_with_score
24 |
25 |
26 | def assign_peaks(layer_y, layer_gt):
27 |
28 | if len(layer_y) == 0 and len(layer_gt) == 0:
29 | return np.nan
30 |
31 | if len(layer_y) == 0 or len(layer_gt) == 0:
32 | return 400
33 |
34 | d = np.array(layer_y)
35 | t = np.array(layer_gt)
36 |
37 | dx = np.subtract.outer(d[:, 0], t[:, 0])
38 | dy = np.subtract.outer(d[:, 1], t[:, 1])
39 | distance = np.sqrt(dx ** 2 + dy ** 2)
40 | # print(distance)
41 |
42 | y, gt = linear_sum_assignment(distance)
43 | # print(np.array(list(zip(y,gt))))
44 |
45 | dist = [distance[foo] for foo in zip(y, gt)] # TODO: use numpy
46 | # print(dist)
47 |
48 | dist += [400] * (len(layer_y) - len(y))
49 | dist += [400] * (len(layer_gt) - len(gt))
50 |
51 | dist = np.mean(dist)
52 |
53 | return dist
54 |
55 |
56 | def calc_batch_metrics(batch_no, gt, Y, heatmap_layers):
57 |
58 | MAE = Y - gt
59 | MAE = np.abs(MAE)
60 | MAE = np.mean(MAE, axis=(1, 2))
61 |
62 | RMSE = (Y - gt) ** 2
63 | RMSE = np.mean(RMSE, axis=(1, 2))
64 | RMSE = np.sqrt(RMSE)
65 |
66 |
67 | gt_parts = np.full((gt.shape[0], gt.shape[3]), np.nan)
68 | y_parts = np.full((gt.shape[0], gt.shape[3]), np.nan)
69 | y_dist = np.full((gt.shape[0], gt.shape[3]), np.nan)
70 |
71 |
72 | for n in range(gt.shape[0]):
73 | for l in heatmap_layers:
74 | y_peaks = find_peaks(Y[n, :, :, l])
75 | y_parts[n, l] = len(y_peaks)
76 | gt_peaks = find_peaks(gt[n, :, :, l])
77 | gt_parts[n, l] = len(gt_peaks)
78 | y_dist[n, l] = assign_peaks(y_peaks, gt_peaks)
79 |
80 | batch_index = np.full(fill_value=batch_no, shape=MAE.shape)
81 | item_index, layer_index = np.mgrid[0:MAE.shape[0], 0:MAE.shape[1]]
82 |
83 | metrics = pd.DataFrame({'batch': batch_index.ravel(),
84 | 'item': item_index.ravel(),
85 | 'layer': layer_index.ravel(),
86 | 'MAE': MAE.ravel(),
87 | 'RMSE': RMSE.ravel(),
88 | 'GT_PARTS': gt_parts.ravel(),
89 | 'Y_PARTS': y_parts.ravel(),
90 | 'DIST': y_dist.ravel()
91 | },
92 | columns=('batch', 'item', 'layer', 'MAE', 'RMSE', 'GT_PARTS', 'Y_PARTS', 'DIST')
93 | )
94 |
95 | return metrics
96 |
97 |
--------------------------------------------------------------------------------
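assign_peaks above matches predicted peaks to ground-truth peaks with the Hungarian algorithm and charges a flat 400-pixel penalty for every unmatched peak on either side. A toy sketch of the same matching, with made-up peak lists:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    pred = np.array([[10., 10.], [50., 60.]])              # predicted peaks (x, y)
    gt = np.array([[12., 11.], [48., 58.], [200., 200.]])  # ground truth has one extra peak

    dx = np.subtract.outer(pred[:, 0], gt[:, 0])
    dy = np.subtract.outer(pred[:, 1], gt[:, 1])
    distance = np.sqrt(dx ** 2 + dy ** 2)

    rows, cols = linear_sum_assignment(distance)           # optimal one-to-one assignment
    dist = [distance[r, c] for r, c in zip(rows, cols)]
    dist += [400] * (len(pred) - len(rows))                # unmatched predictions (none here)
    dist += [400] * (len(gt) - len(cols))                  # unmatched ground-truth peaks

    print(np.mean(dist))                                   # the per-layer score used by assign_peaks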
/testing/rmpe_server_comparator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | import os
5 | sys.path.append("..")
6 |
7 | from time import time
8 | from training.ds_generators import DataGeneratorClient
9 | from py_rmpe_config import RmpeGlobalConfig
10 |
11 | import numpy as np
12 | import pandas as pd
13 | import cv2
14 |
15 | servers = [('py-server', 'localhost', 5556), ('new-server', 'localhost', 5558)]
16 | clients = {}
17 | save_to = 'comparator-hdf5' # directory where per-image comparison dumps (images, masks, layers and their diffs) are written
18 |
19 | def cmp_pics(num, lhsd, rhsd, lhsn, rhsn):
20 | diff = lhsd.astype(float) - rhsd.astype(float)
21 | L1 = np.average(np.abs(diff))
22 | L2 = np.sqrt(np.average(diff**2))
23 | AC = np.average(lhsd==rhsd)
24 |
25 | print("Image: ", num, lhsd.shape, rhsd.shape, L1, L2, AC)
26 |
27 | diff = diff.transpose([1,2,0])
28 | diff = np.abs(diff)
29 | diff = diff.astype(np.uint8)
30 |
31 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "image." + lhsn), lhsd.transpose([1,2,0]))
32 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "image." + rhsn), rhsd.transpose([1,2,0]))
33 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "imagediff" ), diff)
34 |
35 | return (L1,L2,AC)
36 |
37 | def cmp_masks(num, lhsd, rhsd, lhsn, rhsn):
38 | diff = (lhsd.astype(float) - rhsd.astype(float))*255.0
39 | L1 = np.average(np.abs(diff))
40 | L2 = np.sqrt(np.average(diff**2))
41 | AC = np.average(lhsd == rhsd)
42 |
43 | print("Mask: ", num, lhsd.shape, rhsd.shape, L1, L2, AC)
44 |
45 | lhsd = lhsd.reshape((lhsd.shape[0], lhsd.shape[1], 1))
46 | lhsd = (lhsd*255).astype(np.uint8)
47 | lhsd = cv2.resize(lhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
48 | lhsd = lhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
49 |
50 | rhsd = rhsd.reshape((rhsd.shape[0], rhsd.shape[1], 1))
51 | rhsd = (rhsd*255).astype(np.uint8)
52 | rhsd = cv2.resize(rhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
53 | rhsd = rhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
54 |
55 | diff = np.abs(diff).reshape((diff.shape[0], diff.shape[1], 1))
56 | diff = diff.astype(np.uint8)
57 | diff = cv2.resize(diff, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
58 | diff = diff.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
59 |
60 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "mask."+lhsn), lhsd)
61 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "mask."+rhsn), rhsd)
62 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "maskdiff"), diff)
63 |
64 | return (L1,L2,AC)
65 |
66 | def cmp_layers(num, lhsd_all, rhsd_all, lhsn, rhsn):
67 |
68 | result = []
69 |
70 | L1T = 0
71 | L2T = 0
72 | ACT = 0
73 |
74 | for layer in range(RmpeGlobalConfig.num_layers):
75 | lhsd = lhsd_all[layer, :, :]
76 | rhsd = rhsd_all[layer, :, :]
77 |
78 | diff = (lhsd.astype(float) - rhsd.astype(float))*255.0
79 | L1 = np.average(np.abs(diff))
80 | L2 = np.sqrt(np.average(diff**2))
81 | AC = np.average(lhsd == rhsd)
82 |
83 | #print("Layers(%d): " % layer, num, lhsd.shape, rhsd.shape, L1, L2, AC)
84 | L1T += L1
85 | L2T += L2
86 | ACT += AC
87 |
88 | lhsd = lhsd.reshape((lhsd.shape[0], lhsd.shape[1], 1))
89 | lhsd = (127+lhsd*128).astype(np.uint8)
90 | lhsd = cv2.resize(lhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
91 | lhsd = lhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
92 |
93 | rhsd = rhsd.reshape((rhsd.shape[0], rhsd.shape[1], 1))
94 | rhsd = (127+rhsd*128).astype(np.uint8)
95 | rhsd = cv2.resize(rhsd, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
96 | rhsd = rhsd.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
97 |
98 | diff = np.abs(diff).reshape((diff.shape[0], diff.shape[1], 1))
99 | diff = diff.astype(np.uint8)
100 | diff = cv2.resize(diff, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
101 | diff = diff.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
102 |
103 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "." + lhsn), lhsd)
104 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "." + rhsn), rhsd)
105 | cv2.imwrite(save_to+("/%5d" % num)+"/%07d%s.png" % (num, "layer" + str(layer) + "diff"), diff)
106 |
107 | result += [L1, L2, AC]
108 |
109 | print("Layers: ", num, lhsd.shape, rhsd.shape, L1T/RmpeGlobalConfig.num_layers, L2T/RmpeGlobalConfig.num_layers, ACT/RmpeGlobalConfig.num_layers)
110 |
111 | return result
112 |
113 | def step(num, augs):
114 |
115 | all_res = []
116 |
117 | os.makedirs(save_to+("/%5d" % num), exist_ok=True)
118 |
119 | for (i,lhs) in enumerate(augs):
120 | for (j,rhs) in enumerate(augs):
121 | if i < j:
122 |
123 | res = []
124 |
125 | res += cmp_pics(num, augs[lhs][0], augs[rhs][0], lhs, rhs)
126 | res += cmp_masks(num, augs[lhs][1], augs[rhs][1], lhs, rhs)
127 | res += cmp_layers(num, augs[lhs][2], augs[rhs][2], lhs, rhs)
128 |
129 | all_res += [res]
130 |
131 | return all_res
132 |
133 | def main(servers, batch_size):
134 |
135 | for (name, host, port) in servers:
136 | clients[name] = DataGeneratorClient(port=port, host=host, hwm=1, batch_size=batch_size).gen_raw()
137 |
138 | res_all = []
139 |
140 | for i in range(2645): #2645
141 | print(i)
142 | augs = dict([(name, next(value)) for (name, value) in clients.items()])
143 | res = step(i, augs)
144 | res_all += res
145 |
146 | columns = ["ImageL1", "ImageL2", "ImageAC", "MaskL1", "MaskL2", "MaskAC"]
147 | for layer in range(RmpeGlobalConfig.num_layers):
148 | columns += ["Layer"+str(layer)+"L1", "Layer"+str(layer)+"L2", "Layer"+str(layer)+"AC"]
149 |
150 | res_all = np.array(res_all)
151 | print(res_all.shape)
152 |
153 | results = pd.DataFrame(res_all, columns=columns )
154 | results.to_csv("weights.tsv", sep="\t")
155 |
156 | batch_size=20
157 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000)
158 | main(servers, batch_size)
159 |
--------------------------------------------------------------------------------
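Each cmp_* helper above reduces a pair of tensors to three numbers: mean absolute difference (L1), root-mean-square difference (L2) and the fraction of exactly equal entries (AC). A toy sketch of those reductions on synthetic data:

    import numpy as np

    rng = np.random.default_rng(0)
    lhs = rng.integers(0, 200, size=(3, 46, 46)).astype(np.uint8)
    rhs = lhs.copy()
    rhs[0, :10, :10] += 1                 # introduce a small localised difference

    diff = lhs.astype(float) - rhs.astype(float)
    L1 = np.average(np.abs(diff))         # mean absolute difference
    L2 = np.sqrt(np.average(diff ** 2))   # RMS difference
    AC = np.average(lhs == rhs)           # share of identical entries

    print(L1, L2, AC)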
/testing/rmpe_server_tester.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | import os
5 | sys.path.append("..")
6 |
7 | from time import time
8 | from training.ds_generators import DataGeneratorClient
9 |
10 | import cv2
11 | import numpy as np
12 |
13 | from py_rmpe_config import RmpeGlobalConfig
14 |
15 | mask_pattern = np.zeros((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 3), dtype=np.uint8)
16 | heat_pattern = np.zeros((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 3), dtype=np.uint8)
17 | mask_y, mask_x = np.mgrid[0:RmpeGlobalConfig.height, 0:RmpeGlobalConfig.width]
18 | grid = (mask_x//8 % 2) + (mask_y//8 % 2)
19 |
20 | mask_pattern[grid==1]=(255,255,255)
21 | mask_pattern[grid!=1]=(128,128,128)
22 |
23 | heat_pattern[...] = (0,0,255)
24 |
25 | save_to = 'old_val' # directory where masked images, heatmap and PAF visualisations are saved
26 |
27 |
28 | def save_images(num, image, mask, paf):
29 |
30 | image = image.transpose([1,2,0])
31 |
32 | mask_img = mask.reshape((mask.shape[0], mask.shape[1], 1))
33 | mask_img = (mask_img*255).astype(np.uint8)
34 | mask_img = cv2.resize(mask_img, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
35 | mask_img = mask_img.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
36 |
37 | masked_img = image.copy()
38 | masked_img = masked_img*(mask_img/255.0) + mask_pattern*(1.-mask_img/255.0)
39 |
40 | os.makedirs(save_to, exist_ok=True)
41 |
42 | #cv2.imwrite(save_to+"/%07d%s.png" % (num, ""), image)
43 | #cv2.imwrite(save_to+"/%07d%s.png" % (num, "mask"), mask_img)
44 | cv2.imwrite(save_to + "/%07d%s.png" % (num, "masked"), masked_img)
45 |
46 | parts = []
47 |
48 | for i in range(RmpeGlobalConfig.num_parts_with_background):
49 | heated_image = image.copy()
50 |
51 | heat_img = paf[RmpeGlobalConfig.heat_start+i]
52 |
53 | heat_img = cv2.resize(heat_img, (RmpeGlobalConfig.height, RmpeGlobalConfig.width), interpolation=cv2.INTER_NEAREST)
54 | heat_img = heat_img.reshape((RmpeGlobalConfig.height, RmpeGlobalConfig.width, 1))
55 |
56 | heated_image = heated_image*(1-heat_img) + heat_pattern*heat_img
57 |
58 | parts += [heated_image]
59 |
60 | parts = np.vstack(parts)
61 | cv2.imwrite(save_to+"/%07d%s.png" % (num, "heat"), parts)
62 |
63 |
64 | pafs = []
65 | stride = RmpeGlobalConfig.stride
66 |
67 | for i,(fr,to) in enumerate(RmpeGlobalConfig.limbs_conn):
68 | paffed_image = image.copy()
69 |
70 | pafX = paf[RmpeGlobalConfig.paf_start + i * 2]
71 | pafY = paf[RmpeGlobalConfig.paf_start + i * 2 + 1]
72 |
73 | for x in range(RmpeGlobalConfig.width//stride):
74 | for y in range(RmpeGlobalConfig.height//stride):
75 | X = pafX[y, x]
76 | Y = pafY[y, x]
77 |
78 | if X!=0 or Y!=0:
79 | cv2.arrowedLine(paffed_image, (x*stride,y*stride), (int(x*stride+X*stride),int(y*stride+Y*stride)), color=(0,0,255), thickness=1, tipLength=0.5)
80 |
81 | pafs += [paffed_image]
82 |
83 |
84 | pafs = np.vstack(pafs)
85 | cv2.imwrite(save_to+"/%07d%s.png" % (num, "paf"), pafs)
86 |
87 |
88 |
89 | def time_processed(client, batch_size):
90 |
91 | num = 0
92 | start = time()
93 |
94 | for x,y in client.gen():
95 | num += 1
96 | elapsed = time() - start
97 | print(num*batch_size, num*batch_size/elapsed, [ i.shape for i in x ], [i.shape for i in y] )
98 |
99 | def time_raw(client, save):
100 |
101 | num = 0
102 | start = time()
103 |
104 | for foo in client.gen_raw():
105 |
106 | if len(foo) == 3:
107 | x, y, z = foo
108 | elif len(foo) == 4:
109 | x, y, z, k = foo
110 | else:
111 | raise NotImplementedError("Unknown number of tensors in proto %d" % len(foo))
112 |
113 | num += 1
114 | elapsed = time() - start
115 | print(num, num/elapsed, x.shape, y.shape, z.shape )
116 |
117 | if save:
118 | save_images(num, x, y, z)
119 |
120 |
121 | def main(type, batch_size, save):
122 |
123 | client = DataGeneratorClient(port=5556, host="localhost", hwm=1, batch_size=batch_size)
124 |
125 | if type=='processed':
126 | time_processed(client, batch_size)
127 | elif type=='raw':
128 | time_raw(client, save)
129 | else:
130 | assert False, "type should be 'processed' or 'raw' "
131 |
132 |
133 | assert len(sys.argv) >= 2, "Usage: ./rmpe_server_tester.py <processed|raw> [batch_size] [save]"
134 | batch_size=1
135 | save = False
136 | if 'save' in sys.argv:
137 | save=True
138 | sys.argv = [s for s in sys.argv if s!='save']
139 | if len(sys.argv)==3: batch_size=int(sys.argv[2])
140 |
141 | np.set_printoptions(precision=1, linewidth=1000, suppress=True, threshold=100000)
142 | main(sys.argv[1], batch_size, save)
143 |
--------------------------------------------------------------------------------
/training/coco_masks_hdf5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from pycocotools.coco import COCO
4 | from scipy.spatial.distance import cdist
5 | import numpy as np
6 | import cv2
7 | import os
8 | import os.path
9 | import h5py
10 | import json
11 |
12 | dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'dataset'))
13 |
14 | tr_anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_train2017.json")
15 | tr_img_dir = os.path.join(dataset_dir, "train2017")
16 |
17 | val_anno_path = os.path.join(dataset_dir, "annotations/person_keypoints_val2017.json")
18 | val_img_dir = os.path.join(dataset_dir, "val2017")
19 |
20 | datasets = [
21 | (val_anno_path, val_img_dir, "COCO_val"), # it is important to have 'val' in validation dataset name, look for 'val' below
22 | (tr_anno_path, tr_img_dir, "COCO")
23 | ]
24 |
25 |
26 | tr_hdf5_path = os.path.join(dataset_dir, "coco_train_dataset.h5")
27 | val_hdf5_path = os.path.join(dataset_dir, "coco_val_dataset.h5")
28 |
29 | val_size = 2645 # size of validation set
30 |
31 |
32 | def make_mask(img_dir, img_id, img_anns, coco):
33 |
34 | img_path = os.path.join(img_dir, "%012d.jpg" % img_id)
35 | img = cv2.imread(img_path)
36 | h, w, c = img.shape
37 |
38 | mask_all = np.zeros((h, w), dtype=np.uint8)
39 | mask_miss = np.zeros((h, w), dtype=np.uint8)
40 |
41 | flag = 0
42 | for p in img_anns:
43 | seg = p["segmentation"]
44 |
45 | if p["iscrowd"] == 1:
46 | mask_crowd = coco.annToMask(p)
47 | temp = np.bitwise_and(mask_all, mask_crowd)
48 | mask_crowd = mask_crowd - temp
49 | flag += 1
50 | continue
51 | else:
52 | mask = coco.annToMask(p)
53 |
54 | mask_all = np.bitwise_or(mask, mask_all)
55 |
56 | if p["num_keypoints"] <= 0:
57 | mask_miss = np.bitwise_or(mask, mask_miss)
58 |
59 | if flag<1:
60 | mask_miss = np.logical_not(mask_miss)
61 | elif flag == 1:
62 | mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
63 | mask_all = np.bitwise_or(mask_all, mask_crowd)
64 | else:
65 | raise Exception("crowd segments > 1")
66 |
67 | mask_miss = mask_miss.astype(np.uint8)
68 | mask_miss *= 255
69 |
70 | return img, mask_miss
71 |
72 | def process_image(image_rec, img_id, image_index, img_anns, dataset_type):
73 |
74 | print("Image ID: ", img_id)
75 |
76 | numPeople = len(img_anns)
77 | h, w = image_rec['height'], image_rec['width']
78 |
79 | all_persons = []
80 |
81 | for p in range(numPeople):
82 |
83 | pers = dict()
84 |
85 | person_center = [img_anns[p]["bbox"][0] + img_anns[p]["bbox"][2] / 2,
86 | img_anns[p]["bbox"][1] + img_anns[p]["bbox"][3] / 2]
87 |
88 | pers["objpos"] = person_center
89 | pers["bbox"] = img_anns[p]["bbox"]
90 | pers["segment_area"] = img_anns[p]["area"]
91 | pers["num_keypoints"] = img_anns[p]["num_keypoints"]
92 |
93 | anno = img_anns[p]["keypoints"]
94 |
95 | pers["joint"] = np.zeros((17, 3))
96 | for part in range(17):
97 | pers["joint"][part, 0] = anno[part * 3]
98 | pers["joint"][part, 1] = anno[part * 3 + 1]
99 |
100 | # visible/invisible
101 | # COCO - Each keypoint has a 0-indexed location x,y and a visibility flag v defined as v=0: not labeled (in which case x=y=0), v=1: labeled but not visible, and v=2: labeled and visible.
102 | # OURS - 3: never marked up in this dataset, 2: not marked up for this person, 1: marked and visible, 0: marked but invisible
103 | if anno[part * 3 + 2] == 2:
104 | pers["joint"][part, 2] = 1
105 | elif anno[part * 3 + 2] == 1:
106 | pers["joint"][part, 2] = 0
107 | else:
108 | pers["joint"][part, 2] = 2
109 |
110 | pers["scale_provided"] = img_anns[p]["bbox"][3] / 368
111 |
112 | all_persons.append(pers)
113 |
114 | main_persons = []
115 | prev_center = []
116 |
117 |
118 | for pers in all_persons:
119 |
120 | # skip this person if the number of annotated keypoints is too low
121 | # or if the segmentation area is too small
122 | if pers["num_keypoints"] < 5 or pers["segment_area"] < 32 * 32:
123 | continue
124 |
125 | person_center = pers["objpos"]
126 |
127 | # skip this person if the distance to an existing person is too small
128 | flag = 0
129 | for pc in prev_center:
130 | a = np.expand_dims(pc[:2], axis=0)
131 | b = np.expand_dims(person_center, axis=0)
132 | dist = cdist(a, b)[0]
133 | if dist < pc[2] * 0.3:
134 | flag = 1
135 | continue
136 |
137 | if flag == 1:
138 | continue
139 |
140 | main_persons.append(pers)
141 | prev_center.append(np.append(person_center, max(pers["bbox"][2], pers["bbox"][3]))) # this person's centre plus the larger bbox side
142 |
143 |
144 | template = dict()
145 | template["dataset"] = dataset_type
146 |
147 | if image_index < val_size and 'val' in dataset_type:
148 | isValidation = 1
149 | else:
150 | isValidation = 0
151 |
152 | template["isValidation"] = isValidation
153 | template["img_width"] = w
154 | template["img_height"] = h
155 | template["image_id"] = img_id
156 | template["annolist_index"] = image_index
157 | template["img_path"] = '%012d.jpg' % img_id
158 |
159 | for p, person in enumerate(main_persons):
160 |
161 | instance = template.copy()
162 |
163 | instance["objpos"] = [ main_persons[p]["objpos"] ]
164 | instance["joints"] = [ main_persons[p]["joint"].tolist() ]
165 | instance["scale_provided"] = [ main_persons[p]["scale_provided"] ]
166 |
167 | lenOthers = 0
168 |
169 | for ot, operson in enumerate(all_persons):
170 |
171 | if person is operson:
172 |                 assert "people_index" not in instance, "several main persons? can't be"
173 | instance["people_index"] = ot
174 | continue
175 |
176 | if operson["num_keypoints"] == 0:
177 | continue
178 |
179 | instance["joints"].append(all_persons[ot]["joint"].tolist())
180 | instance["scale_provided"].append(all_persons[ot]["scale_provided"])
181 | instance["objpos"].append(all_persons[ot]["objpos"])
182 |
183 | lenOthers += 1
184 |
185 | assert "people_index" in instance, "No main person index"
186 | instance["numOtherPeople"] = lenOthers
187 |
188 | yield instance
189 |
190 |
191 | def writeImage(grp, img_grp, data, img, mask_miss, count, image_id, mask_grp=None):
192 |
193 | serializable_meta = data
194 | serializable_meta['count'] = count
195 |
196 | nop = data['numOtherPeople']
197 |
198 | assert len(serializable_meta['joints']) == 1 + nop, [len(serializable_meta['joints']), 1 + nop]
199 | assert len(serializable_meta['scale_provided']) == 1 + nop, [len(serializable_meta['scale_provided']), 1 + nop]
200 | assert len(serializable_meta['objpos']) == 1 + nop, [len(serializable_meta['objpos']), 1 + nop]
201 |
202 | img_key = "%012d" % image_id
203 | if not img_key in img_grp:
204 |
205 | if mask_grp is None:
206 | img_and_mask = np.concatenate((img, mask_miss[..., None]), axis=2)
207 | img_ds = img_grp.create_dataset(img_key, data=img_and_mask, chunks=None)
208 | else:
209 | _, img_bin = cv2.imencode(".jpg", img)
210 | _, img_mask = cv2.imencode(".png", mask_miss)
211 | img_ds1 = img_grp.create_dataset(img_key, data=img_bin, chunks=None)
212 | img_ds2 = mask_grp.create_dataset(img_key, data=img_mask, chunks=None)
213 |
214 |
215 | key = '%07d' % count
216 | required = { 'image':img_key, 'joints': serializable_meta['joints'], 'objpos': serializable_meta['objpos'], 'scale_provided': serializable_meta['scale_provided'] }
217 | ds = grp.create_dataset(key, data=json.dumps(required), chunks=None)
218 | ds.attrs['meta'] = json.dumps(serializable_meta)
219 |
220 | print('Writing sample %d' % count)
221 |
222 |
223 | def process():
224 |
225 | tr_h5 = h5py.File(tr_hdf5_path, 'w')
226 | tr_grp = tr_h5.create_group("dataset")
227 | tr_write_count = 0
228 | tr_grp_img = tr_h5.create_group("images")
229 | tr_grp_mask = tr_h5.create_group("masks")
230 |
231 | val_h5 = h5py.File(val_hdf5_path, 'w')
232 | val_grp = val_h5.create_group("dataset")
233 | val_write_count = 0
234 | val_grp_img = val_h5.create_group("images")
235 | val_grp_mask = val_h5.create_group("masks")
236 |
237 | for _, ds in enumerate(datasets):
238 |
239 | anno_path = ds[0]
240 | img_dir = ds[1]
241 | dataset_type = ds[2]
242 |
243 | coco = COCO(anno_path)
244 | ids = list(coco.imgs.keys())
245 |
246 | for image_index, img_id in enumerate(ids):
247 | ann_ids = coco.getAnnIds(imgIds=img_id)
248 | img_anns = coco.loadAnns(ann_ids)
249 | image_rec = coco.imgs[img_id]
250 |
251 | img = None
252 | mask_miss = None
253 | cached_img_id = None
254 |
255 | for data in process_image(image_rec, img_id, image_index, img_anns, dataset_type):
256 |
257 | if cached_img_id!=data['image_id']:
258 | assert img_id == data['image_id']
259 | cached_img_id = data['image_id']
260 | img, mask_miss = make_mask(img_dir, cached_img_id, img_anns, coco)
261 |
262 | if data['isValidation']:
263 | writeImage(val_grp, val_grp_img, data, img, mask_miss, val_write_count, cached_img_id, val_grp_mask)
264 | val_write_count += 1
265 | else:
266 | writeImage(tr_grp, tr_grp_img, data, img, mask_miss, tr_write_count, cached_img_id, tr_grp_mask)
267 | tr_write_count += 1
268 |
269 | tr_h5.close()
270 | val_h5.close()
271 |
272 | if __name__ == '__main__':
273 | process()
274 |
--------------------------------------------------------------------------------
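A minimal sketch (not part of the repo) for inspecting the HDF5 file written by process() above. The group names and key formats follow what writeImage() stores, and since process() passes a mask group, images and masks are kept JPEG/PNG-encoded; the path is the one train_pose.py later reads.

    import json
    import h5py
    import cv2

    with h5py.File("../dataset/coco_train_dataset.h5", "r") as h5:
        key = sorted(h5["dataset"].keys())[0]               # samples are keyed '%07d' by write count
        entry = h5["dataset"][key]
        required = json.loads(entry[()])                    # image key, joints, objpos, scale_provided
        meta = json.loads(entry.attrs["meta"])              # full serialized annotation
        img_key = required["image"]                         # '%012d' COCO image id
        img = cv2.imdecode(h5["images"][img_key][()], cv2.IMREAD_COLOR)
        mask_miss = cv2.imdecode(h5["masks"][img_key][()], cv2.IMREAD_GRAYSCALE)
        print(img.shape, mask_miss.shape, len(required["joints"]), "people in sample")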
/training/ds_generators.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import zmq
3 | from ast import literal_eval as make_tuple
4 | from py_rmpe_server.py_rmpe_data_iterator import RawDataIterator
5 | from time import time
6 |
7 | import six
8 | if six.PY3:
9 | buffer_ = memoryview
10 | else:
11 | buffer_ = buffer # noqa
12 |
13 |
14 | class DataIteratorBase:
15 |
16 | def __init__(self, global_config, batch_size = 10):
17 |
18 | self.global_config = global_config
19 | self.batch_size = batch_size
20 |
21 | self.split_point = global_config.paf_layers
22 | self.vec_num = global_config.paf_layers
23 | self.heat_num = global_config.heat_layers + 1
24 |
25 | self.image_shape = (self.batch_size, self.global_config.width, self.global_config.height, 3)
26 | self.mask1_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.vec_num)
27 | self.mask2_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.heat_num)
28 | self.ypafs1_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.vec_num)
29 | self.yheat2_shape = (self.batch_size, self.global_config.width//self.global_config.stride, self.global_config.height//self.global_config.stride, self.heat_num)
30 |
31 | #self.keypoints = [None]*self.batch_size # this is never passed to NN, will be accessed by accuracy calculation
32 |
33 | def restart(self):
34 |
35 | assert False, "Not implemented" # should restart connection, server should start new cycle on connection.
36 |
37 |     def gen_raw(self): # this function is used for test purposes by py_rmpe_server
38 |
39 | self.restart()
40 |
41 | while True:
42 | yield tuple(self._recv_arrays())
43 |
44 |
45 | def gen(self):
46 |
47 | sample_idx = 0
48 | batches_x = np.empty(self.image_shape)
49 | batches_x1 = np.empty(self.mask1_shape)
50 | batches_x2 = np.empty(self.mask2_shape)
51 | batches_y1 = np.empty(self.ypafs1_shape)
52 | batches_y2 = np.empty(self.yheat2_shape)
53 |
54 | for foo in self.gen_raw():
55 |
56 | if len(foo)==4:
57 | data_img, mask_img, label, kpts = foo
58 | else:
59 | data_img, mask_img, label = foo
60 | kpts = None
61 |
62 | batches_x[sample_idx] = data_img[np.newaxis, ...]
63 |
64 | batches_x1[sample_idx,:,:,:] = mask_img[ np.newaxis, :, :, :self.split_point ]
65 | batches_x2[sample_idx,:,:,:] = mask_img[ np.newaxis, :, :, self.split_point: ]
66 |
67 | batches_y1[sample_idx] = label[np.newaxis, :, :, :self.split_point ]
68 | batches_y2[sample_idx] = label[np.newaxis, :, :, self.split_point: ]
69 |
70 | #self.keypoints[sample_idx] = kpts
71 |
72 | sample_idx += 1
73 |
74 | if sample_idx == self.batch_size:
75 | sample_idx = 0
76 |
77 | if self.vec_num>0 and self.heat_num>0:
78 | yield [batches_x, batches_x1, batches_x2], \
79 | [batches_y1, batches_y2,
80 | batches_y1, batches_y2,
81 | batches_y1, batches_y2,
82 | batches_y1, batches_y2,
83 | batches_y1, batches_y2,
84 | batches_y1, batches_y2]
85 |
86 | elif self.vec_num == 0 and self.heat_num > 0:
87 |
88 | yield [batches_x, batches_x2], \
89 | [batches_y2, batches_y2, batches_y2, batches_y2, batches_y2, batches_y2]
90 |
91 | else:
92 |                     assert False, "Unsupported layer configuration (not implemented)"
93 |
94 |                 # we have to recreate these arrays: the generator may be consumed from multiple threads, so we can't overwrite a yielded batch in place
95 | batches_x = np.empty(self.image_shape)
96 | batches_x1 = np.empty(self.mask1_shape)
97 | batches_x2 = np.empty(self.mask2_shape)
98 | batches_y1 = np.empty(self.ypafs1_shape)
99 | batches_y2 = np.empty(self.yheat2_shape)
100 |
101 | #self.keypoints = [None] * self.batch_size
102 |
103 | def keypoints(self):
104 | return self.keypoints
105 |
106 | def num_samples(self):
107 | assert False, "Not Implemented"
108 |
109 |
110 | class DataGeneratorClient(DataIteratorBase):
111 |
112 | def __init__(self, global_config, host, port, hwm=20, batch_size=10, limit=None):
113 |
114 | super(DataGeneratorClient, self).__init__(global_config, batch_size)
115 |
116 | self.limit = limit
117 | self.records = 0
118 |
119 |         """
120 |         :param global_config: network/ground-truth layout (paf/heat layers, stride, input size).
121 |         :param host: hostname or IP address of the rmpe server to pull batches from.
122 |         :param port: TCP port the server sends on.
123 |         :param hwm: optional
124 |             The ZeroMQ high-water mark (HWM) on the receiving socket.
125 |             Increasing this increases the buffer, which can be useful if your
126 |             data preprocessing times are very uneven. However, it will also
127 |             increase memory usage, and there is no easy way to tell how many
128 |             batches will actually be queued with a particular HWM.
129 |             Defaults to 20. Be sure to set the corresponding HWM on the
130 |             sending end as well.
131 |         :param batch_size: number of samples per yielded batch.
132 |         :param limit: optional
133 |             Maximum number of records to receive before raising StopIteration.
134 |         """
135 | self.host = host
136 | self.port = port
137 | self.hwm = hwm
138 | self.socket = None
139 |
140 | context = zmq.Context()
141 | self.socket = context.socket(zmq.PULL)
142 | self.socket.set_hwm(self.hwm)
143 | self.socket.connect("tcp://{}:{}".format(self.host, self.port))
144 |
145 |
146 | def _recv_arrays(self):
147 |         """Receive a list of NumPy arrays.
148 |         Arrays are read from ``self.socket``: a single JSON message carrying one
149 |         header per array (descr, shape, fortran_order), followed by the raw
150 |         buffer of each array.
151 |         Returns
152 |         -------
153 |         list
154 |             A list of :class:`numpy.ndarray` objects.
155 |         Raises
156 |         ------
157 |         StopIteration
158 |             If the record limit has been reached, or if the first JSON object
159 |             received contains the key `stop`, signifying that the server has
160 |             finished a single epoch.
161 |         """
162 |
163 | if self.limit is not None and self.records > self.limit:
164 | raise StopIteration
165 |
166 | headers = self.socket.recv_json()
167 | if 'stop' in headers:
168 | raise StopIteration
169 | arrays = []
170 |
171 | for header in headers:
172 | data = self.socket.recv()
173 | buf = buffer_(data)
174 | array = np.frombuffer(buf, dtype=np.dtype(header['descr']))
175 | array.shape = make_tuple(header['shape']) if isinstance(header['shape'], str) else header['shape']
176 |                 # needed for compatibility with the C++ code: for some reason the shape arrives as a string here, not a tuple
177 |
178 | if header['fortran_order']:
179 |                     array.shape = array.shape[::-1]  # reverse the already-parsed shape (header['shape'] may be a string)
180 | array = array.transpose()
181 | arrays.append(array)
182 |
183 | self.records += 1
184 | return arrays
185 |
186 |
187 | class DataIterator(DataIteratorBase):
188 |
189 | def __init__(self, global_config, config, shuffle=True, augment=True, batch_size=10, limit=None):
190 |
191 | super(DataIterator, self).__init__(global_config, batch_size)
192 |
193 | self.limit = limit
194 | self.records = 0
195 | self.global_config = global_config
196 | self.config = config
197 | self.shuffle = shuffle
198 | self.augment = augment
199 |
200 | self.raw_data_iterator = RawDataIterator(self.global_config, self.config, shuffle=self.shuffle, augment=self.augment)
201 | self.generator = None
202 |
203 | def restart(self):
204 |
205 | self.records = 0
206 | self.generator = self.raw_data_iterator.gen()
207 |
208 | def num_samples(self):
209 | return self.raw_data_iterator.num_keys()
210 |
211 | def _recv_arrays(self):
212 |
213 | while True:
214 |
215 | if self.limit is not None and self.records > self.limit:
216 | raise StopIteration("Limit Reached")
217 |
218 | tpl = next(self.generator, None)
219 | if tpl is not None:
220 | self.records += 1
221 | return tpl
222 |
223 | raise StopIteration("Limited and reached cycle")
224 |
225 |
226 |
--------------------------------------------------------------------------------
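DataGeneratorClient pulls batches from rmpe_server.py over ZeroMQ. The snippet below is not the server code, only a sketch of the sending side implied by _recv_arrays(): one JSON message listing a header (descr, shape, fortran_order) per array, then the raw buffers, and a {'stop': ...} message to end an epoch; the port is a placeholder.

    import zmq
    import numpy as np

    context = zmq.Context()
    socket = context.socket(zmq.PUSH)
    socket.set_hwm(20)                                  # mirror the client-side hwm
    socket.bind("tcp://*:17001")                        # placeholder port

    def send_arrays(arrays, stop=False):
        if stop:
            socket.send_json({'stop': True})            # client raises StopIteration on this
            return
        arrays = [np.ascontiguousarray(a) for a in arrays]
        headers = [{'descr': a.dtype.str,
                    'shape': a.shape,
                    'fortran_order': False} for a in arrays]
        socket.send_json(headers)
        for a in arrays:
            socket.send(a, copy=False)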
/training/optimizers.py:
--------------------------------------------------------------------------------
1 | from keras.optimizers import Optimizer
2 | from keras import backend as K
3 | from keras.legacy import interfaces
4 |
5 |
6 | class MultiSGD(Optimizer):
7 | """
8 | Modified SGD with added support for learning multiplier for kernels and biases
9 | as suggested in: https://github.com/fchollet/keras/issues/5920
10 |
11 | Stochastic gradient descent optimizer.
12 | Includes support for momentum,
13 | learning rate decay, and Nesterov momentum.
14 | # Arguments
15 | lr: float >= 0. Learning rate.
16 | momentum: float >= 0. Parameter updates momentum.
17 | decay: float >= 0. Learning rate decay over each update.
18 | nesterov: boolean. Whether to apply Nesterov momentum.
19 | """
20 |
21 | def __init__(self, lr=0.01, momentum=0., decay=0.,
22 | nesterov=False, lr_mult=None, **kwargs):
23 | super(MultiSGD, self).__init__(**kwargs)
24 | with K.name_scope(self.__class__.__name__):
25 | self.iterations = K.variable(0, dtype='int64', name='iterations')
26 | self.lr = K.variable(lr, name='lr')
27 | self.momentum = K.variable(momentum, name='momentum')
28 | self.decay = K.variable(decay, name='decay')
29 | self.initial_decay = decay
30 | self.nesterov = nesterov
31 | self.lr_mult = lr_mult
32 |
33 | @interfaces.legacy_get_updates_support
34 | def get_updates(self, loss, params):
35 | grads = self.get_gradients(loss, params)
36 | self.updates = [K.update_add(self.iterations, 1)]
37 |
38 | lr = self.lr
39 | if self.initial_decay > 0:
40 | lr *= (1. / (1. + self.decay * K.cast(self.iterations,
41 | K.dtype(self.decay))))
42 | # momentum
43 | shapes = [K.int_shape(p) for p in params]
44 | moments = [K.zeros(shape) for shape in shapes]
45 | self.weights = [self.iterations] + moments
46 | for p, g, m in zip(params, grads, moments):
47 |
48 |             if self.lr_mult is not None and p.name in self.lr_mult:
49 | multiplied_lr = lr * self.lr_mult[p.name]
50 | else:
51 | multiplied_lr = lr
52 |
53 | v = self.momentum * m - multiplied_lr * g # velocity
54 | self.updates.append(K.update(m, v))
55 |
56 | if self.nesterov:
57 | new_p = p + self.momentum * v - multiplied_lr * g
58 | else:
59 | new_p = p + v
60 |
61 | # Apply constraints.
62 | if getattr(p, 'constraint', None) is not None:
63 | new_p = p.constraint(new_p)
64 |
65 | self.updates.append(K.update(p, new_p))
66 | return self.updates
67 |
68 | def get_config(self):
69 | config = {'lr': float(K.get_value(self.lr)),
70 | 'momentum': float(K.get_value(self.momentum)),
71 | 'decay': float(K.get_value(self.decay)),
72 | 'nesterov': self.nesterov}
73 | base_config = super(MultiSGD, self).get_config()
74 | return dict(list(base_config.items()) + list(config.items()))
75 |
--------------------------------------------------------------------------------
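A small usage sketch for MultiSGD: get_updates() looks each weight up by its variable name, so lr_mult is a dict keyed by kernel/bias names. In this repo that dict is produced by get_lrmult(model) in model.py; the toy model and the 4x multiplier below are purely illustrative.

    from keras.models import Sequential
    from keras.layers import Dense
    from training.optimizers import MultiSGD

    model = Sequential([Dense(8, input_shape=(4,), name='fc1'),
                        Dense(1, name='fc2')])

    lr_mult = {}
    for w in model.get_layer('fc2').weights:
        lr_mult[w.name] = 4.0                 # train the last layer 4x faster than the base lr

    optimizer = MultiSGD(lr=2e-5, momentum=0.9, nesterov=False, lr_mult=lr_mult)
    model.compile(loss='mse', optimizer=optimizer)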
/training/train_common.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import math
4 | sys.path.append("..")
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | from model import get_training_model, get_lrmult
10 | from training.optimizers import MultiSGD
11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint, CSVLogger, TensorBoard, TerminateOnNaN
12 | from keras.applications.vgg19 import VGG19
13 | import keras.backend as K
14 |
15 | from glob import glob
16 | from config import GetConfig
17 | import h5py
18 | from testing.inhouse_metric import calc_batch_metrics
19 | from time import time
20 |
21 | base_lr = 2e-5
22 | momentum = 0.9
23 | weight_decay = 5e-4
24 | lr_policy = "step"
25 | gamma = 0.333
26 | stepsize = 121746 * 17 # in the original code each epoch is 121746 iterations and the step change happens at the 17th epoch
27 | max_iter = 200
28 |
29 | def get_last_epoch_and_weights_file(WEIGHT_DIR, WEIGHTS_SAVE, epoch):
30 |
31 | os.makedirs(WEIGHT_DIR, exist_ok=True)
32 |
33 | if epoch is not None and epoch != '': #override
34 | return int(epoch), WEIGHT_DIR + '/' + WEIGHTS_SAVE.format(epoch=epoch)
35 |
36 | files = [file for file in glob(WEIGHT_DIR + '/weights.*.h5')]
37 | files = [file.split('/')[-1] for file in files]
38 | epochs = [file.split('.')[1] for file in files if file]
39 | epochs = [int(epoch) for epoch in epochs if epoch.isdigit() ]
40 | if len(epochs) == 0:
41 | if 'weights.best.h5' in files:
42 | return -1, WEIGHT_DIR + '/weights.best.h5'
43 | else:
44 | ep = max([int(epoch) for epoch in epochs])
45 | return ep, WEIGHT_DIR + '/' + WEIGHTS_SAVE.format(epoch=ep)
46 | return None, None
47 |
48 |
49 | # save names will look like:
50 | # training/canonical/exp1
51 | # training/canonical_exp1.csv
52 | # training/canonical/exp2
53 | # training/canonical_exp2.csv
54 |
55 | def prepare(config, config_name, exp_id, train_samples, val_samples, batch_size, epoch=None ):
56 |
57 | metrics_id = config_name + "_" + exp_id if exp_id is not None else config_name
58 | weights_id = config_name + "/" + exp_id if exp_id is not None else config_name
59 |
60 | WEIGHT_DIR = "./" + weights_id
61 | WEIGHTS_SAVE = 'weights.{epoch:04d}.h5'
62 |
63 | TRAINING_LOG = "./" + metrics_id + ".csv"
64 | LOGS_DIR = "./logs"
65 |
66 | model = get_training_model(weight_decay, np_branch1=config.paf_layers, np_branch2=config.heat_layers+1)
67 | lr_mult = get_lrmult(model)
68 |
69 | # load previous weights or vgg19 if this is the first run
70 | last_epoch, wfile = get_last_epoch_and_weights_file(WEIGHT_DIR, WEIGHTS_SAVE, epoch)
71 | print("last_epoch:",last_epoch)
72 |
73 | if wfile is not None:
74 | print("Loading %s ..." % wfile)
75 |
76 | model.load_weights(wfile)
77 |
78 | else:
79 | print("Loading vgg19 weights...")
80 |
81 | vgg_model = VGG19(include_top=False, weights='imagenet')
82 |
83 | from_vgg = dict()
84 | from_vgg['conv1_1'] = 'block1_conv1'
85 | from_vgg['conv1_2'] = 'block1_conv2'
86 | from_vgg['conv2_1'] = 'block2_conv1'
87 | from_vgg['conv2_2'] = 'block2_conv2'
88 | from_vgg['conv3_1'] = 'block3_conv1'
89 | from_vgg['conv3_2'] = 'block3_conv2'
90 | from_vgg['conv3_3'] = 'block3_conv3'
91 | from_vgg['conv3_4'] = 'block3_conv4'
92 | from_vgg['conv4_1'] = 'block4_conv1'
93 | from_vgg['conv4_2'] = 'block4_conv2'
94 |
95 | for layer in model.layers:
96 | if layer.name in from_vgg:
97 | vgg_layer_name = from_vgg[layer.name]
98 | layer.set_weights(vgg_model.get_layer(vgg_layer_name).get_weights())
99 | print("Loaded VGG19 layer: " + vgg_layer_name)
100 |
101 | last_epoch = 0
102 |
103 | # euclidean loss as implemented in caffe https://github.com/BVLC/caffe/blob/master/src/caffe/layers/euclidean_loss_layer.cpp
104 | def eucl_loss(x, y):
105 | l = K.sum(K.square(x - y)) / batch_size / 2
106 | return l
107 |
108 | # learning rate schedule - equivalent of caffe lr_policy = "step"
109 | iterations_per_epoch = train_samples // batch_size
110 |
111 | def step_decay(epoch):
112 | steps = epoch * iterations_per_epoch * batch_size
113 | lrate = base_lr * math.pow(gamma, math.floor(steps/stepsize))
114 | print("Epoch:", epoch, "Learning rate:", lrate)
115 | return lrate
116 |
117 |     print("Learning rate decay policy...")
118 | for i in range(1,100,5): step_decay(i)
119 |
120 | # configure callbacks
121 | lrate = LearningRateScheduler(step_decay)
122 | checkpoint = ModelCheckpoint(WEIGHT_DIR + '/' + WEIGHTS_SAVE, monitor='loss', verbose=0, save_best_only=False, save_weights_only=True, mode='min', period=1)
123 | csv_logger = CSVLogger(TRAINING_LOG, append=True)
124 | tb = TensorBoard(log_dir=LOGS_DIR, histogram_freq=0, write_graph=True, write_images=False)
125 | tnan = TerminateOnNaN()
126 | #coco_eval = CocoEval(train_client, val_client)
127 |
128 | callbacks_list = [lrate, checkpoint, csv_logger, tb, tnan]
129 |
130 | # sgd optimizer with lr multipliers
131 | multisgd = MultiSGD(lr=base_lr, momentum=momentum, decay=0.0, nesterov=False, lr_mult=lr_mult)
132 |
133 | # start training
134 |
135 | model.compile(loss=eucl_loss, optimizer=multisgd)
136 |
137 | return model, iterations_per_epoch, val_samples//batch_size, last_epoch, metrics_id, callbacks_list
138 |
139 |
140 |
141 |
142 | def train(config, model, train_client, val_client, iterations_per_epoch, validation_steps, metrics_id, last_epoch, use_client_gen, callbacks_list):
143 |
144 | for epoch in range(last_epoch, max_iter):
145 |
146 | train_di = train_client.gen()
147 |
148 |         # train for one epoch
149 | model.fit_generator(train_di,
150 | steps_per_epoch=iterations_per_epoch,
151 | epochs=epoch+1,
152 | callbacks=callbacks_list,
153 |                             use_multiprocessing=False, # TODO: if set to True, touching the generator from 2 threads will hang the program
154 | initial_epoch=epoch
155 | )
156 |
157 | validate(config, model, val_client, validation_steps, metrics_id, epoch+1)
158 |
159 |
160 | def validate(config, model, val_client, validation_steps, metrics_id, epoch):
161 |
162 | val_di = val_client.gen()
163 | from keras.utils import GeneratorEnqueuer
164 |
165 | val_thre = GeneratorEnqueuer(val_di)
166 | val_thre.start()
167 |
168 | model_metrics = []
169 | inhouse_metrics = []
170 |
171 | for i in range(validation_steps):
172 |
173 | X, GT = next(val_thre.get())
174 |
175 | Y = model.predict(X)
176 |
177 | model_losses = [ (np.sum((gt - y) ** 2) / gt.shape[0] / 2) for gt, y in zip(GT,Y) ]
178 | mm = sum(model_losses)
179 |
180 | if config.paf_layers > 0 and config.heat_layers > 0:
181 | GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
182 | YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
183 | mm6l1 = model_losses[-2]
184 | mm6l2 = model_losses[-1]
185 | elif config.paf_layers == 0 and config.heat_layers > 0:
186 | GTL6 = GT[-1]
187 | YL6 = Y[-1]
188 | mm6l1 = None
189 | mm6l2 = model_losses[-1]
190 | else:
191 |             assert False, "Unsupported layer configuration (not implemented)"
192 |
193 | m = calc_batch_metrics(i, GTL6, YL6, range(config.heat_start, config.bkg_start))
194 | inhouse_metrics += [m]
195 |
196 | model_metrics += [ (i, mm, mm6l1, mm6l2, m["MAE"].sum()/GTL6.shape[0], m["RMSE"].sum()/GTL6.shape[0], m["DIST"].mean()) ]
197 | print("Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f" % model_metrics[-1] )
198 |
199 | inhouse_metrics = pd.concat(inhouse_metrics)
200 | inhouse_metrics['epoch']=epoch
201 | inhouse_metrics.to_csv("logs/val_scores.%s.%04d.txt" % (metrics_id, epoch), sep="\t")
202 |
203 | model_metrics = pd.DataFrame(model_metrics, columns=("batch","loss","stage6l1","stage6l2","mae","rmse","dist") )
204 | model_metrics['epoch']=epoch
205 | del model_metrics['batch']
206 | model_metrics = model_metrics.groupby('epoch').mean()
207 | with open('%s.val.tsv' % metrics_id, 'a') as f:
208 | model_metrics.to_csv(f, header=(epoch==1), sep="\t", float_format='%.4f')
209 |
210 | val_thre.stop()
211 |
212 | def save_network_input_output(model, val_client, validation_steps, metrics_id, batch_size, epoch=None):
213 |
214 | val_di = val_client.gen()
215 |
216 | if epoch is not None:
217 | filename = "nn_io.%s.%04d.h5" % (metrics_id, epoch)
218 | else:
219 | filename = "nn_gt.%s.h5" % metrics_id
220 |
221 | h5 = h5py.File(filename, 'w')
222 |
223 | for i in range(validation_steps):
224 | X, Y = next(val_di)
225 |
226 | grp = h5.create_group("%06d" % i)
227 |
228 | for n, v in enumerate(X):
229 | grp['x%02d' % n] = v
230 |
231 | for n, v in enumerate(Y):
232 | grp['gt%02d' % n] = v
233 |
234 | if model is not None:
235 |
236 | Yp = model.predict(X, batch_size=batch_size)
237 |
238 | for n, v in enumerate(Yp):
239 | grp['y%02d' % n] = v
240 |
241 | print(i)
242 |
243 | h5.close()
244 |
245 | def test_augmentation_speed(train_client):
246 |
247 | train_di = train_client.gen()
248 |
249 | start = time()
250 | batch = 0
251 |
252 | for X, Y in train_di:
253 |
254 | batch +=1
255 | print("batches per second ", batch/(time()-start))
256 |
--------------------------------------------------------------------------------
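The learning-rate handling in prepare() mirrors Caffe's lr_policy = "step": the rate drops by gamma every time the cumulative number of processed samples crosses a multiple of stepsize. A standalone sketch of that arithmetic (train_samples is a made-up value; in practice it comes from train_client.num_samples()):

    import math

    base_lr, gamma, stepsize = 2e-5, 0.333, 121746 * 17
    batch_size = 10
    train_samples = 110000                     # hypothetical value, for illustration only
    iterations_per_epoch = train_samples // batch_size

    def step_decay(epoch):
        steps = epoch * iterations_per_epoch * batch_size
        return base_lr * math.pow(gamma, math.floor(steps / stepsize))

    for epoch in (1, 10, 20, 40, 80):
        print("epoch %3d  lr %.2e" % (epoch, step_decay(epoch)))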
/training/train_pose.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("..")
3 |
4 | from training.train_common import prepare, train, validate, save_network_input_output, test_augmentation_speed
5 | from training.ds_generators import DataGeneratorClient, DataIterator
6 | from config import COCOSourceConfig, GetConfig
7 |
8 | use_client_gen = False
9 | batch_size = 10
10 |
11 | task = sys.argv[1] if len(sys.argv)>1 else "train"
12 | config_name = sys.argv[2] if len(sys.argv)>2 else "Canonical"
13 | experiment_name = sys.argv[3] if len(sys.argv)>3 else None
14 | if experiment_name=='': experiment_name=None
15 | epoch = int(sys.argv[4]) if len(sys.argv)>4 and sys.argv[4]!='' else None
16 |
17 | config = GetConfig(config_name)
18 |
19 | train_client = DataIterator(config, COCOSourceConfig("../dataset/coco_train_dataset.h5"), shuffle=True,
20 | augment=True, batch_size=batch_size)
21 | val_client = DataIterator(config, COCOSourceConfig("../dataset/coco_val_dataset.h5"), shuffle=False, augment=False,
22 | batch_size=batch_size)
23 |
24 | train_samples = train_client.num_samples()
25 | val_samples = val_client.num_samples()
26 |
27 | model, iterations_per_epoch, validation_steps, epoch, metrics_id, callbacks_list = \
28 | prepare(config=config, config_name=config_name, exp_id=experiment_name, train_samples = train_samples, val_samples = val_samples, batch_size=batch_size, epoch=epoch)
29 |
30 |
31 | if task == "train":
32 | train(config, model, train_client, val_client, iterations_per_epoch, validation_steps, metrics_id, epoch, use_client_gen, callbacks_list)
33 |
34 | elif task == "validate":
35 | validate(config, model, val_client, validation_steps, metrics_id, epoch)
36 |
37 | elif task == "save_network_input_output":
38 | save_network_input_output(model, val_client, validation_steps, metrics_id, batch_size, epoch)
39 |
40 | elif task == "save_network_input":
41 | save_network_input_output(None, val_client, validation_steps, metrics_id, batch_size)
42 |
43 | elif task == "test_augmentation_speed":
44 | test_augmentation_speed(train_client)
45 |
--------------------------------------------------------------------------------
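Given the sys.argv parsing above, the script takes up to four positional arguments: task, config name, experiment name and epoch. Typical invocations (run from the training/ directory, since the dataset paths are relative; "exp1" is just a placeholder experiment name):

    python train_pose.py train Canonical exp1
    python train_pose.py validate Canonical exp1 5

The last two arguments may be omitted, or passed as empty strings to fall back to the defaults.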
/trash/lmdb.parse.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 |     "# This is my code which compares pictures in the original project's lmdb and our hdf5\n",
10 |     "# actually we train/test on different pictures, which does not allow us to directly compare losses"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "collapsed": false
18 | },
19 | "outputs": [],
20 | "source": [
21 | "import lmdb\n",
22 | "import caffe\n",
23 | "import struct\n",
24 | "import numpy as np\n",
25 | "import hashlib\n",
26 | "import h5py\n",
27 | "import json"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 102,
33 | "metadata": {
34 | "collapsed": true
35 | },
36 | "outputs": [],
37 | "source": [
38 | "db = lmdb.open(\"/home/anatolix/iidf-data/Realtime_Pose_Estimation_LMDB\", readonly=True )"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 170,
44 | "metadata": {
45 | "collapsed": true
46 | },
47 | "outputs": [],
48 | "source": [
49 | "def process(key, array):\n",
50 | " metadata=array[3]\n",
51 | " \n",
52 | " #0 dataset name\n",
53 | " dataset = struct.unpack('@10s',metadata[0].tobytes()[:10])[0]\n",
54 | " dataset = dataset.partition(b'\\x00')[0]\n",
55 | " \n",
56 | " #1 image height width\n",
57 | " height, width = struct.unpack('ff',metadata[1].tobytes()[:8])\n",
58 | " \n",
59 | " #2 validation, numother, people_index, 4*annolist_index, write_count\n",
60 | " validation, numother, people_index, annolist_index, write_count, total_count = struct.unpack('0 else None } )\n",
130 | " \n",
131 | " assert annolist.frame_sec.shape==(1,0) or annolist.frame_sec.shape==(1,1)\n",
132 | " frame_sec = annolist.frame_sec[0,0] if annolist.frame_sec.shape==(1,1) else None\n",
133 | " \n",
134 | " assert annolist.vididx.shape==(1,0) or annolist.vididx.shape==(1,1)\n",
135 | " vididx = annolist.vididx[0,0] if annolist.vididx.shape==(1,1) else None\n",
136 | " \n",
137 | " assert keypoints.img_train.shape[0]==1\n",
138 | " img_train = keypoints.img_train[0,n]\n",
139 | "\n",
140 | " assert keypoints.version.shape==(1,)\n",
141 | " version = keypoints.version[0]\n",
142 | " \n",
143 | " single_person = []\n",
144 | " assert keypoints.single_person[n,0].shape[1]==1\n",
145 | " for i in range(keypoints.single_person[n,0].shape[0]):\n",
146 | " single_person.append(keypoints.single_person[n,0][i,0])\n",
147 | " \n",
148 | " assert keypoints.act[n,0].act_id.shape==(1,1)\n",
149 | " act_id = keypoints.act[n,0].act_id[0,0]\n",
150 | " \n",
151 | " assert keypoints.act[n,0].act_name.shape==(0,) or keypoints.act[n,0].act_name.shape==(1,) \n",
152 | " act_name = keypoints.act[n,0].act_name[0] if keypoints.act[n,0].act_name.shape==(1,) else None\n",
153 | " \n",
154 | " assert keypoints.act[n,0].cat_name.shape==(0,) or keypoints.act[n,0].cat_name.shape==(1,) \n",
155 | " cat_name = keypoints.act[n,0].cat_name[0] if keypoints.act[n,0].cat_name.shape==(1,) else None\n",
156 | " \n",
157 | " video_name = None\n",
158 | " if vididx is not None:\n",
159 | " video_name = \"https://www.youtube.com/watch?v=\" + keypoints.video_list[0,vididx-1][0] \n",
160 | " \n",
161 | " \n",
162 | " return { 'image':image, 'annorects':annorects, 'img_train':img_train, 'version':version, 'single_person':single_person, 'act':{'act_id':act_id, 'act_name':act_name, 'cat_name':cat_name}, 'video_name':video_name, 'vididx':vididx, 'frame_sec':frame_sec } \n",
163 | " \n",
164 | " \n"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 281,
170 | "metadata": {
171 | "collapsed": false
172 | },
173 | "outputs": [
174 | {
175 | "name": "stdout",
176 | "output_type": "stream",
177 | "text": [
178 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n",
179 | " 'annorects': [{'annopoints': None,\n",
180 | " 'head': None,\n",
181 | " 'objpos': {'x': 601, 'y': 380},\n",
182 | " 'scale': 3.8807339512004684}],\n",
183 | " 'frame_sec': None,\n",
184 | " 'image': '037454012.jpg',\n",
185 | " 'img_train': 0,\n",
186 | " 'single_person': [1],\n",
187 | " 'version': '12',\n",
188 | " 'video_name': None,\n",
189 | " 'vididx': None}\n",
190 | "\n",
191 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n",
192 | " 'annorects': [{'annopoints': None,\n",
193 | " 'head': None,\n",
194 | " 'objpos': {'x': 881, 'y': 394},\n",
195 | " 'scale': 8.0781661285219926},\n",
196 | " {'annopoints': None,\n",
197 | " 'head': None,\n",
198 | " 'objpos': {'x': 338, 'y': 210},\n",
199 | " 'scale': 8.9041293791139395}],\n",
200 | " 'frame_sec': None,\n",
201 | " 'image': '095071431.jpg',\n",
202 | " 'img_train': 0,\n",
203 | " 'single_person': [],\n",
204 | " 'version': '12',\n",
205 | " 'video_name': None,\n",
206 | " 'vididx': None}\n",
207 | "\n",
208 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n",
209 | " 'annorects': [{'annopoints': None,\n",
210 | " 'head': None,\n",
211 | " 'objpos': {'x': 619, 'y': 350},\n",
212 | " 'scale': 4.3266615305567875}],\n",
213 | " 'frame_sec': None,\n",
214 | " 'image': '073199394.jpg',\n",
215 | " 'img_train': 0,\n",
216 | " 'single_person': [1],\n",
217 | " 'version': '12',\n",
218 | " 'video_name': None,\n",
219 | " 'vididx': None}\n",
220 | "\n",
221 | "{'act': {'act_id': -1, 'act_name': None, 'cat_name': None},\n",
222 | " 'annorects': [{'annopoints': None,\n",
223 | " 'head': None,\n",
224 | " 'objpos': {'x': 684, 'y': 309},\n",
225 | " 'scale': 4.9284804960555526}],\n",
226 | " 'frame_sec': None,\n",
227 | " 'image': '059865848.jpg',\n",
228 | " 'img_train': 0,\n",
229 | " 'single_person': [1],\n",
230 | " 'version': '12',\n",
231 | " 'video_name': None,\n",
232 | " 'vididx': None}\n",
233 | "\n",
234 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
235 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 0, 'x': 610, 'y': 187},\n",
236 | " {'id': 7, 'is_visible': 1, 'x': 647, 'y': 176},\n",
237 | " {'id': 8,\n",
238 | " 'is_visible': None,\n",
239 | " 'x': 637.02009999999996,\n",
240 | " 'y': 189.81829999999999},\n",
241 | " {'id': 9,\n",
242 | " 'is_visible': None,\n",
243 | " 'x': 695.97990000000004,\n",
244 | " 'y': 108.18170000000001},\n",
245 | " {'id': 0, 'is_visible': 1, 'x': 620, 'y': 394},\n",
246 | " {'id': 1, 'is_visible': 1, 'x': 616, 'y': 269},\n",
247 | " {'id': 2, 'is_visible': 1, 'x': 573, 'y': 185},\n",
248 | " {'id': 3, 'is_visible': 0, 'x': 647, 'y': 188},\n",
249 | " {'id': 4, 'is_visible': 1, 'x': 661, 'y': 221},\n",
250 | " {'id': 5, 'is_visible': 1, 'x': 656, 'y': 231},\n",
251 | " {'id': 10, 'is_visible': 1, 'x': 606, 'y': 217},\n",
252 | " {'id': 11, 'is_visible': 1, 'x': 553, 'y': 161},\n",
253 | " {'id': 12, 'is_visible': 1, 'x': 601, 'y': 167},\n",
254 | " {'id': 13, 'is_visible': 1, 'x': 692, 'y': 185},\n",
255 | " {'id': 14, 'is_visible': 1, 'x': 693, 'y': 240},\n",
256 | " {'id': 15, 'is_visible': 1, 'x': 688, 'y': 313}],\n",
257 | " 'head': {'x1': 627, 'x2': 706, 'y1': 100, 'y2': 198},\n",
258 | " 'objpos': {'x': 594, 'y': 257},\n",
259 | " 'scale': 3.0210461764097549},\n",
260 | " {'annopoints': [{'id': 6, 'is_visible': 0, 'x': 979, 'y': 221},\n",
261 | " {'id': 7, 'is_visible': 0, 'x': 906, 'y': 190},\n",
262 | " {'id': 8,\n",
263 | " 'is_visible': None,\n",
264 | " 'x': 912.49149999999997,\n",
265 | " 'y': 190.65860000000001},\n",
266 | " {'id': 9,\n",
267 | " 'is_visible': None,\n",
268 | " 'x': 830.50850000000003,\n",
269 | " 'y': 182.34139999999999},\n",
270 | " {'id': 0, 'is_visible': 1, 'x': 895, 'y': 293},\n",
271 | " {'id': 1, 'is_visible': 1, 'x': 910, 'y': 279},\n",
272 | " {'id': 2, 'is_visible': 0, 'x': 945, 'y': 223},\n",
273 | " {'id': 3, 'is_visible': 1, 'x': 1012, 'y': 218},\n",
274 | " {'id': 4, 'is_visible': 1, 'x': 961, 'y': 315},\n",
275 | " {'id': 5, 'is_visible': 1, 'x': 960, 'y': 403},\n",
276 | " {'id': 10, 'is_visible': 1, 'x': 871, 'y': 304},\n",
277 | " {'id': 11, 'is_visible': 1, 'x': 883, 'y': 229},\n",
278 | " {'id': 12, 'is_visible': 0, 'x': 888, 'y': 174},\n",
279 | " {'id': 13, 'is_visible': 1, 'x': 924, 'y': 206},\n",
280 | " {'id': 14, 'is_visible': 1, 'x': 1013, 'y': 203},\n",
281 | " {'id': 15, 'is_visible': 1, 'x': 955, 'y': 263}],\n",
282 | " 'head': {'x1': 841, 'x2': 902, 'y1': 145, 'y2': 228},\n",
283 | " 'objpos': {'x': 952, 'y': 222},\n",
284 | " 'scale': 2.4721165021090732}],\n",
285 | " 'frame_sec': 11,\n",
286 | " 'image': '015601864.jpg',\n",
287 | " 'img_train': 1,\n",
288 | " 'single_person': [1, 2],\n",
289 | " 'version': '12',\n",
290 | " 'video_name': 'https://www.youtube.com/watch?v=aAOusnrSsHI',\n",
291 | " 'vididx': 1660}\n",
292 | "\n",
293 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
294 | " 'annorects': [{'annopoints': [{'id': 2, 'is_visible': '1', 'x': 806, 'y': 543},\n",
295 | " {'id': 3, 'is_visible': '1', 'x': 720, 'y': 593},\n",
296 | " {'id': 6, 'is_visible': '1', 'x': 763, 'y': 568},\n",
297 | " {'id': 7, 'is_visible': '0', 'x': 683, 'y': 290},\n",
298 | " {'id': 8,\n",
299 | " 'is_visible': None,\n",
300 | " 'x': 682,\n",
301 | " 'y': 256},\n",
302 | " {'id': 9, 'is_visible': None, 'x': 676, 'y': 68},\n",
303 | " {'id': 10,\n",
304 | " 'is_visible': '1',\n",
305 | " 'x': 563,\n",
306 | " 'y': 296},\n",
307 | " {'id': 11,\n",
308 | " 'is_visible': '1',\n",
309 | " 'x': 555,\n",
310 | " 'y': 410},\n",
311 | " {'id': 12,\n",
312 | " 'is_visible': '0',\n",
313 | " 'x': 647,\n",
314 | " 'y': 281},\n",
315 | " {'id': 13,\n",
316 | " 'is_visible': '1',\n",
317 | " 'x': 719,\n",
318 | " 'y': 299},\n",
319 | " {'id': 14,\n",
320 | " 'is_visible': '1',\n",
321 | " 'x': 711,\n",
322 | " 'y': 516},\n",
323 | " {'id': 15,\n",
324 | " 'is_visible': '1',\n",
325 | " 'x': 545,\n",
326 | " 'y': 466}],\n",
327 | " 'head': {'x1': 607, 'x2': 752, 'y1': 70, 'y2': 255},\n",
328 | " 'objpos': {'x': 619, 'y': 329},\n",
329 | " 'scale': 5.6412764513007154},\n",
330 | " {'annopoints': [{'id': 2, 'is_visible': '1', 'x': 987, 'y': 607},\n",
331 | " {'id': 3,\n",
332 | " 'is_visible': '1',\n",
333 | " 'x': 1194,\n",
334 | " 'y': 571},\n",
335 | " {'id': 6,\n",
336 | " 'is_visible': '1',\n",
337 | " 'x': 1091,\n",
338 | " 'y': 589},\n",
339 | " {'id': 7,\n",
340 | " 'is_visible': '1',\n",
341 | " 'x': 1038,\n",
342 | " 'y': 292},\n",
343 | " {'id': 8,\n",
344 | " 'is_visible': None,\n",
345 | " 'x': 1025,\n",
346 | " 'y': 261},\n",
347 | " {'id': 9, 'is_visible': None, 'x': 947, 'y': 74},\n",
348 | " {'id': 10,\n",
349 | " 'is_visible': '0',\n",
350 | " 'x': 914,\n",
351 | " 'y': 539},\n",
352 | " {'id': 11,\n",
353 | " 'is_visible': '1',\n",
354 | " 'x': 955,\n",
355 | " 'y': 470},\n",
356 | " {'id': 12,\n",
357 | " 'is_visible': '1',\n",
358 | " 'x': 931,\n",
359 | " 'y': 315},\n",
360 | " {'id': 13,\n",
361 | " 'is_visible': '1',\n",
362 | " 'x': 1145,\n",
363 | " 'y': 269},\n",
364 | " {'id': 14,\n",
365 | " 'is_visible': '1',\n",
366 | " 'x': 1226,\n",
367 | " 'y': 475},\n",
368 | " {'id': 15,\n",
369 | " 'is_visible': '1',\n",
370 | " 'x': 1096,\n",
371 | " 'y': 433}],\n",
372 | " 'head': {'x1': 903, 'x2': 1070, 'y1': 73, 'y2': 263},\n",
373 | " 'objpos': {'x': 1010, 'y': 412},\n",
374 | " 'scale': 6.0710513092873786},\n",
375 | " {'annopoints': [{'id': 2, 'is_visible': '1', 'x': 228, 'y': 537},\n",
376 | " {'id': 3, 'is_visible': '1', 'x': 74, 'y': 536},\n",
377 | " {'id': 6, 'is_visible': '1', 'x': 151, 'y': 537},\n",
378 | " {'id': 7, 'is_visible': '1', 'x': 129, 'y': 251},\n",
379 | " {'id': 8,\n",
380 | " 'is_visible': None,\n",
381 | " 'x': 123,\n",
382 | " 'y': 218},\n",
383 | " {'id': 9, 'is_visible': None, 'x': 89, 'y': 31},\n",
384 | " {'id': 10,\n",
385 | " 'is_visible': '0',\n",
386 | " 'x': 220,\n",
387 | " 'y': 373},\n",
388 | " {'id': 11,\n",
389 | " 'is_visible': '1',\n",
390 | " 'x': 297,\n",
391 | " 'y': 456},\n",
392 | " {'id': 12,\n",
393 | " 'is_visible': '1',\n",
394 | " 'x': 232,\n",
395 | " 'y': 251},\n",
396 | " {'id': 13, 'is_visible': '1', 'x': 26, 'y': 251},\n",
397 | " {'id': 14,\n",
398 | " 'is_visible': '1',\n",
399 | " 'x': 26,\n",
400 | " 'y': 423}],\n",
401 | " 'head': {'x1': 27, 'x2': 186, 'y1': 36, 'y2': 214},\n",
402 | " 'objpos': {'x': 133, 'y': 315},\n",
403 | " 'scale': 5.7281620088820802}],\n",
404 | " 'frame_sec': 84,\n",
405 | " 'image': '015599452.jpg',\n",
406 | " 'img_train': 1,\n",
407 | " 'single_person': [3],\n",
408 | " 'version': '12',\n",
409 | " 'video_name': 'https://www.youtube.com/watch?v=aAOusnrSsHI',\n",
410 | " 'vididx': 1660}\n",
411 | "\n",
412 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
413 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 1, 'x': 974, 'y': 446},\n",
414 | " {'id': 7, 'is_visible': 1, 'x': 985, 'y': 253},\n",
415 | " {'id': 8,\n",
416 | " 'is_visible': None,\n",
417 | " 'x': 982.75909999999999,\n",
418 | " 'y': 235.96940000000001},\n",
419 | " {'id': 9,\n",
420 | " 'is_visible': None,\n",
421 | " 'x': 962.24090000000001,\n",
422 | " 'y': 80.030600000000007},\n",
423 | " {'id': 0, 'is_visible': 1, 'x': 804, 'y': 711},\n",
424 | " {'id': 1, 'is_visible': 1, 'x': 816, 'y': 510},\n",
425 | " {'id': 2, 'is_visible': 1, 'x': 908, 'y': 438},\n",
426 | " {'id': 3, 'is_visible': 1, 'x': 1040, 'y': 454},\n",
427 | " {'id': 4, 'is_visible': 1, 'x': 906, 'y': 528},\n",
428 | " {'id': 5, 'is_visible': 1, 'x': 883, 'y': 707},\n",
429 | " {'id': 10, 'is_visible': 1, 'x': 869, 'y': 214},\n",
430 | " {'id': 11, 'is_visible': 1, 'x': 798, 'y': 340},\n",
431 | " {'id': 12, 'is_visible': 1, 'x': 902, 'y': 253},\n",
432 | " {'id': 13, 'is_visible': 1, 'x': 1067, 'y': 253},\n",
433 | " {'id': 14, 'is_visible': 1, 'x': 1167, 'y': 353},\n",
434 | " {'id': 15,\n",
435 | " 'is_visible': 1,\n",
436 | " 'x': 1142,\n",
437 | " 'y': 478}],\n",
438 | " 'head': {'x1': 914, 'x2': 1031, 'y1': 79, 'y2': 237},\n",
439 | " 'objpos': {'x': 966, 'y': 340},\n",
440 | " 'scale': 4.7184878933827941},\n",
441 | " {'annopoints': [{'id': 6, 'is_visible': 1, 'x': 471, 'y': 512},\n",
442 | " {'id': 7, 'is_visible': 1, 'x': 463, 'y': 268},\n",
443 | " {'id': 8,\n",
444 | " 'is_visible': None,\n",
445 | " 'x': 472.46640000000002,\n",
446 | " 'y': 220.85730000000001},\n",
447 | " {'id': 9,\n",
448 | " 'is_visible': None,\n",
449 | " 'x': 503.53359999999998,\n",
450 | " 'y': 66.142700000000005},\n",
451 | " {'id': 0, 'is_visible': 1, 'x': 667, 'y': 633},\n",
452 | " {'id': 1, 'is_visible': 1, 'x': 675, 'y': 462},\n",
453 | " {'id': 2, 'is_visible': 1, 'x': 567, 'y': 519},\n",
454 | " {'id': 3, 'is_visible': 1, 'x': 375, 'y': 504},\n",
455 | " {'id': 4, 'is_visible': 0, 'x': 543, 'y': 476},\n",
456 | " {'id': 5, 'is_visible': 0, 'x': 532, 'y': 651},\n",
457 | " {'id': 10, 'is_visible': 1, 'x': 702, 'y': 267},\n",
458 | " {'id': 11, 'is_visible': 1, 'x': 721, 'y': 386},\n",
459 | " {'id': 12, 'is_visible': 1, 'x': 584, 'y': 256},\n",
460 | " {'id': 13, 'is_visible': 1, 'x': 341, 'y': 280},\n",
461 | " {'id': 14, 'is_visible': 1, 'x': 310, 'y': 432},\n",
462 | " {'id': 15, 'is_visible': 1, 'x': 372, 'y': 496}],\n",
463 | " 'head': {'x1': 427, 'x2': 549, 'y1': 66, 'y2': 221},\n",
464 | " 'objpos': {'x': 489, 'y': 383},\n",
465 | " 'scale': 4.734087451663731}],\n",
466 | " 'frame_sec': 135,\n",
467 | " 'image': '005808361.jpg',\n",
468 | " 'img_train': 1,\n",
469 | " 'single_person': [],\n",
470 | " 'version': '12',\n",
471 | " 'video_name': 'https://www.youtube.com/watch?v=s1tES1dQA74',\n",
472 | " 'vididx': 2462}\n",
473 | "\n",
474 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
475 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 0, 'x': 248, 'y': 341},\n",
476 | " {'id': 7, 'is_visible': 1, 'x': 279, 'y': 263},\n",
477 | " {'id': 8,\n",
478 | " 'is_visible': None,\n",
479 | " 'x': 277.02100000000002,\n",
480 | " 'y': 268.77859999999998},\n",
481 | " {'id': 9,\n",
482 | " 'is_visible': None,\n",
483 | " 'x': 305.97899999999998,\n",
484 | " 'y': 184.22139999999999},\n",
485 | " {'id': 0, 'is_visible': 1, 'x': 301, 'y': 461},\n",
486 | " {'id': 1, 'is_visible': 1, 'x': 305, 'y': 375},\n",
487 | " {'id': 2, 'is_visible': 1, 'x': 201, 'y': 340},\n",
488 | " {'id': 3, 'is_visible': 0, 'x': 294, 'y': 342},\n",
489 | " {'id': 4, 'is_visible': 0, 'x': 335, 'y': 370},\n",
490 | " {'id': 5, 'is_visible': 1, 'x': 331, 'y': 455},\n",
491 | " {'id': 10, 'is_visible': 1, 'x': 328, 'y': 354},\n",
492 | " {'id': 11, 'is_visible': 1, 'x': 260, 'y': 335},\n",
493 | " {'id': 12, 'is_visible': 1, 'x': 244, 'y': 261},\n",
494 | " {'id': 13, 'is_visible': 1, 'x': 314, 'y': 264},\n",
495 | " {'id': 14, 'is_visible': 1, 'x': 327, 'y': 320},\n",
496 | " {'id': 15, 'is_visible': 0, 'x': 362, 'y': 346}],\n",
497 | " 'head': {'x1': 252, 'x2': 331, 'y1': 187, 'y2': 266},\n",
498 | " 'objpos': {'x': 291, 'y': 353},\n",
499 | " 'scale': 2.681348914259388},\n",
500 | " {'annopoints': [{'id': 6, 'is_visible': 0, 'x': 399, 'y': 374},\n",
501 | " {'id': 7, 'is_visible': 1, 'x': 498, 'y': 317},\n",
502 | " {'id': 8,\n",
503 | " 'is_visible': None,\n",
504 | " 'x': 504.59530000000001,\n",
505 | " 'y': 315.17579999999998},\n",
506 | " {'id': 9,\n",
507 | " 'is_visible': None,\n",
508 | " 'x': 585.40470000000005,\n",
509 | " 'y': 292.82420000000002},\n",
510 | " {'id': 0, 'is_visible': 1, 'x': 515, 'y': 512},\n",
511 | " {'id': 1, 'is_visible': 1, 'x': 514, 'y': 420},\n",
512 | " {'id': 2, 'is_visible': 1, 'x': 406, 'y': 388},\n",
513 | " {'id': 3, 'is_visible': 0, 'x': 392, 'y': 360},\n",
514 | " {'id': 4, 'is_visible': 0, 'x': 493, 'y': 434},\n",
515 | " {'id': 5, 'is_visible': 1, 'x': 518, 'y': 504},\n",
516 | " {'id': 10, 'is_visible': 1, 'x': 628, 'y': 426},\n",
517 | " {'id': 11, 'is_visible': 1, 'x': 551, 'y': 398},\n",
518 | " {'id': 12, 'is_visible': 1, 'x': 501, 'y': 351},\n",
519 | " {'id': 13, 'is_visible': 1, 'x': 495, 'y': 282},\n",
520 | " {'id': 14, 'is_visible': 1, 'x': 425, 'y': 301},\n",
521 | " {'id': 15, 'is_visible': 1, 'x': 483, 'y': 334}],\n",
522 | " 'head': {'x1': 510, 'x2': 580, 'y1': 265, 'y2': 343},\n",
523 | " 'objpos': {'x': 472, 'y': 377},\n",
524 | " 'scale': 2.5153099212621886}],\n",
525 | " 'frame_sec': 240,\n",
526 | " 'image': '086617615.jpg',\n",
527 | " 'img_train': 1,\n",
528 | " 'single_person': [],\n",
529 | " 'version': '12',\n",
530 | " 'video_name': 'https://www.youtube.com/watch?v=s1tES1dQA74',\n",
531 | " 'vididx': 2462}\n",
532 | "\n",
533 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
534 | " 'annorects': [{'annopoints': [{'id': 6, 'is_visible': 1, 'x': 904, 'y': 237},\n",
535 | " {'id': 7, 'is_visible': 1, 'x': 858, 'y': 135},\n",
536 | " {'id': 8,\n",
537 | " 'is_visible': None,\n",
538 | " 'x': 871.18769999999995,\n",
539 | " 'y': 180.42439999999999},\n",
540 | " {'id': 9,\n",
541 | " 'is_visible': None,\n",
542 | " 'x': 835.81230000000005,\n",
543 | " 'y': 58.575600000000001},\n",
544 | " {'id': 0, 'is_visible': 1, 'x': 980, 'y': 322},\n",
545 | " {'id': 1, 'is_visible': 0, 'x': 896, 'y': 318},\n",
546 | " {'id': 2, 'is_visible': 1, 'x': 865, 'y': 248},\n",
547 | " {'id': 3, 'is_visible': 1, 'x': 943, 'y': 226},\n",
548 | " {'id': 4, 'is_visible': 1, 'x': 948, 'y': 290},\n",
549 | " {'id': 5, 'is_visible': 1, 'x': 881, 'y': 349},\n",
550 | " {'id': 10, 'is_visible': 1, 'x': 772, 'y': 294},\n",
551 | " {'id': 11, 'is_visible': 1, 'x': 754, 'y': 247},\n",
552 | " {'id': 12, 'is_visible': 1, 'x': 792, 'y': 147},\n",
553 | " {'id': 13, 'is_visible': 1, 'x': 923, 'y': 123},\n",
554 | " {'id': 14, 'is_visible': 0, 'x': 995, 'y': 163},\n",
555 | " {'id': 15, 'is_visible': 0, 'x': 961, 'y': 223}],\n",
556 | " 'head': {'x1': 806, 'x2': 901, 'y1': 56, 'y2': 183},\n",
557 | " 'objpos': {'x': 897, 'y': 171},\n",
558 | " 'scale': 3.8064030264805111}],\n",
559 | " 'frame_sec': 6,\n",
560 | " 'image': '060111501.jpg',\n",
561 | " 'img_train': 1,\n",
562 | " 'single_person': [1],\n",
563 | " 'version': '12',\n",
564 | " 'video_name': 'https://www.youtube.com/watch?v=0skrJnNN3_I',\n",
565 | " 'vididx': 89}\n",
566 | "\n",
567 | "{'act': {'act_id': 1, 'act_name': 'curling', 'cat_name': 'sports'},\n",
568 | " 'annorects': [{'annopoints': [{'id': 0, 'is_visible': '1', 'x': 461, 'y': 398},\n",
569 | " {'id': 1, 'is_visible': '1', 'x': 509, 'y': 335},\n",
570 | " {'id': 2, 'is_visible': '1', 'x': 517, 'y': 218},\n",
571 | " {'id': 3, 'is_visible': '1', 'x': 570, 'y': 203},\n",
572 | " {'id': 5, 'is_visible': '0', 'x': 568, 'y': 309},\n",
573 | " {'id': 6, 'is_visible': '1', 'x': 544, 'y': 211},\n",
574 | " {'id': 7, 'is_visible': '1', 'x': 620, 'y': 273},\n",
575 | " {'id': 8,\n",
576 | " 'is_visible': None,\n",
577 | " 'x': 614,\n",
578 | " 'y': 267},\n",
579 | " {'id': 9,\n",
580 | " 'is_visible': None,\n",
581 | " 'x': 668,\n",
582 | " 'y': 326},\n",
583 | " {'id': 10,\n",
584 | " 'is_visible': '1',\n",
585 | " 'x': 537,\n",
586 | " 'y': 288},\n",
587 | " {'id': 11,\n",
588 | " 'is_visible': '1',\n",
589 | " 'x': 503,\n",
590 | " 'y': 234},\n",
591 | " {'id': 12,\n",
592 | " 'is_visible': '1',\n",
593 | " 'x': 587,\n",
594 | " 'y': 280},\n",
595 | " {'id': 13,\n",
596 | " 'is_visible': '1',\n",
597 | " 'x': 652,\n",
598 | " 'y': 265},\n",
599 | " {'id': 14,\n",
600 | " 'is_visible': '1',\n",
601 | " 'x': 636,\n",
602 | " 'y': 356},\n",
603 | " {'id': 15,\n",
604 | " 'is_visible': '1',\n",
605 | " 'x': 621,\n",
606 | " 'y': 417}],\n",
607 | " 'head': {'x1': 609, 'x2': 674, 'y1': 259, 'y2': 335},\n",
608 | " 'objpos': {'x': 582, 'y': 268},\n",
609 | " 'scale': 2.4001199970001501},\n",
610 | " {'annopoints': [{'id': 0, 'is_visible': '1', 'x': 896, 'y': 436},\n",
611 | " {'id': 1, 'is_visible': '0', 'x': 875, 'y': 397},\n",
612 | " {'id': 2, 'is_visible': '1', 'x': 885, 'y': 295},\n",
613 | " {'id': 3, 'is_visible': '1', 'x': 852, 'y': 363},\n",
614 | " {'id': 4, 'is_visible': '1', 'x': 797, 'y': 442},\n",
615 | " {'id': 5, 'is_visible': '1', 'x': 823, 'y': 505},\n",
616 | " {'id': 6, 'is_visible': '1', 'x': 869, 'y': 329},\n",
617 | " {'id': 7, 'is_visible': '1', 'x': 737, 'y': 323},\n",
618 | " {'id': 8,\n",
619 | " 'is_visible': None,\n",
620 | " 'x': 719,\n",
621 | " 'y': 326},\n",
622 | " {'id': 9,\n",
623 | " 'is_visible': None,\n",
624 | " 'x': 648,\n",
625 | " 'y': 338},\n",
626 | " {'id': 10,\n",
627 | " 'is_visible': '1',\n",
628 | " 'x': 804,\n",
629 | " 'y': 305},\n",
630 | " {'id': 11,\n",
631 | " 'is_visible': '1',\n",
632 | " 'x': 804,\n",
633 | " 'y': 237},\n",
634 | " {'id': 12,\n",
635 | " 'is_visible': '1',\n",
636 | " 'x': 741,\n",
637 | " 'y': 285},\n",
638 | " {'id': 13,\n",
639 | " 'is_visible': '1',\n",
640 | " 'x': 732,\n",
641 | " 'y': 361},\n",
642 | " {'id': 14,\n",
643 | " 'is_visible': '1',\n",
644 | " 'x': 758,\n",
645 | " 'y': 411},\n",
646 | " {'id': 15,\n",
647 | " 'is_visible': '1',\n",
648 | " 'x': 757,\n",
649 | " 'y': 485}],\n",
650 | " 'head': {'x1': 652, 'x2': 716, 'y1': 301, 'y2': 364},\n",
651 | " 'objpos': {'x': 765, 'y': 394},\n",
652 | " 'scale': 2.1553282812601888}],\n",
653 | " 'frame_sec': 81,\n",
654 | " 'image': '070807258.jpg',\n",
655 | " 'img_train': 1,\n",
656 | " 'single_person': [],\n",
657 | " 'version': '12',\n",
658 | " 'video_name': 'https://www.youtube.com/watch?v=0skrJnNN3_I',\n",
659 | " 'vididx': 89}\n",
660 | "\n"
661 | ]
662 | }
663 | ],
664 | "source": [
665 | "for n in range(10):\n",
666 | " pprint(load_anno(keypoints,n))\n",
667 | " print()"
668 | ]
669 | },
670 | {
671 | "cell_type": "code",
672 | "execution_count": 282,
673 | "metadata": {
674 | "collapsed": false
675 | },
676 | "outputs": [
677 | {
678 | "name": "stdout",
679 | "output_type": "stream",
680 | "text": [
681 | "{'act': {'act_id': 487,\n",
682 | " 'act_name': 'sitting, arts and crafts, carving wood, weaving, spinning wool',\n",
683 | " 'cat_name': 'miscellaneous'},\n",
684 | " 'annorects': [{'annopoints': [{'id': 0,\n",
685 | " 'is_visible': '1',\n",
686 | " 'x': 435,\n",
687 | " 'y': 1066},\n",
688 | " {'id': 1, 'is_visible': '1', 'x': 181, 'y': 811},\n",
689 | " {'id': 2, 'is_visible': '1', 'x': 668, 'y': 845},\n",
690 | " {'id': 3, 'is_visible': '1', 'x': 879, 'y': 879},\n",
691 | " {'id': 4,\n",
692 | " 'is_visible': '1',\n",
693 | " 'x': 649,\n",
694 | " 'y': 1003},\n",
695 | " {'id': 5,\n",
696 | " 'is_visible': '1',\n",
697 | " 'x': 448,\n",
698 | " 'y': 1003},\n",
699 | " {'id': 6, 'is_visible': '1', 'x': 774, 'y': 862},\n",
700 | " {'id': 7, 'is_visible': '1', 'x': 817, 'y': 397},\n",
701 | " {'id': 8,\n",
702 | " 'is_visible': None,\n",
703 | " 'x': 830,\n",
704 | " 'y': 266},\n",
705 | " {'id': 9, 'is_visible': None, 'x': 857, 'y': -1},\n",
706 | " {'id': 10,\n",
707 | " 'is_visible': '1',\n",
708 | " 'x': 606,\n",
709 | " 'y': 655},\n",
710 | " {'id': 11,\n",
711 | " 'is_visible': '1',\n",
712 | " 'x': 562,\n",
713 | " 'y': 690},\n",
714 | " {'id': 12,\n",
715 | " 'is_visible': '1',\n",
716 | " 'x': 615,\n",
717 | " 'y': 395},\n",
718 | " {'id': 13,\n",
719 | " 'is_visible': '1',\n",
720 | " 'x': 1018,\n",
721 | " 'y': 398},\n",
722 | " {'id': 14,\n",
723 | " 'is_visible': '1',\n",
724 | " 'x': 1065,\n",
725 | " 'y': 718},\n",
726 | " {'id': 15,\n",
727 | " 'is_visible': '1',\n",
728 | " 'x': 885,\n",
729 | " 'y': 590}],\n",
730 | " 'head': {'x1': 740, 'x2': 948, 'y1': 0, 'y2': 265},\n",
731 | " 'objpos': {'x': 814, 'y': 758},\n",
732 | " 'scale': 8.0851508334724329}],\n",
733 | " 'frame_sec': 150,\n",
734 | " 'image': '058019490.jpg',\n",
735 | " 'img_train': 1,\n",
736 | " 'single_person': [1],\n",
737 | " 'version': '12',\n",
738 | " 'video_name': 'https://www.youtube.com/watch?v=UQYoVlVX68w',\n",
739 | " 'vididx': 1367}\n"
740 | ]
741 | }
742 | ],
743 | "source": [
744 | "pprint(load_anno(keypoints,18099))\n",
745 | "\n"
746 | ]
747 | },
748 | {
749 | "cell_type": "code",
750 | "execution_count": 221,
751 | "metadata": {
752 | "collapsed": false
753 | },
754 | "outputs": [],
755 | "source": [
756 | "\n",
757 | "n=5\n"
758 | ]
759 | },
760 | {
761 | "cell_type": "code",
762 | "execution_count": 271,
763 | "metadata": {
764 | "collapsed": false
765 | },
766 | "outputs": [
767 | {
768 | "data": {
769 | "text/plain": [
770 | "array(['-08Vnk8XONY'],\n",
771 | "              dtype='
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
 24 | = 1, 'octave should >= 1'
25 | # return starting_range, ending_range, octave
26 |
27 | def getJetColor(v, vmin, vmax):
28 | c = np.zeros((3))
29 | if (v < vmin):
30 | v = vmin
31 | if (v > vmax):
32 | v = vmax
33 | dv = vmax - vmin
34 | if (v < (vmin + 0.125 * dv)):
35 | c[0] = 256 * (0.5 + (v * 4)) #B: 0.5 ~ 1
36 | elif (v < (vmin + 0.375 * dv)):
37 | c[0] = 255
38 | c[1] = 256 * (v - 0.125) * 4 #G: 0 ~ 1
39 | elif (v < (vmin + 0.625 * dv)):
40 | c[0] = 256 * (-4 * v + 2.5) #B: 1 ~ 0
41 | c[1] = 255
42 | c[2] = 256 * (4 * (v - 0.375)) #R: 0 ~ 1
43 | elif (v < (vmin + 0.875 * dv)):
44 | c[1] = 256 * (-4 * v + 3.5) #G: 1 ~ 0
45 | c[2] = 255
46 | else:
47 | c[2] = 256 * (-4 * v + 4.5) #R: 1 ~ 0.5
48 | return c
49 |
50 | def colorize(gray_img):
51 | out = np.zeros(gray_img.shape + (3,))
52 | for y in range(out.shape[0]):
53 | for x in range(out.shape[1]):
54 | out[y,x,:] = getJetColor(gray_img[y,x], 0, 1)
55 | return out
56 |
57 | def padRightDownCorner(img, stride, padValue):
58 | h = img.shape[0]
59 | w = img.shape[1]
60 |
61 | pad = 4 * [None]
62 | pad[0] = 0 # up
63 | pad[1] = 0 # left
64 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down
65 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right
66 |
67 | img_padded = img
68 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1))
69 | img_padded = np.concatenate((pad_up, img_padded), axis=0)
70 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1))
71 | img_padded = np.concatenate((pad_left, img_padded), axis=1)
72 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1))
73 | img_padded = np.concatenate((img_padded, pad_down), axis=0)
74 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1))
75 | img_padded = np.concatenate((img_padded, pad_right), axis=1)
76 |
77 | return img_padded, pad
78 |
--------------------------------------------------------------------------------
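A quick sketch (not from the repo) of how padRightDownCorner is typically used: pad an image so height and width become multiples of the network stride, then crop any map aligned with the padded image back to the original size using the returned pad list. The stride and pad value below are illustrative.

    import numpy as np
    from util import padRightDownCorner

    stride, pad_value = 8, 128
    img = np.zeros((371, 589, 3), dtype=np.uint8)       # arbitrary test size

    img_padded, pad = padRightDownCorner(img, stride, pad_value)
    assert img_padded.shape[0] % stride == 0 and img_padded.shape[1] % stride == 0

    # pad = [up, left, down, right]; slice the padding off to get back to the original size
    cropped = img_padded[pad[0]:img_padded.shape[0] - pad[2],
                         pad[1]:img_padded.shape[1] - pad[3], :]
    assert cropped.shape == img.shape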