├── .gitignore ├── README.md ├── __init__.py ├── config_template.py ├── configs ├── TrainingStateConfigHandler.py ├── Validator.py ├── __init__.py ├── base_config.py └── config_schema.py ├── datasets ├── Augmenter.py ├── __init__.py ├── coco_dataset.py ├── coco_dataset_dev.py ├── coco_to_hdf5_db.py ├── dataset_base.py └── lsp_dataset.py ├── evaluations └── eval_coco_openpose.py ├── models ├── __init__.py ├── model_base.py ├── model_handler_base.py └── model_openpose.py ├── network.py ├── predict_image.py ├── predict_webcam.py ├── requirements.txt ├── skeletons ├── gt_generators │ ├── gt_generator_base.py │ └── gt_generator_openpose.py ├── joint_converteres │ ├── joint_converter_base.py │ ├── joint_converter_coco_rtpose2d.py │ └── joint_converter_lsp_rtpose2d.py ├── joint_setup.py ├── skeleton_config_base.py ├── skeleton_config_openpose.py └── skeleton_config_rtpose2d.py ├── training ├── __init__.py ├── loss_functions.py ├── tensorboard_logger.py ├── train_openpose.py ├── train_prod.py └── train_utils.py ├── util_img.py ├── utils ├── util.py ├── util_eval.py ├── util_joint_map.py ├── util_paf_map.py ├── util_predict.py └── util_skeleton.py ├── visualization.py └── workbenches └── export_rarepose_results.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | .idea 101 | 102 | configs/training_state_config.ini 103 | config.py 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - Install requirements.txt 2 | - Install OpenCV 3 | - Install tkinter 4 | - copy config_template.py to config.py and edit as required (for reference look at configs/base_config) 5 | 6 | This is a PyTorch implementation of Realtime_Multi-Person_Pose_Estimation (origin code is here https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation and https://github.com/CMU-Perceptual-Computing-Lab/openpose) 7 | The post processing is a little different from OpenPose causing a little different results as well as a few performance problems. 
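A minimal inference sketch, assuming a working `config.py` and that `cfg.network.model_state_file` points at the converted weights linked below; the calls mirror `evaluations/eval_coco_openpose.py`, and the image path is a placeholder:

```python
import cv2

from network import get_network_results
from models.model_openpose import OpenPoseModelHandler
from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose
from utils.util_predict import get_human_data

# Build the OpenPose network and load the weights configured in cfg.network.model_state_file.
handler = OpenPoseModelHandler()
model = handler.get_model()
handler.load_state_dict(model)

# Run the network on one image and group the detected joints and limbs into skeletons.
skeleton_config = SkeletonConfigOpenPose()
img = cv2.imread("example.jpg")  # placeholder image path
img_result = get_network_results(model, img)
joint_positions, limbs, humans = get_human_data(img_result, img, skeleton_config)
print("Detected {} person(s)".format(len(humans)))
```

See also `predict_image.py` and `predict_webcam.py`.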
8 | 9 | Converted caffe weights: https://cogsys.reutlingen-university.de/pub/files/op_converted.pth 10 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noboevbo/openpose-pytorch/4bc9cf4c927fdb507d89198724a237800cad9b3e/__init__.py -------------------------------------------------------------------------------- /config_template.py: -------------------------------------------------------------------------------- 1 | from configs.Validator import ObjectValidator 2 | from configs.base_config import GeneralConfig, ConvertConfig, ConfigBase, \ 3 | DatasetConfigCoco, NetworkConfigOpenPose, TrainingConfigOpenPose 4 | from configs.config_schema import cfg_schema 5 | 6 | 7 | default_path = "/home/USERNAME/rtpose2d_data/" 8 | dataset_dir = "/home/USERNAME/datasets/COCO" 9 | 10 | 11 | class OpenPoseConfig(ConfigBase): 12 | general = GeneralConfig() 13 | convert = ConvertConfig() 14 | network = NetworkConfigOpenPose(default_path) 15 | train = TrainingConfigOpenPose(default_path) 16 | dataset = DatasetConfigCoco(dataset_dir) 17 | 18 | def __init__(self): 19 | super().__init__() 20 | self.network.model_state_file = "/media/disks/beta/models/openpose/itsc18_sim_full_c48.pth" 21 | 22 | self.train.batch_size = 10 23 | self.train.learning_rate = 0.001 24 | 25 | 26 | cfg = OpenPoseConfig() 27 | 28 | cfg_validator = ObjectValidator(cfg_schema) 29 | if not cfg_validator.validate_object(cfg): 30 | raise SystemError(str(cfg_validator.errors)) 31 | 32 | -------------------------------------------------------------------------------- /configs/TrainingStateConfigHandler.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from configobj import ConfigObj 4 | 5 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | 8 | class TrainingStateConfigHandler: 9 | def __init__(self, config_path=os.path.join(curr_dir, "training_state_config.ini")): 10 | self.config_path = config_path 11 | self.config = None 12 | self.load_config() 13 | 14 | def load_config(self): 15 | self.config = ConfigObj(self.config_path) -------------------------------------------------------------------------------- /configs/Validator.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from cerberus import Validator 4 | 5 | from collections import Mapping, Sequence 6 | 7 | 8 | class ObjectValidator(Validator): 9 | def __init__(self, *args, **kwargs): 10 | super(ObjectValidator, self).__init__(*args, **kwargs) 11 | self.allow_unknown = True 12 | 13 | def validate_object(self, obj): 14 | return self.validate(obj.__dict__) 15 | 16 | def _validate_type_object(self, value): 17 | # objects which are not Mapping or Sequence types are allowed. 18 | # (Mapping and Sequence types are dealt elsewhere.) 
19 | if isinstance(value, object) and \ 20 | not isinstance(value, (Sequence, Mapping)): 21 | return True 22 | 23 | def _validate_schema(self, schema, field, value): 24 | if isinstance(value, (Sequence, Mapping)): 25 | super(ObjectValidator, self)._validate_schema(schema, field, value) 26 | elif isinstance(value, object): 27 | validator = copy.copy(self) 28 | validator.schema = schema 29 | # validator = self._get_child_validator(document_crumb=field, 30 | # schema_crumb=(field, 'schema'), 31 | # schema=schema, 32 | # allow_unknown=self.allow_unknown) 33 | if not validator.validate(value.__dict__): 34 | self._error(validator._errors) -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noboevbo/openpose-pytorch/4bc9cf4c927fdb507d89198724a237800cad9b3e/configs/__init__.py -------------------------------------------------------------------------------- /configs/base_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from configs.TrainingStateConfigHandler import TrainingStateConfigHandler 4 | 5 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | 8 | class GeneralConfig(object): 9 | def __init__(self): 10 | self.input_width = 368 11 | self.input_height = 368 12 | self.stride = 8 13 | self.debug_timers = False 14 | self.additional_debug_timers = False 15 | 16 | # Network 17 | 18 | 19 | class NetworkConfigBase(object): 20 | def __init__(self, default_path): 21 | self.use_gpu = 1 22 | self.gpu_device_number = 0 23 | self.model_state_file = "" 24 | self.scale_search = [0.5, 1.0, 1.5, 2.0] 25 | self.pad_color = 255 26 | 27 | self.heatmap_thresh = 0.1 28 | 29 | self.limb_num_samples = 10 30 | self.limb_sample_score_thresh = 0.25 31 | self.limb_samples_over_thresh = 0.5 32 | 33 | self.skeleton_min_limbs = 4 34 | self.skeleton_limb_score = 0.25 35 | self.stage_delay_epochs = [0, 0, 0, 0, 0, 0] # Delays the training of a given stage to the given epoch 36 | 37 | 38 | class NetworkConfigRtPose2D(NetworkConfigBase): 39 | def __init__(self, default_path): 40 | super().__init__(default_path) 41 | self.model_state_file = os.path.join(default_path, "pretrained_models", "rtpose2d", "rtpose2d.pth") 42 | self.training = TrainingConfigRtPose2D(default_path) 43 | 44 | 45 | class NetworkConfigOpenPose(NetworkConfigBase): 46 | def __init__(self, default_path): 47 | super().__init__(default_path) 48 | self.model_state_file = os.path.join(default_path, "pretrained_models", "rtpose2d", "rtpose2d.pth") 49 | self.training = TrainingConfigRtPose2D(default_path) 50 | self.paf_thre = 8.0 51 | self.paf_num_samples = 10 52 | self.paf_thresh_sample_score = 0.05 53 | self.paf_samples_over_thresh = 0.8 54 | 55 | 56 | # Training 57 | 58 | class TrainingConfigBase(object): 59 | def __init__(self, default_path): 60 | self.name = self.__class__.__name__ 61 | self.__checkpoint_config_handler = self.get_training_state_config_handler() 62 | self.checkpoint_cfg = self.__checkpoint_config_handler.config 63 | 64 | self.checkpoint_model_base_dir = os.path.join(default_path, "checkpoints") 65 | self.checkpoint_model_path = self.checkpoint_cfg[self.name]["checkpoint_model_path"] 66 | self.checkpoint_epoch = int(self.checkpoint_cfg[self.name]["checkpoint_epoch"]) 67 | best_loss = self.checkpoint_cfg[self.name]["checkpoint_best_model_loss"] 68 | self.checkpoint_best_model_loss = float(best_loss) if 
best_loss else None 69 | 70 | self.trained_model_dir = os.path.join(default_path, "trained_models") 71 | self.log_dir = os.path.join(default_path, "logs") 72 | 73 | self.learning_rate = 0.00004 # Base learning rate, is changed per layer grp 74 | self.batch_size = 10 75 | self.gamma = 0.333 # 4 Learning Rate Scheduler 76 | self.stepsize = 210392 # in original code each epoch is 121746 and step change is on 17th epoch 77 | 78 | self.momentum = 0.9 79 | self.weight_decay = 0.0005 80 | self.augmentation = AugmentationConfig() 81 | 82 | def get_training_state_config_handler(self): 83 | state_config_handler = TrainingStateConfigHandler() 84 | if self.name not in state_config_handler.config: 85 | state_config_handler.config[self.name] = {} 86 | state_config_handler.config[self.name]["checkpoint_model_path"] = '' 87 | state_config_handler.config[self.name]["checkpoint_epoch"] = 0 88 | state_config_handler.config[self.name]["checkpoint_best_model_loss"] = '' 89 | state_config_handler.config.write() 90 | return state_config_handler 91 | 92 | def __update_checkpoint_from_cfg(self): 93 | self.checkpoint_model_path = self.checkpoint_cfg[self.name]["checkpoint_model_path"] 94 | self.checkpoint_epoch = int(self.checkpoint_cfg[self.name]["checkpoint_epoch"]) 95 | self.checkpoint_best_model_loss = self.checkpoint_cfg[self.name]["checkpoint_best_model_loss"] 96 | 97 | def update_checkpoint(self, checkpoint_model_path, checkpoint_epoch, best_model_loss): 98 | self.checkpoint_cfg[self.name]["checkpoint_model_path"] = checkpoint_model_path 99 | self.checkpoint_cfg[self.name]["checkpoint_epoch"] = checkpoint_epoch 100 | self.checkpoint_cfg[self.name]["checkpoint_best_model_loss"] = best_model_loss 101 | self.checkpoint_cfg.write() 102 | self.__update_checkpoint_from_cfg() 103 | 104 | 105 | class TrainingConfigRtPose2D(TrainingConfigBase): 106 | def __init__(self, default_path): 107 | super().__init__(default_path) 108 | self.trained_model_dir = os.path.join(self.trained_model_dir, "rtpose2d") 109 | self.log_dir = os.path.join(self.log_dir, "rtpose2d") 110 | self.checkpoint_model_base_dir = os.path.join(self.checkpoint_model_base_dir, "rtpose2d") 111 | 112 | 113 | class TrainingConfigOpenPose(TrainingConfigBase): 114 | def __init__(self, default_path): 115 | super().__init__(default_path) 116 | self.trained_model_dir = os.path.join(self.trained_model_dir, "openpose") 117 | self.log_dir = os.path.join(self.log_dir, "openpose") 118 | self.checkpoint_model_base_dir = os.path.join(self.checkpoint_model_base_dir, "openpose") 119 | 120 | 121 | # Augmentation 122 | 123 | class AugmentationConfig(object): 124 | def __init__(self): 125 | self.target_dist = 0.6 126 | self.scale_prob = 1 127 | self.scale_min = 0.4 128 | self.scale_max = 1.3 129 | self.max_rotate_degree = 50 130 | self.center_perterb_max = 50 131 | self.flip_prob = 0.5 132 | self.sigma = 7 133 | self.sigma_limb = 6 134 | 135 | 136 | # Dataset 137 | 138 | class DatasetConfigBase(object): 139 | def __init__(self, dataset_dir): 140 | self.base_dir = dataset_dir 141 | self.train_annotation_dir = "" 142 | self.train_img_dir = "" 143 | # This setting is used in the coco_to_hdf5 converter 144 | self.train_convert_hdf5 = "" 145 | # This setting is used for training 146 | self.train_hdf5 = "" 147 | 148 | self.val_annotation_dir = "" 149 | self.val_img_dir = "" 150 | # This setting is used in the coco_to_hdf5 converter 151 | self.val_convert_hdf5 = "" 152 | # This setting is used for validate 153 | self.val_hdf5 = "" 154 | 155 | 156 | class 
DatasetConfigCoco(DatasetConfigBase): 157 | def __init__(self, dataset_dir): 158 | super().__init__(dataset_dir) 159 | self.base_dir = dataset_dir 160 | self.train_annotation_dir = os.path.join(self.base_dir, "annotations/person_keypoints_train2017.json") 161 | self.train_img_dir = os.path.join(self.base_dir, "train2017") 162 | self.train_convert_hdf5 = os.path.join(self.base_dir, "coco_train.h5") 163 | self.train_hdf5 = os.path.join(self.base_dir, "train_dataset.h5") 164 | 165 | self.val_annotation_dir = os.path.join(self.base_dir, "annotations/person_keypoints_val2017.json") 166 | self.val_img_dir = os.path.join(self.base_dir, "val2017") 167 | self.val_convert_hdf5 = os.path.join(self.base_dir, "coco_val.h5") 168 | self.val_hdf5 = os.path.join(self.base_dir, "val_dataset.h5") 169 | self.val_size = 2637 170 | 171 | 172 | # Convert 173 | 174 | class ConvertConfig(object): 175 | def __init__(self): 176 | self.caffe = ConvertCaffeConfig() 177 | 178 | 179 | class ConvertCaffeConfig(object): 180 | def __init__(self): 181 | self.caffe_model = "/home/USERNAME/git/openpose/models/pose/coco/pose_iter_440000.caffemodel" 182 | self.deploy_file = "/home/USERNAME/git/openpose/models/pose/coco/pose_deploy_linevec.prototxt" 183 | self.pytorch_model = "/media/USERNAME/Data/Dump/pose_iter_440000.pth" 184 | self.test_image = "/home/USERNAME/git/openpose/examples/media/COCO_val2014_000000000474.jpg" 185 | 186 | 187 | class ConfigBase(object): 188 | @property 189 | def general(self) -> GeneralConfig: 190 | raise NotImplementedError 191 | 192 | @property 193 | def convert(self) -> ConvertConfig: 194 | raise NotImplementedError 195 | 196 | @property 197 | def network(self) -> NetworkConfigBase: 198 | raise NotImplementedError 199 | 200 | @property 201 | def train(self) -> TrainingConfigBase: 202 | raise NotImplementedError 203 | 204 | @property 205 | def dataset(self) -> DatasetConfigBase: 206 | raise NotImplementedError 207 | -------------------------------------------------------------------------------- /configs/config_schema.py: -------------------------------------------------------------------------------- 1 | cfg_schema_augmentation = { 2 | 'type': 'object', 3 | 'schema': { 4 | 'target_dist': {'type': 'float'}, 5 | 'scale_prob': {'type': 'integer'}, 6 | 'scale_min': {'type': 'float'}, 7 | 'scale_max': {'type': 'float'}, 8 | 'max_rotate_degree': {'type': 'integer'}, 9 | 'center_perterb_max': {'type': 'integer'}, 10 | 'flip_prob': {'type': 'float'}, 11 | 'sigma': {'type': 'integer'}, 12 | } 13 | } 14 | 15 | cfg_schema_convert_caffe = { 16 | 'type': 'object', 17 | 'schema': { 18 | 'caffe_model': {'type': 'string'}, 19 | 'deploy_model': {'type': 'string'}, 20 | 'pytorch_model': {'type': 'string'}, 21 | 'test_image': {'type': 'string'}, 22 | } 23 | } 24 | 25 | cfg_schema_general = { 26 | 'type': 'object', 27 | 'schema': { 28 | 'input_width': {'type': 'integer'}, 29 | 'input_height': {'type': 'integer'}, 30 | 'stride': {'type': 'integer'}, 31 | 'debug_timers': { 'type': 'boolean'}, 32 | 'additional_debug_timers': { 'type': 'boolean'} 33 | } 34 | } 35 | 36 | cfg_schema_convert = { 37 | 'type': 'object', 38 | 'schema': { 39 | 'caffe': cfg_schema_convert_caffe 40 | } 41 | } 42 | 43 | cfg_schema_training = { 44 | 'type': 'object', 45 | 'schema': { 46 | 'weight_dir': {'type': 'string'}, 47 | 'trained_model_dir': {'type': 'string'}, 48 | 'log_dir': {'type': 'string'}, 49 | 'checkpoint_model_base_dir' : {'type': 'string'}, 50 | 'checkpoint_model_path': {'type': 'string', 'nullable': True}, 51 | 'checkpoint_epoch': 
{'type': 'integer'}, 52 | 'checkpoint_best_model_loss': {'type': 'float', 'nullable': True}, 53 | 54 | 'learning_rate': {'type': 'float'}, # Base learning rate, is changed per layer grp 55 | 'batch_size': {'type': 'integer'}, 56 | 'gamma': {'type': 'float'}, # 4 Learning Rate Scheduler 57 | 'stepsize': {'type': 'integer'}, 58 | # in original code each epoch is 121746 and step change is on 17th epoch 59 | 60 | 'momentum': {'type': 'float'}, 61 | 'weight_decay': {'type': 'float'}, 62 | 'augmentation': cfg_schema_augmentation 63 | } 64 | } 65 | 66 | cfg_schema_network = { 67 | 'type': 'object', 68 | 'schema': { 69 | 'use_gpu': {'type': 'integer'}, 70 | 'gpu_device_number': {'type': 'integer'}, 71 | 'model_state_file': {'type': 'string'}, 72 | 'scale_search': { 73 | 'type': 'list', 74 | 'schema': { 75 | 'type': 'float' 76 | } 77 | }, 78 | 'pad_color': {'type': 'integer'}, 79 | 'heatmap_thresh': {'type': 'float'}, 80 | 'limb_num_samples': {'type': 'integer'}, 81 | 'limb_sample_score_thresh': {'type': 'float'}, 82 | 'limb_samples_over_thresh': {'type': 'float'}, 83 | 'skeleton_min_limbs': {'type': 'integer'}, 84 | 'skeleton_limb_score': {'type': 'float'}, 85 | 'stage_delay_epochs': { 86 | 'type': 'list', 87 | 'schema': { 88 | 'type': 'integer' 89 | } 90 | } 91 | } 92 | } 93 | 94 | cfg_schema_dataset = { 95 | 'type': 'object', 96 | 'schema': { 97 | 'base_dir': {'type': 'string'}, 98 | 'train_annotation_dir': {'type': 'string'}, 99 | 'train_img_dir': {'type': 'string'}, 100 | 'train_convert_hdf5': {'type': 'string'}, 101 | 'train_hdf5': {'type': 'string'}, 102 | 'val_annotation_dir': {'type': 'string'}, 103 | 'val_img_dir': {'type': 'string'}, 104 | 'val_convert_hdf5': {'type': 'string'}, 105 | 'val_hdf5': {'type': 'string'}, 106 | } 107 | } 108 | 109 | cfg_schema = { 110 | 'general': cfg_schema_general, 111 | 'convert': cfg_schema_convert, 112 | 'network': cfg_schema_network, 113 | 'train': cfg_schema_training, 114 | 'dataset': cfg_schema_dataset 115 | } 116 | -------------------------------------------------------------------------------- /datasets/Augmenter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | from math import cos, sin, pi 5 | import cv2 6 | import random 7 | 8 | from config import cfg 9 | from skeletons.skeleton_config_base import SkeletonConfigBase 10 | 11 | 12 | class AugmentSelection: 13 | def __init__(self, flip=False, degree=0., crop=(0, 0), scale=1.): 14 | self.flip = flip 15 | self.degree = degree # rotate 16 | self.crop = crop # shift actually 17 | self.scale = scale 18 | 19 | @staticmethod 20 | def random(): 21 | flip = random.uniform(0., 1.) > cfg.train.augmentation.flip_prob 22 | degree = random.uniform(-1., 1.) * cfg.train.augmentation.max_rotate_degree 23 | scale = 1 24 | if random.uniform(0., 1.) > cfg.train.augmentation.scale_prob: 25 | # TODO: see 'scale improbability' TODO above 26 | scale = (cfg.train.augmentation.scale_max - cfg.train.augmentation.scale_min) * random.uniform(0., 1.) + \ 27 | cfg.train.augmentation.scale_min 28 | x_offset = int(random.uniform(-1., 1.) * cfg.train.augmentation.center_perterb_max) 29 | y_offset = int(random.uniform(-1., 1.) * cfg.train.augmentation.center_perterb_max) 30 | 31 | return AugmentSelection(flip, degree, (x_offset, y_offset), scale) 32 | 33 | @staticmethod 34 | def unrandom(): 35 | flip = False 36 | degree = 0. 37 | scale = 1. 
38 | x_offset = 0 39 | y_offset = 0 40 | 41 | return AugmentSelection(flip, degree, (x_offset, y_offset), scale) 42 | 43 | def affine(self, center, scale_self): 44 | # the main idea: we will do all image transformations with one affine matrix. 45 | # this saves lot of cpu and make code significantly shorter 46 | # same affine matrix could be used to transform joint coordinates afterwards 47 | A = self.scale * cos(self.degree / 180. * pi) 48 | B = self.scale * sin(self.degree / 180. * pi) 49 | 50 | divisor = scale_self * self.scale 51 | if divisor <= 0: 52 | divisor = 1 # prevent zero division 53 | 54 | scale_size = cfg.train.augmentation.target_dist / divisor 55 | 56 | (width, height) = center 57 | center_x = width + self.crop[0] 58 | center_y = height + self.crop[1] 59 | 60 | center2zero = np.array([[1., 0., -center_x], 61 | [0., 1., -center_y], 62 | [0., 0., 1.]]) 63 | 64 | rotate = np.array([[A, B, 0], 65 | [-B, A, 0], 66 | [0, 0, 1.]]) 67 | 68 | scale = np.array([[scale_size, 0, 0], 69 | [0, scale_size, 0], 70 | [0, 0, 1.]]) 71 | 72 | flip = np.array([[-1 if self.flip else 1., 0., 0.], 73 | [0., 1., 0.], 74 | [0., 0., 1.]]) 75 | 76 | center2center = np.array([[1., 0., cfg.general.input_width // 2], 77 | [0., 1., cfg.general.input_height // 2], 78 | [0., 0., 1.]]) 79 | 80 | # order of combination is reversed 81 | combined = center2center.dot(flip).dot(scale).dot(rotate).dot(center2zero) 82 | 83 | return combined[0:2] 84 | 85 | 86 | def transform(img, mask, meta, mask_shape: (int, int), skeleton_config: SkeletonConfigBase, aug: AugmentSelection = AugmentSelection.unrandom()): 87 | # warp picture and mask 88 | M = aug.affine(meta['objpos'][0], meta['scale_provided'][0]) 89 | 90 | img = cv2.warpAffine(img, M, (cfg.general.input_height, cfg.general.input_width), flags=cv2.INTER_CUBIC, 91 | borderMode=cv2.BORDER_CONSTANT, borderValue=(127, 127, 127)) 92 | mask = cv2.warpAffine(mask, M, (cfg.general.input_height, cfg.general.input_width), flags=cv2.INTER_CUBIC, 93 | borderMode=cv2.BORDER_CONSTANT, borderValue=255) 94 | mask = cv2.resize(mask, mask_shape, interpolation=cv2.INTER_CUBIC) 95 | # _, mask = cv2.threshold(mask, 128, 255, cv2.THRESH_BINARY) 96 | # assert np.all((mask == 0) | (mask == 255)), "Interpolation of mask should be thresholded only 0 or 255\n" + str(mask) 97 | mask = mask.astype(np.float) / 255. 98 | 99 | original_points = meta['joints'].copy() 100 | original_points[:, :, 2] = 1 # we reuse 3rd column in completely different way here, it is hack 101 | converted_points = np.matmul(M, original_points.transpose([0, 2, 1])).transpose([0, 2, 1]) 102 | meta['joints'][:, :, 0:2] = converted_points 103 | 104 | # we just made image flip, i.e. 
right leg just became left leg, and vice versa 105 | 106 | if aug.flip: 107 | tmpLeft = meta['joints'][:, skeleton_config.left_parts, :] 108 | tmpRight = meta['joints'][:, skeleton_config.right_parts, :] 109 | meta['joints'][:, skeleton_config.left_parts, :] = tmpRight 110 | meta['joints'][:, skeleton_config.right_parts, :] = tmpLeft 111 | 112 | return img, mask, meta 113 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noboevbo/openpose-pytorch/4bc9cf4c927fdb507d89198724a237800cad9b3e/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/coco_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | import cv2 5 | import h5py 6 | import numpy as np 7 | import torch 8 | import torch.multiprocessing as mp 9 | 10 | from datasets.Augmenter import AugmentSelection, transform 11 | from datasets.dataset_base import DatasetBase 12 | from models.model_base import NetworkModelBase 13 | from skeletons.gt_generators.gt_generator_base import GroundTruthGeneratorBase 14 | from skeletons.joint_converteres.joint_converter_coco_rtpose2d import JointConverterCocoRtPose2D 15 | from skeletons.skeleton_config_base import SkeletonConfigBase 16 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose 17 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 18 | from util_img import normalize 19 | 20 | 21 | def get_joint_converter(skeleton_config): 22 | if type(skeleton_config) is SkeletonConfigRtPose2D: 23 | return JointConverterCocoRtPose2D() 24 | if type(skeleton_config) is SkeletonConfigOpenPose: 25 | return JointConverterCocoRtPose2D() 26 | 27 | 28 | class CocoDataset(DatasetBase): 29 | def __init__(self, h5files: [str], skeleton_config: SkeletonConfigBase, gt_generator: GroundTruthGeneratorBase, 30 | model: NetworkModelBase, num_samples=None, augment=False): 31 | self.h5s = [h5py.File(fname, "r") for fname in h5files] 32 | self.h5_contents = [(h5['dataset'], h5['images'], h5['miss_masks'] if 'miss_masks' in h5 else None) for h5 in 33 | self.h5s] 34 | self.joint_converter = get_joint_converter(skeleton_config) 35 | self.skeleton_config = skeleton_config 36 | self.keys = [] 37 | self.lock = mp.Lock() 38 | self.augment = augment 39 | self.model = model 40 | self.gt_generator = gt_generator 41 | 42 | self.logger = logging.getLogger("coco_dataset_log") 43 | self.hdlr = logging.FileHandler('coco_dataset.log') 44 | self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') 45 | self.hdlr.setFormatter(self.formatter) 46 | self.logger.addHandler(self.hdlr) 47 | self.logger.setLevel(logging.INFO) 48 | 49 | with self.lock: 50 | for idx, content in enumerate(self.h5_contents): 51 | keys = list(content[0].keys()) 52 | a = len(keys) 53 | if num_samples: 54 | keys = np.random.choice(keys, num_samples, replace=False).tolist() 55 | b = len(keys) 56 | #print(len(keys)) 57 | 58 | self.keys += zip([idx] * len(keys), keys) 59 | 60 | def __getitem__(self, index): 61 | key = self.keys[index] 62 | self.logger.info("IDX[{}], KEY[{}]".format(index, key)) 63 | image, mask_misss, meta = self.read_data(key[0], key[1]) 64 | if image is None or image.shape[0] < 1 or image.shape[1] < 1 or image.shape[2] != 3: 65 | self.logger.error("Error with img idx: {}, key: {}".format(index, key)) 
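# Image is missing or has an unexpected shape: skip it and fall back to the
# next sample (wrapping around to index 0) so one bad entry does not abort training.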
66 | idx_to_use = index+1 67 | if idx_to_use >= self.__len__(): 68 | idx_to_use = 0 69 | return self.__getitem__(idx_to_use) 70 | image, mask_misss, meta, labels = self.transform_data(image, mask_misss, meta) 71 | image = self.get_img_as_tensor(image) 72 | limb_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_limb_maps, axis=2)) 73 | joint_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_joint_maps, axis=2)) 74 | 75 | limb_maps = torch.from_numpy(labels[:self.model.num_limb_maps, :, :]).float() 76 | joint_maps = torch.from_numpy(labels[self.model.num_limb_maps:, :, :]).float() 77 | return {'image': image, 'joint_map_gt': joint_maps, 'limb_map_gt': limb_maps, 78 | 'limb_map_masks': limb_map_masks, 'joint_map_masks': joint_map_masks} 79 | 80 | def __len__(self): 81 | return len(self.keys) 82 | 83 | def get_dataset_id_from_index(self, index): 84 | key = self.keys[index] 85 | image, mask_misss, meta = self.read_data(key[0], key[1]) 86 | return int(meta["image"]) 87 | 88 | def get_img_as_tensor(self, img): 89 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 90 | image = normalize(image) 91 | return torch.from_numpy(image).float() 92 | 93 | def get_mask_as_tensor(self, img): 94 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 95 | return torch.from_numpy(image).float() 96 | 97 | def transform_data(self, img, mask, meta): 98 | aug = AugmentSelection.random() if self.augment else AugmentSelection.unrandom() 99 | img, mask, meta = transform(img, mask, meta, self.model.mask_shape, self.skeleton_config, aug=aug) 100 | labels = self.gt_generator.get_ground_truth(meta['joints'], mask) 101 | 102 | return img, mask, meta, labels 103 | 104 | def read_data(self, num, key): 105 | content = self.h5_contents[num] 106 | dataset, images, mask_misss = content 107 | return self.read_data_new(dataset, images, mask_misss, key) 108 | 109 | def read_data_new(self, dataset, images, mask_misss, key): 110 | with self.lock: #HDF5 is not threadsafe, so lock while accessing it. 111 | entry = dataset[key] 112 | meta = json.loads(entry.value) 113 | img = images[meta['image']].value 114 | mask_miss = mask_misss[meta['image']].value 115 | #debug = json.loads(entry.attrs['meta']) 116 | meta["joints"] = self.joint_converter.get_converted_joint_list(meta['joints']) 117 | 118 | img = cv2.imdecode(img, flags=-1) 119 | mask_miss = cv2.imdecode(mask_miss, flags=-1) # TODO: Mask_Miss always available? 
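# img and mask_miss are stored as encoded byte blobs (.jpg / .png, written by
# coco_to_hdf5_db.py), hence the cv2.imdecode calls above.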
120 | 121 | return img, mask_miss, meta 122 | -------------------------------------------------------------------------------- /datasets/coco_dataset_dev.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import cv2 4 | import h5py 5 | import numpy as np 6 | import torch 7 | import torch.multiprocessing as mp 8 | from torch.utils.data import Dataset 9 | 10 | from datasets.Augmenter import AugmentSelection, transform 11 | from datasets.dataset_base import DatasetBase 12 | from models.model_base import NetworkModelBase 13 | from skeletons.gt_generators.gt_generator_base import GroundTruthGeneratorBase 14 | from skeletons.joint_converteres.joint_converter_coco_rtpose2d import JointConverterCocoRtPose2D 15 | from skeletons.skeleton_config_base import SkeletonConfigBase 16 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose 17 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 18 | from util_img import normalize 19 | 20 | 21 | def get_joint_converter(skeleton_config): 22 | if type(skeleton_config) is SkeletonConfigRtPose2D: 23 | return JointConverterCocoRtPose2D() 24 | if type(skeleton_config) is SkeletonConfigOpenPose: 25 | return JointConverterCocoRtPose2D() 26 | 27 | 28 | class CocoDataset(DatasetBase): 29 | def __init__(self, h5files: [str], skeleton_config: SkeletonConfigBase, gt_generator: GroundTruthGeneratorBase, 30 | model: NetworkModelBase, num_samples=None, augment=False): 31 | self.h5s = [h5py.File(fname, "r") for fname in h5files] 32 | self.h5_contents = [(h5['dataset'], h5['images'], h5['miss_masks'] if 'miss_masks' in h5 else None) for h5 in 33 | self.h5s] 34 | self.joint_converter = get_joint_converter(skeleton_config) 35 | self.skeleton_config = skeleton_config 36 | self.keys = [] 37 | self.lock = mp.Lock() 38 | self.augment = augment 39 | self.model = model 40 | self.gt_generator = gt_generator 41 | with self.lock: 42 | for idx, content in enumerate(self.h5_contents): 43 | keys = list(content[0].keys()) 44 | a = len(keys) 45 | if num_samples: 46 | keys = np.random.choice(keys, num_samples, replace=False).tolist() 47 | b = len(keys) 48 | #print(len(keys)) 49 | 50 | self.keys += zip([idx] * len(keys), keys) 51 | 52 | def __getitem__(self, index): 53 | key = self.keys[0] 54 | image, mask_misss, meta = self.read_data(key[0], key[1]) 55 | image, mask_misss, meta, labels = self.transform_data(image, mask_misss, meta) 56 | image = self.get_img_as_tensor(image) 57 | limb_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_limb_maps, axis=2)) 58 | joint_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_joint_maps, axis=2)) 59 | 60 | limb_maps = torch.from_numpy(labels[:self.model.num_limb_maps, :, :]).float() 61 | joint_maps = torch.from_numpy(labels[self.model.num_limb_maps:, :, :]).float() 62 | return {'image': image, 'joint_map_gt': joint_maps, 'limb_map_gt': limb_maps, 63 | 'limb_map_masks': limb_map_masks, 'joint_map_masks': joint_map_masks, 'meta': meta} 64 | 65 | def __len__(self): 66 | return 500 67 | return len(self.keys) 68 | 69 | def get_dataset_id_from_index(self, index): 70 | key = self.keys[index] 71 | image, mask_misss, meta = self.read_data(key[0], key[1]) 72 | return int(meta["image"]) 73 | 74 | def get_img_as_tensor(self, img): 75 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 76 | image = normalize(image) 77 | return torch.from_numpy(image).float() 78 | 79 | 
def get_mask_as_tensor(self, img): 80 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 81 | return torch.from_numpy(image).float() 82 | 83 | def transform_data(self, img, mask, meta): 84 | aug = AugmentSelection.random() if self.augment else AugmentSelection.unrandom() 85 | img, mask, meta = transform(img, mask, meta, self.model.mask_shape, self.skeleton_config, aug=aug) 86 | labels = self.gt_generator.get_ground_truth(meta['joints'], mask) 87 | 88 | return img, mask, meta, labels 89 | 90 | def read_data(self, num, key): 91 | content = self.h5_contents[num] 92 | dataset, images, mask_misss = content 93 | return self.read_data_new(dataset, images, mask_misss, key) 94 | 95 | def read_data_new(self, dataset, images, mask_misss, key): 96 | with self.lock: #HDF5 is not threadsafe, so lock while accessing it. 97 | entry = dataset[key] 98 | meta = json.loads(entry.value) 99 | img = images[meta['image']].value 100 | mask_miss = mask_misss[meta['image']].value 101 | #debug = json.loads(entry.attrs['meta']) 102 | meta["joints"] = self.joint_converter.get_converted_joint_list(meta['joints']) 103 | 104 | img = cv2.imdecode(img, flags=-1) 105 | mask_miss = cv2.imdecode(mask_miss, flags=-1) # TODO: Mask_Miss always available? 106 | 107 | return img, mask_miss, meta 108 | -------------------------------------------------------------------------------- /datasets/coco_to_hdf5_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Base src code: https://github.com/anatolix/keras_Realtime_Multi-Person_Pose_Estimation 5 | """ 6 | 7 | from pycocotools.coco import COCO 8 | from scipy.spatial.distance import cdist 9 | import numpy as np 10 | import cv2 11 | import os 12 | import os.path 13 | import h5py 14 | import json 15 | 16 | from config import cfg 17 | 18 | 19 | def get_masks(img_dir, img_id, img_anns, coco): 20 | """ 21 | Creates image masks for people in the image. mask_all contains all masked people, mask_miss contains 22 | only the masks for people without keypoints. 
23 | """ 24 | img_path = os.path.join(img_dir, "%012d.jpg" % img_id) 25 | img = cv2.imread(img_path) 26 | h, w, c = img.shape 27 | 28 | mask_all = np.zeros((h, w), dtype=np.uint8) 29 | mask_miss = np.zeros((h, w), dtype=np.uint8) 30 | 31 | flag = 0 32 | for p in img_anns: 33 | if p["iscrowd"] == 1: 34 | mask_crowd = coco.annToMask(p) 35 | temp = np.bitwise_and(mask_all, mask_crowd) 36 | mask_crowd = mask_crowd - temp 37 | flag += 1 38 | continue 39 | else: 40 | mask = coco.annToMask(p) 41 | 42 | mask_all = np.bitwise_or(mask, mask_all) 43 | if p["num_keypoints"] <= 0: 44 | mask_miss = np.bitwise_or(mask, mask_miss) 45 | 46 | if flag < 1: 47 | mask_miss = np.logical_not(mask_miss) 48 | elif flag == 1: 49 | mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd)) 50 | mask_all = np.bitwise_or(mask_all, mask_crowd) 51 | else: 52 | raise Exception("crowd segments > 1") 53 | 54 | mask_miss = mask_miss.astype(np.uint8) 55 | mask_miss *= 255 56 | 57 | mask_all = mask_all.astype(np.uint8) 58 | mask_all *= 255 59 | 60 | return img, mask_miss, mask_all 61 | 62 | 63 | def get_persons(img_annotations): 64 | all_persons = [] 65 | for person in img_annotations: 66 | person_dict = dict() 67 | 68 | person_center = [person["bbox"][0] + person["bbox"][2] / 2, 69 | person["bbox"][1] + person["bbox"][3] / 2] 70 | 71 | person_dict["objpos"] = person_center 72 | person_dict["bbox"] = person["bbox"] 73 | person_dict["segment_area"] = person["area"] 74 | person_dict["num_keypoints"] = person["num_keypoints"] 75 | 76 | anno = person["keypoints"] 77 | 78 | person_dict["joint"] = np.zeros((17, 3)) 79 | for part in range(17): 80 | person_dict["joint"][part, 0] = anno[part * 3] 81 | person_dict["joint"][part, 1] = anno[part * 3 + 1] 82 | 83 | if anno[part * 3 + 2] == 2: 84 | person_dict["joint"][part, 2] = 2 85 | elif anno[part * 3 + 2] == 1: 86 | person_dict["joint"][part, 2] = 1 87 | else: 88 | person_dict["joint"][part, 2] = 0 89 | 90 | # Scale provided -> Person Height / Img Height 91 | person_dict["scale_provided"] = person["bbox"][3] / cfg.general.input_height 92 | 93 | all_persons.append(person_dict) 94 | return all_persons 95 | 96 | 97 | def get_main_persons(all_persons): 98 | """ 99 | Returns only persons which: 100 | - Have enough joints 101 | - Have a great enough segmented area 102 | - Have a high enough distance to existing persons 103 | """ 104 | prev_center = [] 105 | main_persons = [] 106 | for person in all_persons: 107 | 108 | # skip this person if parts number is too low or if 109 | # segmentation area is too small 110 | if person["num_keypoints"] < 5 or person["segment_area"] < 32 * 32: 111 | continue 112 | 113 | person_center = person["objpos"] 114 | 115 | # skip this person if the distance to exiting person is too small 116 | flag = 0 117 | for pc in prev_center: 118 | a = np.expand_dims(pc[:2], axis=0) 119 | b = np.expand_dims(person_center, axis=0) 120 | dist = cdist(a, b)[0] 121 | if dist < pc[2] * 0.3: 122 | flag = 1 123 | continue 124 | 125 | if flag == 1: 126 | continue 127 | 128 | main_persons.append(person) 129 | prev_center.append(np.append(person_center, max(person["bbox"][2], person["bbox"][3]))) 130 | return main_persons 131 | 132 | 133 | def get_annotation_template(img_id, img_index, img_rec, dataset_type): 134 | template = dict() 135 | template["dataset"] = dataset_type 136 | template["is_validation"] = img_index < cfg.dataset.val_size and 'val' in dataset_type 137 | template["img_width"] = img_rec['width'] 138 | template["img_height"] = img_rec['height'] 139 | 
template["image_id"] = img_id 140 | template["annolist_index"] = img_index 141 | template["img_path"] = '%012d.jpg' % img_id 142 | return template 143 | 144 | 145 | def process_image(img_id, img_index, img_rec, img_annotations, dataset_type): 146 | print("Process image ID: ", img_id) 147 | 148 | all_persons = get_persons(img_annotations) 149 | main_persons = get_main_persons(all_persons) 150 | 151 | template = get_annotation_template(img_id, img_index, img_rec, dataset_type) 152 | 153 | for main_person in main_persons: 154 | 155 | instance = template.copy() 156 | 157 | instance["objpos"] = [main_person["objpos"]] 158 | # Joint Format: For each object, ground truth keypoints have the form [x1,y1,v1,...,xk,yk,vk], where x,y are the 159 | # keypoint locations and v is a visibility flag defined as v=0: not labeled, v=1: labeled but not visible, 160 | # and v=2: labeled and visible. 161 | instance["joints"] = [main_person["joint"].tolist()] 162 | instance["scale_provided"] = [main_person["scale_provided"]] 163 | 164 | lenOthers = 0 165 | 166 | for ot, operson in enumerate(all_persons): 167 | 168 | if main_person is operson: 169 | assert not "people_index" in instance, "several main persons? couldn't be" 170 | instance["people_index"] = ot 171 | continue 172 | 173 | if operson["num_keypoints"] == 0: 174 | continue 175 | 176 | instance["joints"].append(all_persons[ot]["joint"].tolist()) 177 | instance["scale_provided"].append(all_persons[ot]["scale_provided"]) 178 | instance["objpos"].append(all_persons[ot]["objpos"]) 179 | 180 | lenOthers += 1 181 | 182 | assert "people_index" in instance, "No main person index" 183 | instance['num_other_people'] = lenOthers 184 | 185 | yield instance 186 | 187 | 188 | def write_img(grp, img_grp, data, img, mask_miss, count, image_id, mask_grp): 189 | serializable_meta = data 190 | serializable_meta['count'] = count 191 | 192 | num_other_people = data['num_other_people'] 193 | 194 | assert len(serializable_meta['joints']) == 1 + num_other_people, [len(serializable_meta['joints']), 1 + num_other_people] 195 | assert len(serializable_meta['scale_provided']) == 1 + num_other_people, [len(serializable_meta['scale_provided']), 1 + num_other_people] 196 | assert len(serializable_meta['objpos']) == 1 + num_other_people, [len(serializable_meta['objpos']), 1 + num_other_people] 197 | 198 | img_key = "%012d" % image_id 199 | if not img_key in img_grp: 200 | _, img_bin = cv2.imencode(".jpg", img) 201 | _, img_mask = cv2.imencode(".png", mask_miss) 202 | img_ds1 = img_grp.create_dataset(img_key, data=img_bin, chunks=None) 203 | img_ds2 = mask_grp.create_dataset(img_key, data=img_mask, chunks=None) 204 | 205 | key = '%07d' % count 206 | required = {'image': img_key, 'joints': serializable_meta['joints'], 'objpos': serializable_meta['objpos'], 207 | 'scale_provided': serializable_meta['scale_provided']} 208 | ds = grp.create_dataset(key, data=json.dumps(required), chunks=None) 209 | ds.attrs['meta'] = json.dumps(serializable_meta) 210 | 211 | print('Writing sample %d' % count) 212 | 213 | 214 | def process(): 215 | datasets = [ 216 | { 217 | 'annotation_dir': cfg.dataset.val_annotation_dir, 218 | 'image_dir': cfg.dataset.val_img_dir, 219 | 'type': 'coco_val', 220 | }, 221 | { 222 | 'annotation_dir': cfg.dataset.train_annotation_dir, 223 | 'image_dir': cfg.dataset.train_img_dir, 224 | 'type': 'coco', 225 | }, 226 | ] 227 | 228 | train_h5 = h5py.File(cfg.dataset.train_convert_hdf5, 'w') 229 | train_group = train_h5.create_group("dataset") 230 | train_write_count = 0 231 | 
train_grp_img = train_h5.create_group("images") 232 | train_grp_miss_mask = train_h5.create_group("miss_masks") 233 | 234 | val_h5 = h5py.File(cfg.dataset.val_convert_hdf5, 'w') 235 | val_grp = val_h5.create_group("dataset") 236 | val_write_count = 0 237 | val_grp_img = val_h5.create_group("images") 238 | val_grp_miss_mask = val_h5.create_group("miss_masks") 239 | 240 | for ds in datasets: 241 | coco = COCO(ds['annotation_dir']) 242 | ids = list(coco.imgs.keys()) 243 | 244 | for img_index, img_id in enumerate(ids): 245 | ann_ids = coco.getAnnIds(imgIds=img_id) 246 | img_anns = coco.loadAnns(ann_ids) 247 | img_rec = coco.imgs[img_id] 248 | 249 | img = None 250 | mask_miss = None 251 | cached_img_id = None 252 | 253 | for data in process_image(img_id, img_index, img_rec, img_anns, ds['type']): 254 | 255 | if cached_img_id != data['image_id']: 256 | assert img_id == data['image_id'] 257 | cached_img_id = data['image_id'] 258 | img, mask_miss, mask_all = get_masks(ds['image_dir'], cached_img_id, img_anns, coco) 259 | 260 | if data['is_validation']: 261 | write_img(val_grp, val_grp_img, data, img, mask_miss, val_write_count, cached_img_id, val_grp_miss_mask) 262 | val_write_count += 1 263 | else: 264 | write_img(train_group, train_grp_img, data, img, mask_miss, train_write_count, cached_img_id, train_grp_miss_mask) 265 | train_write_count += 1 266 | 267 | train_h5.close() 268 | val_h5.close() 269 | 270 | 271 | if __name__ == '__main__': 272 | process() 273 | -------------------------------------------------------------------------------- /datasets/dataset_base.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class DatasetBase(Dataset): 5 | def __len__(self): 6 | return NotImplementedError 7 | 8 | def __getitem__(self, index): 9 | return NotImplementedError 10 | 11 | def get_dataset_id_from_index(self, index): 12 | return NotImplementedError -------------------------------------------------------------------------------- /datasets/lsp_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | from scipy.io import loadmat 7 | 8 | from config import cfg 9 | from datasets.Augmenter import AugmentSelection, transform 10 | from datasets.dataset_base import DatasetBase 11 | from models.model_base import NetworkModelBase 12 | from skeletons.gt_generators.gt_generator_base import GroundTruthGeneratorBase 13 | from skeletons.joint_converteres.joint_converter_lsp_rtpose2d import JointConverterLspRtPose2D 14 | from skeletons.skeleton_config_base import SkeletonConfigBase 15 | from util_img import normalize 16 | 17 | 18 | class LspDataset(DatasetBase): 19 | # Format: x, y, visible, visible->0 == visible, 1 == invisible 20 | def __init__(self, lsp_dataset_path, skeleton_config: SkeletonConfigBase, gt_generator: GroundTruthGeneratorBase, 21 | model: NetworkModelBase, num_samples=None, augment=False): 22 | self.path = lsp_dataset_path 23 | self.skeleton_config = skeleton_config 24 | self.augment = augment 25 | self.model = model 26 | self.gt_generator = gt_generator 27 | self.image_folder = os.path.join(self.path, "images") 28 | self.joint_converter = JointConverterLspRtPose2D() 29 | 30 | joints = loadmat(os.path.join(self.path, "joints.mat")) 31 | joints = joints['joints'].transpose(2, 1, 0) 32 | 33 | keys = list(range(0, joints.shape[0])) 34 | if num_samples: 35 | keys = np.random.choice(keys, num_samples, 
replace=False).tolist() 36 | self.items = [] 37 | for key in keys: 38 | self.items.append({"lsp_id": key + 1, "joints": joints[key, :, :]}) 39 | 40 | def __getitem__(self, index): 41 | item = self.items[index] 42 | img_path = os.path.join(self.image_folder, "im{0:04d}.jpg".format(item["lsp_id"])) 43 | image = cv2.imread(img_path) 44 | meta = {"scale_provided": [150 / cfg.general.input_height], 45 | "joints": self.joint_converter.get_converted_joint_list(item["joints"])} 46 | rhip_pos = meta["joints"][0, 8, 0:2] 47 | lhip_pos = meta["joints"][0, 11, 0:2] 48 | meta["objpos"] = [[(rhip_pos[0] + lhip_pos[0]) / 2, (rhip_pos[1] + lhip_pos[1]) / 2]] 49 | mask_misss = np.ones((image.shape[0], image.shape[1])) * 255 50 | image, mask_misss, meta, labels = self.transform_data(image, mask_misss, meta) 51 | image = self.get_img_as_tensor(image) 52 | limb_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_limb_maps, axis=2)) 53 | joint_map_masks = self.get_mask_as_tensor(np.repeat(mask_misss[:, :, np.newaxis], self.model.num_joint_maps, axis=2)) 54 | 55 | limb_maps = torch.from_numpy(labels[:self.model.num_limb_maps, :, :]).float() 56 | joint_maps = torch.from_numpy(labels[self.model.num_limb_maps:, :, :]).float() 57 | return {'image': image, 'joint_map_gt': joint_maps, 'limb_map_gt': limb_maps, 58 | 'limb_map_masks': limb_map_masks, 'joint_map_masks': joint_map_masks} 59 | 60 | def __len__(self): 61 | return len(self.items) 62 | 63 | def get_dataset_id_from_index(self, index): 64 | item = self.items[index] 65 | return int(item["lsp_id"]) 66 | 67 | def get_img_as_tensor(self, img): 68 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 69 | image = normalize(image) 70 | return torch.from_numpy(image).float() 71 | 72 | def get_mask_as_tensor(self, img): 73 | image = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 74 | return torch.from_numpy(image).float() 75 | 76 | def transform_data(self, img, mask, meta): 77 | aug = AugmentSelection.random() if self.augment else AugmentSelection.unrandom() 78 | img, mask, meta = transform(img, mask, meta, self.model.mask_shape, self.skeleton_config, aug=aug) 79 | labels = self.gt_generator.get_ground_truth(meta['joints'], mask) 80 | 81 | return img, mask, meta, labels 82 | -------------------------------------------------------------------------------- /evaluations/eval_coco_openpose.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import cv2 5 | 6 | from network import get_network_results 7 | from utils import util 8 | from models.model_openpose import OpenPoseModelHandler 9 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose 10 | from utils.util_eval import get_coco_joints_for_evaluation, get_coco_result_json, evaluate, get_result_json 11 | from utils.util_predict import get_human_data 12 | 13 | 14 | @util.measure_time 15 | def get_result_json(model, img_path, image_id): 16 | original_img = cv2.imread(img_path) 17 | img_result = get_network_results(model, original_img) 18 | joint_positions, limbs, humans = get_human_data(img_result, original_img, skeleton_config) 19 | eval_humans = get_coco_joints_for_evaluation(humans, skeleton_config) 20 | json_data = get_coco_result_json(image_id, eval_humans) 21 | return json_data 22 | 23 | 24 | if __name__ == "__main__": 25 | skeleton_config = SkeletonConfigOpenPose() 26 | val_img_folder = "/media/USERNAME/Data/Datasets/COCO/val2017" 27 | jsons = [] 
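# Accumulate the per-image COCO keypoint results; they are dumped to a JSON file
# and scored via evaluate() at the end of the script.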
28 | count = 0 29 | files = os.listdir(val_img_folder) 30 | 31 | skeleton_config = SkeletonConfigOpenPose() 32 | model = OpenPoseModelHandler().get_model() 33 | OpenPoseModelHandler().load_state_dict(model) 34 | img_ids = [] 35 | for file in files: 36 | if file.endswith(".jpg"): 37 | img_id = int(os.path.splitext(file)[0]) 38 | img_ids.append(img_id) 39 | json_data = get_result_json(model, os.path.join(val_img_folder, file), img_id) 40 | jsons.extend(json_data) 41 | print("{}/{} ({}) (found: {})".format(count, len(files), file, len(json_data))) 42 | count += 1 43 | # if count > 25: 44 | # break 45 | with open('/media/USERNAME/Data/Dump/test_exports/big_sim_c13.json', 'w') as outfile: 46 | json.dump(jsons, outfile) 47 | evaluate("/media/USERNAME/Data/Dump/test_exports/big_sim_c13.json") -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noboevbo/openpose-pytorch/4bc9cf4c927fdb507d89198724a237800cad9b3e/models/__init__.py -------------------------------------------------------------------------------- /models/model_base.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from config import cfg 4 | 5 | 6 | class NetworkModelBase(nn.Module): 7 | @property 8 | def num_joint_maps(self) -> int: 9 | raise NotImplementedError 10 | 11 | @property 12 | def num_limb_maps(self) -> int: 13 | raise NotImplementedError 14 | 15 | @property 16 | def num_layers(self) -> int: 17 | return self.num_joint_maps + self.num_limb_maps 18 | 19 | @property 20 | def limb_maps_start(self) -> int: 21 | return 0 22 | 23 | @property 24 | def joint_maps_start(self) -> int: 25 | return self.num_limb_maps 26 | 27 | @property 28 | def limb_map_bg(self) -> int: 29 | return self.num_limb_maps - 1 30 | 31 | @property 32 | def joint_map_bg(self) -> int: 33 | return self.num_layers - 1 34 | 35 | @property 36 | def parts_shape(self) -> (int, int, int): 37 | return self.num_layers, cfg.general.input_height // cfg.general.stride, cfg.general.input_width // cfg.general.stride 38 | 39 | @property 40 | def mask_shape(self) -> (int, int): 41 | return cfg.general.input_height // cfg.general.stride, cfg.general.input_width // cfg.general.stride # 46, 46 42 | 43 | @property 44 | def data_shape(self) -> (int, int, int): 45 | return 3, cfg.general.input_height, cfg.general.input_width 46 | -------------------------------------------------------------------------------- /models/model_handler_base.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from utils import util 7 | from config import cfg 8 | from models.model_base import NetworkModelBase 9 | 10 | 11 | def load_parameters_by_layer_structure(network: nn.Module, model_state_dict: OrderedDict): 12 | pretrained_model_keys = list(model_state_dict.keys()) 13 | net_keys = list(network.state_dict().keys()) 14 | weights_load = {} 15 | for i in range(len(net_keys)): 16 | weights_load[net_keys[i]] = model_state_dict[pretrained_model_keys[i]] 17 | state = network.state_dict() 18 | state.update(weights_load) 19 | network.load_state_dict(state) 20 | return network 21 | 22 | 23 | class NetworkModelHandlerBase(object): 24 | @util.measure_time 25 | def get_model(self) -> NetworkModelBase: 26 | raise NotImplementedError 27 | 28 | @util.measure_time 29 
| def get_train_model(self) -> NetworkModelBase: 30 | raise NotImplementedError 31 | 32 | def load_state_dict(self, network: nn.Module, state_dict_path: str = cfg.network.model_state_file): 33 | state_dict = torch.load(state_dict_path) 34 | network.load_state_dict(state_dict) 35 | 36 | def load_pretrained_feature_extractor_parameters(self, network:nn.Module): 37 | raise NotImplementedError 38 | 39 | def load_pretrained_stage1_parameters(self, network: nn.Module): 40 | raise NotImplementedError 41 | -------------------------------------------------------------------------------- /models/model_openpose.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import OrderedDict 3 | 4 | import torch.nn.functional as F 5 | from torch import nn, cat 6 | from torchvision import models 7 | 8 | from utils import util 9 | 10 | from config import cfg 11 | from models.model_handler_base import NetworkModelHandlerBase, NetworkModelBase 12 | 13 | 14 | class OpenPoseModelHandler(NetworkModelHandlerBase): 15 | @util.measure_time 16 | def get_model(self): 17 | network = OpenPoseModel() 18 | if cfg.network.use_gpu == 1: 19 | network.float() 20 | network.cuda() 21 | return network 22 | 23 | @util.measure_time 24 | def get_train_model(self): 25 | network = OpenPoseTrainModel() 26 | if cfg.network.use_gpu == 1: 27 | network.float() 28 | network.cuda() 29 | return network 30 | 31 | def load_pretrained_feature_extractor_parameters(self, network: nn.Module): 32 | vgg19 = models.vgg19(pretrained=True) 33 | vgg19_state_dict = vgg19.state_dict() 34 | vgg19_keys = list(vgg19_state_dict.keys()) 35 | net_keys = list(network.state_dict().keys()) 36 | weights_load = {} 37 | for i in range(20): 38 | weights_load[net_keys[i]] = vgg19_state_dict[vgg19_keys[i]] 39 | state = network.state_dict() 40 | state.update(weights_load) 41 | network.load_state_dict(state) 42 | return network 43 | 44 | def load_pretrained_stage1_parameters(self, network: nn.Module): 45 | # todo 46 | # openpose_state_dict = openpose.state_dict() 47 | # openpose_keys = list(openpose_state_dict.keys()) 48 | # net_keys = list(network.state_dict().keys()) 49 | # weights_load = {} 50 | # for i in range(20): 51 | # weights_load[net_keys[i]] = openpose_state_dict[openpose_keys[i]] 52 | # state = network.state_dict() 53 | # state.update(weights_load) 54 | # network.load_state_dict(state) 55 | # return network 56 | raise NotImplementedError 57 | 58 | 59 | class OpenPoseModel(NetworkModelBase): 60 | num_joint_maps = 19 61 | num_limb_maps = 38 62 | num_limb_in = 128 + num_limb_maps 63 | num_limb_joint_in = 128 + num_limb_maps + num_joint_maps 64 | limb_paf_mapping = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], 65 | [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], 66 | [32, 33], [36, 37], [18, 19], [26, 27]] 67 | 68 | def __init__(self): 69 | super(OpenPoseModel, self).__init__() 70 | # Feature Extractor 71 | self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1) 72 | self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1) 73 | self.pool1_stage1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0) 74 | self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1) 75 | self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1) 76 | self.pool2_stage1 = 
nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0) 77 | self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1) 78 | self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1) 79 | self.conv3_3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1) 80 | self.conv3_4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1) 81 | self.pool3_stage1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0) 82 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1) 83 | self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1) 84 | self.conv4_3_CPM = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(3, 3), stride=(1, 1), 85 | padding=1) 86 | self.conv4_4_CPM = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1) 87 | 88 | # Stage 1 89 | 90 | self.stage1_1_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 91 | padding=1) 92 | self.stage1_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 93 | padding=1) 94 | self.stage1_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 95 | padding=1) 96 | self.stage1_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), 97 | padding=0) 98 | self.stage1_5_limb_maps = nn.Conv2d(in_channels=512, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=(1, 1), 99 | padding=0) 100 | 101 | self.stage1_1_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 102 | padding=1) 103 | self.stage1_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 104 | padding=1) 105 | self.stage1_3_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), 106 | padding=1) 107 | self.stage1_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=512, kernel_size=(1, 1), stride=(1, 1), 108 | padding=0) 109 | self.stage1_5_joint_maps = nn.Conv2d(in_channels=512, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=(1, 1), 110 | padding=0) 111 | 112 | # Stage 2 113 | 114 | self.stage2_1_limb_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 115 | self.stage2_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 116 | self.stage2_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 117 | self.stage2_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 118 | self.stage2_5_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 119 | self.stage2_6_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 120 | self.stage2_7_limb_maps = nn.Conv2d(in_channels=128, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=1, padding=0) 121 | 122 | self.stage2_1_joint_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 123 | self.stage2_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 124 | 
self.stage2_3_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 125 | self.stage2_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 126 | self.stage2_5_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 127 | self.stage2_6_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 128 | self.stage2_7_joint_maps = nn.Conv2d(in_channels=128, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=1, padding=0) 129 | 130 | # Stage 3 131 | 132 | self.stage3_1_limb_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 133 | self.stage3_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 134 | self.stage3_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 135 | self.stage3_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 136 | self.stage3_5_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 137 | self.stage3_6_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 138 | self.stage3_7_limb_maps = nn.Conv2d(in_channels=128, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=1, padding=0) 139 | 140 | self.stage3_1_joint_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 141 | self.stage3_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 142 | self.stage3_3_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 143 | self.stage3_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 144 | self.stage3_5_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 145 | self.stage3_6_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 146 | self.stage3_7_joint_maps = nn.Conv2d(in_channels=128, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=1, padding=0) 147 | 148 | # Stage 4 149 | 150 | self.stage4_1_limb_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 151 | self.stage4_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 152 | self.stage4_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 153 | self.stage4_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 154 | self.stage4_5_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 155 | self.stage4_6_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 156 | self.stage4_7_limb_maps = nn.Conv2d(in_channels=128, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=1, padding=0) 157 | 158 | self.stage4_1_joint_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 159 | self.stage4_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 160 | self.stage4_3_joint_maps = 
nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 161 | self.stage4_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 162 | self.stage4_5_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 163 | self.stage4_6_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 164 | self.stage4_7_joint_maps = nn.Conv2d(in_channels=128, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=1, padding=0) 165 | 166 | # Stage 5 167 | 168 | self.stage5_1_limb_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 169 | self.stage5_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 170 | self.stage5_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 171 | self.stage5_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 172 | self.stage5_5_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 173 | self.stage5_6_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 174 | self.stage5_7_limb_maps = nn.Conv2d(in_channels=128, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=1, padding=0) 175 | 176 | self.stage5_1_joint_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 177 | self.stage5_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 178 | self.stage5_3_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 179 | self.stage5_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 180 | self.stage5_5_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 181 | self.stage5_6_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 182 | self.stage5_7_joint_maps = nn.Conv2d(in_channels=128, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=1, padding=0) 183 | 184 | # Stage 6 185 | 186 | self.stage6_1_limb_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 187 | self.stage6_2_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 188 | self.stage6_3_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 189 | self.stage6_4_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 190 | self.stage6_5_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 191 | self.stage6_6_limb_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 192 | self.stage6_7_limb_maps = nn.Conv2d(in_channels=128, out_channels=self.num_limb_maps, kernel_size=(1, 1), stride=1, padding=0) 193 | 194 | self.stage6_1_joint_maps = nn.Conv2d(in_channels=self.num_limb_joint_in, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 195 | self.stage6_2_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 196 | self.stage6_3_joint_maps = nn.Conv2d(in_channels=128, 
out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 197 | self.stage6_4_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 198 | self.stage6_5_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(7, 7), stride=1, padding=3) 199 | self.stage6_6_joint_maps = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(1, 1), stride=1, padding=0) 200 | self.stage6_7_joint_maps = nn.Conv2d(in_channels=128, out_channels=self.num_joint_maps, kernel_size=(1, 1), stride=1, padding=0) 201 | 202 | self.out_feature = None 203 | 204 | self.out_stage1_limb_maps = None 205 | self.out_stage1_joint_maps = None 206 | 207 | self.out_stage2_limb_maps = None 208 | self.out_stage2_joint_maps = None 209 | 210 | self.out_stage3_limb_maps = None 211 | self.out_stage3_joint_maps = None 212 | 213 | self.out_stage4_limb_maps = None 214 | self.out_stage4_joint_maps = None 215 | 216 | self.out_stage5_limb_maps = None 217 | self.out_stage5_joint_maps = None 218 | 219 | self.out_stage6_limb_maps = None 220 | self.out_stage6_joint_maps = None 221 | 222 | def forward_feature_extraction(self, x): 223 | out = F.relu(self.conv1_1(x), inplace=True) 224 | out = F.relu(self.conv1_2(out), inplace=True) 225 | out = self.pool1_stage1(out) 226 | out = F.relu(self.conv2_1(out), inplace=True) 227 | out = F.relu(self.conv2_2(out), inplace=True) 228 | out = self.pool2_stage1(out) 229 | out = F.relu(self.conv3_1(out), inplace=True) 230 | out = F.relu(self.conv3_2(out), inplace=True) 231 | out = F.relu(self.conv3_3(out), inplace=True) 232 | out = F.relu(self.conv3_4(out), inplace=True) 233 | out = self.pool3_stage1(out) 234 | out = F.relu(self.conv4_1(out), inplace=True) 235 | out = F.relu(self.conv4_2(out), inplace=True) 236 | out = F.relu(self.conv4_3_CPM(out), inplace=True) 237 | return F.relu(self.conv4_4_CPM(out), inplace=True) 238 | 239 | def forward_stage_1_limb_maps(self, input_var): 240 | out = F.relu(self.stage1_1_limb_maps(input_var), inplace=True) 241 | out = F.relu(self.stage1_2_limb_maps(out), inplace=True) 242 | out = F.relu(self.stage1_3_limb_maps(out), inplace=True) 243 | out = F.relu(self.stage1_4_limb_maps(out), inplace=True) 244 | return self.stage1_5_limb_maps(out) 245 | 246 | def forward_stage_1_joint_maps(self, input_var): 247 | out = F.relu(self.stage1_1_joint_maps(input_var), inplace=True) 248 | out = F.relu(self.stage1_2_joint_maps(out), inplace=True) 249 | out = F.relu(self.stage1_3_joint_maps(out), inplace=True) 250 | out = F.relu(self.stage1_4_joint_maps(out), inplace=True) 251 | return self.stage1_5_joint_maps(out) 252 | 253 | def forward_stage2_limb_maps(self, input_var): 254 | out = F.relu(self.stage2_1_limb_maps(input_var), inplace=True) 255 | out = F.relu(self.stage2_2_limb_maps(out), inplace=True) 256 | out = F.relu(self.stage2_3_limb_maps(out), inplace=True) 257 | out = F.relu(self.stage2_4_limb_maps(out), inplace=True) 258 | out = F.relu(self.stage2_5_limb_maps(out), inplace=True) 259 | out = F.relu(self.stage2_6_limb_maps(out), inplace=True) 260 | return self.stage2_7_limb_maps(out) 261 | 262 | def forward_stage2_joint_maps(self, input_var): 263 | out = F.relu(self.stage2_1_joint_maps(input_var), inplace=True) 264 | out = F.relu(self.stage2_2_joint_maps(out), inplace=True) 265 | out = F.relu(self.stage2_3_joint_maps(out), inplace=True) 266 | out = F.relu(self.stage2_4_joint_maps(out), inplace=True) 267 | out = F.relu(self.stage2_5_joint_maps(out), inplace=True) 268 | out = F.relu(self.stage2_6_joint_maps(out), inplace=True) 269 | 
return self.stage2_7_joint_maps(out) 270 | 271 | def forward_stage3_limb_maps(self, input_var): 272 | out = F.relu(self.stage3_1_limb_maps(input_var), inplace=True) 273 | out = F.relu(self.stage3_2_limb_maps(out), inplace=True) 274 | out = F.relu(self.stage3_3_limb_maps(out), inplace=True) 275 | out = F.relu(self.stage3_4_limb_maps(out), inplace=True) 276 | out = F.relu(self.stage3_5_limb_maps(out), inplace=True) 277 | out = F.relu(self.stage3_6_limb_maps(out), inplace=True) 278 | return self.stage3_7_limb_maps(out) 279 | 280 | def forward_stage3_joint_maps(self, input_var): 281 | out = F.relu(self.stage3_1_joint_maps(input_var), inplace=True) 282 | out = F.relu(self.stage3_2_joint_maps(out), inplace=True) 283 | out = F.relu(self.stage3_3_joint_maps(out), inplace=True) 284 | out = F.relu(self.stage3_4_joint_maps(out), inplace=True) 285 | out = F.relu(self.stage3_5_joint_maps(out), inplace=True) 286 | out = F.relu(self.stage3_6_joint_maps(out), inplace=True) 287 | return self.stage3_7_joint_maps(out) 288 | 289 | def forward_stage4_limb_maps(self, input_var): 290 | out = F.relu(self.stage4_1_limb_maps(input_var), inplace=True) 291 | out = F.relu(self.stage4_2_limb_maps(out), inplace=True) 292 | out = F.relu(self.stage4_3_limb_maps(out), inplace=True) 293 | out = F.relu(self.stage4_4_limb_maps(out), inplace=True) 294 | out = F.relu(self.stage4_5_limb_maps(out), inplace=True) 295 | out = F.relu(self.stage4_6_limb_maps(out), inplace=True) 296 | return self.stage4_7_limb_maps(out) 297 | 298 | def forward_stage4_joint_maps(self, input_var): 299 | out = F.relu(self.stage4_1_joint_maps(input_var), inplace=True) 300 | out = F.relu(self.stage4_2_joint_maps(out), inplace=True) 301 | out = F.relu(self.stage4_3_joint_maps(out), inplace=True) 302 | out = F.relu(self.stage4_4_joint_maps(out), inplace=True) 303 | out = F.relu(self.stage4_5_joint_maps(out), inplace=True) 304 | out = F.relu(self.stage4_6_joint_maps(out), inplace=True) 305 | return self.stage4_7_joint_maps(out) 306 | 307 | def forward_stage5_limb_maps(self, input_var): 308 | out = F.relu(self.stage5_1_limb_maps(input_var), inplace=True) 309 | out = F.relu(self.stage5_2_limb_maps(out), inplace=True) 310 | out = F.relu(self.stage5_3_limb_maps(out), inplace=True) 311 | out = F.relu(self.stage5_4_limb_maps(out), inplace=True) 312 | out = F.relu(self.stage5_5_limb_maps(out), inplace=True) 313 | out = F.relu(self.stage5_6_limb_maps(out), inplace=True) 314 | return self.stage5_7_limb_maps(out) 315 | 316 | def forward_stage5_joint_maps(self, input_var): 317 | out = F.relu(self.stage5_1_joint_maps(input_var), inplace=True) 318 | out = F.relu(self.stage5_2_joint_maps(out), inplace=True) 319 | out = F.relu(self.stage5_3_joint_maps(out), inplace=True) 320 | out = F.relu(self.stage5_4_joint_maps(out), inplace=True) 321 | out = F.relu(self.stage5_5_joint_maps(out), inplace=True) 322 | out = F.relu(self.stage5_6_joint_maps(out), inplace=True) 323 | return self.stage5_7_joint_maps(out) 324 | 325 | def forward_stage6_limb_maps(self, input_var): 326 | out = F.relu(self.stage6_1_limb_maps(input_var), inplace=True) 327 | out = F.relu(self.stage6_2_limb_maps(out), inplace=True) 328 | out = F.relu(self.stage6_3_limb_maps(out), inplace=True) 329 | out = F.relu(self.stage6_4_limb_maps(out), inplace=True) 330 | out = F.relu(self.stage6_5_limb_maps(out), inplace=True) 331 | out = F.relu(self.stage6_6_limb_maps(out), inplace=True) 332 | return self.stage6_7_limb_maps(out) 333 | 334 | def forward_stage6_joint_maps(self, input_var): 335 | out = 
F.relu(self.stage6_1_joint_maps(input_var), inplace=True) 336 | out = F.relu(self.stage6_2_joint_maps(out), inplace=True) 337 | out = F.relu(self.stage6_3_joint_maps(out), inplace=True) 338 | out = F.relu(self.stage6_4_joint_maps(out), inplace=True) 339 | out = F.relu(self.stage6_5_joint_maps(out), inplace=True) 340 | out = F.relu(self.stage6_6_joint_maps(out), inplace=True) 341 | return self.stage6_7_joint_maps(out) 342 | 343 | @util.measure_time 344 | def forward(self, x): 345 | # Feature Extraction 346 | self.out_feature = self.forward_feature_extraction(x) 347 | start_time = time.time() 348 | 349 | # Stage 1 350 | self.out_stage1_limb_maps = self.forward_stage_1_limb_maps(self.out_feature) 351 | self.out_stage1_joint_maps = self.forward_stage_1_joint_maps(self.out_feature) 352 | 353 | # Stage 2 354 | 355 | concat_stage2 = cat([self.out_stage1_limb_maps, self.out_stage1_joint_maps, self.out_feature], 1) 356 | self.out_stage2_limb_maps = self.forward_stage2_limb_maps(concat_stage2) 357 | self.out_stage2_joint_maps = self.forward_stage2_joint_maps(concat_stage2) 358 | 359 | # Stage 3 360 | 361 | concat_stage3 = cat([self.out_stage2_limb_maps, self.out_stage2_joint_maps, self.out_feature], 1) 362 | self.out_stage3_limb_maps = self.forward_stage3_limb_maps(concat_stage3) 363 | self.out_stage3_joint_maps = self.forward_stage3_joint_maps(concat_stage3) 364 | 365 | # Stage 4 366 | 367 | concat_stage4 = cat([self.out_stage3_limb_maps, self.out_stage3_joint_maps, self.out_feature], 1) 368 | self.out_stage4_limb_maps = self.forward_stage4_limb_maps(concat_stage4) 369 | self.out_stage4_joint_maps = self.forward_stage4_joint_maps(concat_stage4) 370 | 371 | # Stage 5 372 | 373 | concat_stage5 = cat([self.out_stage4_limb_maps, self.out_stage4_joint_maps, self.out_feature], 1) 374 | self.out_stage5_limb_maps = self.forward_stage5_limb_maps(concat_stage5) 375 | self.out_stage5_joint_maps = self.forward_stage5_joint_maps(concat_stage5) 376 | 377 | # Stage 6 378 | 379 | concat_stage6 = cat([self.out_stage5_limb_maps, self.out_stage5_joint_maps, self.out_feature], 1) 380 | self.out_stage6_limb_maps = self.forward_stage6_limb_maps(concat_stage6) 381 | self.out_stage6_joint_maps = self.forward_stage6_joint_maps(concat_stage6) 382 | 383 | util.debug_additional_timer("Stages", start_time) 384 | return self.out_stage6_limb_maps, self.out_stage6_joint_maps 385 | # # self.output 386 | # concat_stage7 = cat([self.out_stage6_limb_maps, self.out_stage6_joint_maps], 1) 387 | # 388 | # return concat_stage7 389 | 390 | 391 | class OpenPoseTrainModel(OpenPoseModel): 392 | def __init__(self): 393 | super(OpenPoseTrainModel, self).__init__() 394 | 395 | def forward(self, x, joint_map_masks, limb_map_masks, epoch): 396 | """ 397 | Applies the ground truth miss masks to the output of each stage because the ground truth is also masked! 
398 | """ 399 | result_dict = OrderedDict() 400 | # Feature Extraction 401 | self.out_feature = self.forward_feature_extraction(x) 402 | 403 | # Stage 1 404 | self.out_stage1_limb_maps = self.forward_stage_1_limb_maps(self.out_feature) 405 | self.out_stage1_joint_maps = self.forward_stage_1_joint_maps(self.out_feature) 406 | result_dict[1] = OrderedDict({ 407 | "limb_map": self.out_stage1_limb_maps * limb_map_masks, 408 | "joint_map": self.out_stage1_joint_maps * joint_map_masks, 409 | }) 410 | 411 | # Stage 2 412 | 413 | if epoch >= cfg.network.stage_delay_epochs[0]: 414 | concat_stage2 = cat([self.out_stage1_limb_maps, self.out_stage1_joint_maps, self.out_feature], 1) 415 | self.out_stage2_limb_maps = self.forward_stage2_limb_maps(concat_stage2) 416 | self.out_stage2_joint_maps = self.forward_stage2_joint_maps(concat_stage2) 417 | result_dict[2] = OrderedDict({ 418 | "limb_map": self.out_stage2_limb_maps * limb_map_masks, 419 | "joint_map": self.out_stage2_joint_maps * joint_map_masks, 420 | }) 421 | 422 | # Stage 3 423 | 424 | if epoch >= cfg.network.stage_delay_epochs[1]: 425 | concat_stage3 = cat([self.out_stage2_limb_maps, self.out_stage2_joint_maps, self.out_feature], 1) 426 | self.out_stage3_limb_maps = self.forward_stage3_limb_maps(concat_stage3) 427 | self.out_stage3_joint_maps = self.forward_stage3_joint_maps(concat_stage3) 428 | result_dict[3] = OrderedDict({ 429 | "limb_map": self.out_stage3_limb_maps * limb_map_masks, 430 | "joint_map": self.out_stage3_joint_maps * joint_map_masks, 431 | }) 432 | 433 | # Stage 4 434 | if epoch >= cfg.network.stage_delay_epochs[2]: 435 | concat_stage4 = cat([self.out_stage3_limb_maps, self.out_stage3_joint_maps, self.out_feature], 1) 436 | self.out_stage4_limb_maps = self.forward_stage4_limb_maps(concat_stage4) 437 | self.out_stage4_joint_maps = self.forward_stage4_joint_maps(concat_stage4) 438 | result_dict[4] = OrderedDict({ 439 | "limb_map": self.out_stage4_limb_maps * limb_map_masks, 440 | "joint_map": self.out_stage4_joint_maps * joint_map_masks, 441 | }) 442 | # Stage 5 443 | 444 | if epoch >= cfg.network.stage_delay_epochs[3]: 445 | concat_stage5 = cat([self.out_stage4_limb_maps, self.out_stage4_joint_maps, self.out_feature], 1) 446 | self.out_stage5_limb_maps = self.forward_stage5_limb_maps(concat_stage5) 447 | self.out_stage5_joint_maps = self.forward_stage5_joint_maps(concat_stage5) 448 | result_dict[5] = OrderedDict({ 449 | "limb_map": self.out_stage5_limb_maps * limb_map_masks, 450 | "joint_map": self.out_stage5_joint_maps * joint_map_masks, 451 | }) 452 | 453 | # Stage 6 454 | 455 | if epoch >= cfg.network.stage_delay_epochs[4]: 456 | concat_stage6 = cat([self.out_stage5_limb_maps, self.out_stage5_joint_maps, self.out_feature], 1) 457 | self.out_stage6_limb_maps = self.forward_stage6_limb_maps(concat_stage6) 458 | self.out_stage6_joint_maps = self.forward_stage6_joint_maps(concat_stage6) 459 | result_dict[6] = OrderedDict({ 460 | "limb_map": self.out_stage6_limb_maps * limb_map_masks, 461 | "joint_map": self.out_stage6_joint_maps * joint_map_masks, 462 | }) 463 | 464 | return result_dict 465 | -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | from utils import util 7 | from config import cfg 8 | from models.model_handler_base import NetworkModelBase 9 | from util_img import normalize 10 | 11 | 12 | 
@util.measure_time 13 | def get_pytorch_from_numpy(img): 14 | pytorch_img = np.transpose(img, (2, 0, 1)) # transpose to channels, height, width 15 | pytorch_img = np.expand_dims(pytorch_img, axis=0) # add dim 16 | pytorch_img = normalize(pytorch_img) 17 | pytorch_img = torch.from_numpy(pytorch_img).float() 18 | if cfg.network.use_gpu == 1: 19 | pytorch_img = pytorch_img.pin_memory().cuda() 20 | return Variable(pytorch_img) 21 | 22 | 23 | @util.measure_time 24 | def get_network_results(model: NetworkModelBase, original_img): 25 | scales = cfg.network.scale_search 26 | multiplier_width = [x * cfg.general.input_width / original_img.shape[1] for x in scales] 27 | img_result = { 28 | "original_img": original_img, 29 | "results": [] 30 | } 31 | 32 | for idx, m in enumerate(range(len(scales))): 33 | scale_x = multiplier_width[m] 34 | imageToTest = cv2.resize(original_img, (0, 0), fx=scale_x, fy=scale_x, interpolation=cv2.INTER_CUBIC) 35 | imageToTest_padded, pad = util.pad_by_stride(imageToTest, cfg.general.stride, cfg.network.pad_color) 36 | pytorch_img = get_pytorch_from_numpy(imageToTest_padded) 37 | with torch.no_grad(): 38 | limb_maps_output, joint_maps_output = model(pytorch_img) 39 | 40 | img_result["results"].append({ 41 | "img_to_test_padded": imageToTest_padded, 42 | "pad": pad, 43 | "joint_maps": joint_maps_output, 44 | "limb_maps": limb_maps_output 45 | }) 46 | 47 | return img_result 48 | -------------------------------------------------------------------------------- /predict_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | 5 | import config 6 | from network import get_network_results 7 | from utils import util 8 | from models.model_openpose import OpenPoseModelHandler 9 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose 10 | from utils.util_predict import get_human_data 11 | from visualization import visualize_human_pose, save_human_pose_img 12 | 13 | skeleton_config = SkeletonConfigOpenPose() 14 | 15 | 16 | @util.measure_time 17 | def predict(img_path, visualize=False): 18 | model = OpenPoseModelHandler().get_model() 19 | OpenPoseModelHandler().load_state_dict(model) 20 | original_img = cv2.imread(img_path) 21 | img_result = get_network_results(model, original_img) 22 | joint_positions, limbs, humans = get_human_data(model, img_result, original_img, skeleton_config) 23 | 24 | if visualize: 25 | visualize_human_pose(original_img, joint_positions, humans, skeleton_config.limbs, skeleton_config.limb_colors) 26 | # save_human_pose_img(original_img, joint_positions, humans, skeleton_config.limbs, skeleton_config.limb_colors) 27 | 28 | 29 | if __name__ == "__main__": 30 | config.cfg = config.OpenPoseConfig() 31 | predict("/home/USERNAME/Pictures/test.jpg", True) -------------------------------------------------------------------------------- /predict_webcam.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import time 3 | from multiprocessing import set_start_method 4 | 5 | import cv2 6 | 7 | from models.model_openpose import OpenPoseModelHandler 8 | 9 | try: 10 | set_start_method('spawn') 11 | except RuntimeError: 12 | pass 13 | from utils import util 14 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 15 | from network import get_network_results 16 | 17 | from utils.util_predict import get_human_data 18 | from visualization import get_human_pose_image 19 | 20 | fps_time = 0 21 | 22 | 23 | 
@util.measure_time 24 | def predict(model, img_result, skeleton_config, input_img): 25 | global fps_time 26 | start_time = time.time() 27 | joint_positions, limbs, humans = get_human_data(model, img_result, input_img, skeleton_config) 28 | print("{}: {}".format("get poses", time.time() - start_time)) 29 | pose_img = get_human_pose_image(input_img, joint_positions, humans, SkeletonConfigRtPose2D.limbs, SkeletonConfigRtPose2D.limb_colors) 30 | cv2.putText(pose_img, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 31 | (0, 255, 0), 2) 32 | cv2.imshow('webcam', pose_img) 33 | if cv2.waitKey(1) == 27: 34 | exit(0) 35 | fps_time = time.time() 36 | 37 | 38 | def cam_loop(pipe_parent): 39 | cap = cv2.VideoCapture(0) 40 | model = OpenPoseModelHandler().get_model() 41 | OpenPoseModelHandler().load_state_dict(model) 42 | model.eval() 43 | while True: 44 | ret_val, img = cap.read() 45 | if img is not None: 46 | img_result = get_network_results(model, img) 47 | pipe_parent.send((model, img, img_result)) 48 | 49 | 50 | def show_loop(pipe_child): 51 | cv2.namedWindow('webcam') 52 | 53 | skeleton_config = SkeletonConfigRtPose2D() 54 | 55 | while True: 56 | from_queue = pipe_child.recv() 57 | predict(from_queue[0], from_queue[2], skeleton_config, from_queue[1]) 58 | 59 | 60 | if __name__ == "__main__": 61 | pipe_parent, pipe_child = multiprocessing.Pipe() 62 | 63 | cam_process = multiprocessing.Process(target=cam_loop, args=(pipe_parent, )) 64 | cam_process.start() 65 | 66 | show_process = multiprocessing.Process(target=show_loop, args=(pipe_child, )) 67 | show_process.start() 68 | 69 | cam_process.join() 70 | show_process.join() 71 | 72 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | tensorflow 3 | torch 4 | torchvision 5 | configobj 6 | matplotlib 7 | cerberus -------------------------------------------------------------------------------- /skeletons/gt_generators/gt_generator_base.py: -------------------------------------------------------------------------------- 1 | class GroundTruthGeneratorBase: 2 | def get_ground_truth(self, joints, mask_miss): 3 | raise NotImplementedError -------------------------------------------------------------------------------- /skeletons/gt_generators/gt_generator_openpose.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from math import sqrt, isnan 4 | 5 | import numpy as np 6 | 7 | from config import cfg 8 | from models.model_openpose import OpenPoseModel 9 | from skeletons.gt_generators.gt_generator_base import GroundTruthGeneratorBase 10 | from skeletons.skeleton_config_base import SkeletonConfigBase 11 | 12 | 13 | class GroundTruthGeneratorOpenPose(GroundTruthGeneratorBase): 14 | def __init__(self, model: OpenPoseModel, skeleton_config: SkeletonConfigBase): 15 | self.double_sigma2 = 2 * cfg.train.augmentation.sigma * cfg.train.augmentation.sigma 16 | self.thre = cfg.network.paf_thre 17 | self.model = model 18 | self.skeleton_config = skeleton_config 19 | 20 | # cached common parameters which same for all iterations and all pictures 21 | 22 | stride = cfg.general.stride 23 | width = cfg.general.input_width//stride 24 | height = cfg.general.input_height//stride 25 | 26 | # this is coordinates of centers of bigger grid 27 | self.grid_x = np.arange(width)*stride + stride/2-0.5 28 | self.grid_y = 
np.arange(height)*stride + stride/2-0.5 29 | 30 | self.Y, self.X = np.mgrid[0:cfg.general.input_height:stride,0:cfg.general.input_width:stride] 31 | 32 | def get_ground_truth(self, joints, mask_miss): 33 | heatmaps = np.zeros(self.model.parts_shape, dtype=np.float) 34 | 35 | self.generate_joint_heatmaps(heatmaps, joints) 36 | slice_joint_maps = slice(self.model.joint_maps_start, self.model.joint_map_bg) 37 | heatmaps[self.model.joint_map_bg] = 1. - np.amax(heatmaps[slice_joint_maps, :, :], axis=0) 38 | 39 | self.generate_limb_pafs(heatmaps, joints) 40 | 41 | heatmaps *= mask_miss 42 | 43 | return heatmaps 44 | 45 | def put_gaussian_maps(self, heatmaps, layer, joints): 46 | # actually exp(a+b) = exp(a)*exp(b), lets use it calculating 2d exponent, it could just be calculated by 47 | for i in range(joints.shape[0]): 48 | 49 | exp_x = np.exp(-(self.grid_x-joints[i,0])**2/self.double_sigma2) 50 | exp_y = np.exp(-(self.grid_y-joints[i,1])**2/self.double_sigma2) 51 | 52 | exp = np.outer(exp_y, exp_x) 53 | 54 | # note this is correct way of combination - min(sum(...),1.0) as was in C++ code is incorrect 55 | # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/118 56 | heatmaps[self.model.joint_maps_start + layer, :, :] = \ 57 | np.maximum(heatmaps[self.model.joint_maps_start + layer, :, :], exp) 58 | 59 | def generate_joint_heatmaps(self, heatmaps, joints): 60 | for i in range(len(self.skeleton_config.joints)): 61 | # 0 - invisible, 1 visible, 2 - absent 62 | joint_visibility_flags = joints[:,i,2] 63 | visible = joint_visibility_flags < 2 64 | self.put_gaussian_maps(heatmaps, i, joints[visible, i, 0:2]) 65 | 66 | def put_vector_maps(self, heatmaps, layerX, layerY, joint_from, joint_to): 67 | count = np.zeros(heatmaps.shape[1:], dtype=np.int) 68 | 69 | for i in range(joint_from.shape[0]): 70 | (x1, y1) = joint_from[i] 71 | (x2, y2) = joint_to[i] 72 | 73 | dx = x2-x1 74 | dy = y2-y1 75 | dnorm = sqrt(dx*dx + dy*dy) 76 | 77 | if dnorm==0: # we get nan here sometimes, it's kills NN 78 | # TODO: handle it better. probably we should add zero paf, centered paf, or skip this completely 79 | print("Parts are too close to each other. Length is zero. Skipping") 80 | continue 81 | 82 | dx = dx / dnorm 83 | dy = dy / dnorm 84 | 85 | assert not isnan(dx) and not isnan(dy), "dnorm is zero, wtf" 86 | 87 | min_sx, max_sx = (x1, x2) if x1 < x2 else (x2, x1) 88 | min_sy, max_sy = (y1, y2) if y1 < y2 else (y2, y1) 89 | 90 | stride = cfg.general.stride 91 | min_sx = int(round((min_sx - self.thre) / stride)) 92 | min_sy = int(round((min_sy - self.thre) / stride)) 93 | max_sx = int(round((max_sx + self.thre) / stride)) 94 | max_sy = int(round((max_sy + self.thre) / stride)) 95 | 96 | # check PAF off screen. do not really need to do it with max>grid size 97 | if max_sy < 0: 98 | continue 99 | 100 | if max_sx < 0: 101 | continue 102 | 103 | if min_sx < 0: 104 | min_sx = 0 105 | 106 | if min_sy < 0: 107 | min_sy = 0 108 | 109 | slice_x = slice(min_sx, max_sx) # + 1 this mask is not only speed up but crops paf really. 
This copied from original code 110 | slice_y = slice(min_sy, max_sy) # + 1 int g_y = min_y; g_y < max_y; g_y++ -- note strict < 111 | 112 | dist = distances(self.X[slice_y,slice_x], self.Y[slice_y,slice_x], x1, y1, x2, y2) 113 | dist = dist <= self.thre 114 | 115 | heatmaps[layerX, slice_y, slice_x][dist] = (dist * dx)[dist] # += dist * dx 116 | heatmaps[layerY, slice_y, slice_x][dist] = (dist * dy)[dist] # += dist * dy 117 | count[slice_y, slice_x][dist] += 1 118 | 119 | # TODO: averaging by pafs mentioned in the paper but never worked in C++ augmentation code 120 | # heatmaps[layerX, :, :][count > 0] /= count[count > 0] 121 | # heatmaps[layerY, :, :][count > 0] /= count[count > 0] 122 | 123 | def generate_limb_pafs(self, heatmaps, joints): 124 | for (i,(fr,to)) in enumerate(self.skeleton_config.limbs): # TODO Check if this works, tuple instead list 125 | visible_from = joints[:,fr,2] < 2 126 | visible_to = joints[:,to, 2] < 2 127 | visible = visible_from & visible_to 128 | 129 | # get from mapping 130 | mapping = self.model.limb_paf_mapping[i] 131 | paf_layer_x, paf_layer_y = mapping[0], mapping[1] 132 | self.put_vector_maps(heatmaps, paf_layer_x, paf_layer_y, joints[visible, fr, 0:2], joints[visible, to, 0:2]) 133 | 134 | 135 | def distances(X, Y, x1, y1, x2, y2): 136 | 137 | # classic formula is: 138 | # d = (x2-x1)*(y1-y)-(x1-x)*(y2-y1)/sqrt((x2-x1)**2 + (y2-y1)**2) 139 | 140 | xD = (x2-x1) 141 | yD = (y2-y1) 142 | norm2 = sqrt(xD**2 + yD**2) 143 | dist = xD*(y1-Y)-(x1-X)*yD 144 | dist /= norm2 145 | 146 | return np.abs(dist) 147 | -------------------------------------------------------------------------------- /skeletons/joint_converteres/joint_converter_base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class JointConverterBase: 5 | def get_converted_joint_list(self, source_joints: []) -> np.array: 6 | raise NotImplementedError -------------------------------------------------------------------------------- /skeletons/joint_converteres/joint_converter_coco_rtpose2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from skeletons.joint_converteres.joint_converter_base import JointConverterBase 4 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 5 | 6 | 7 | def get_internal_neck_visibility(joints, both_shoulders_known, r_shoulder, l_shoulder): 8 | return np.minimum(joints[both_shoulders_known, r_shoulder, 2], joints[both_shoulders_known, l_shoulder, 2]) 9 | 10 | 11 | def get_internal_neck_position(joints, both_shoulders_known, r_shoulder, l_shoulder): 12 | return (joints[both_shoulders_known, r_shoulder, 0:2] + joints[both_shoulders_known, l_shoulder, 0:2]) / 2 13 | 14 | 15 | class JointConverterCocoRtPose2D(JointConverterBase): 16 | joints = { 17 | 'Nose': 0, 18 | 'LEye': 1, 19 | 'REye': 2, 20 | 'LEar': 3, 21 | 'REar': 4, 22 | 'LShoulder': 5, 23 | 'RShoulder': 6, 24 | 'LElbow': 7, 25 | 'RElbow': 8, 26 | 'LWrist': 9, 27 | 'RWrist': 10, 28 | 'LHip': 11, 29 | 'RHip': 12, 30 | 'LKnee': 13, 31 | 'RKnee': 14, 32 | 'LAnkle': 15, 33 | 'RAnkle': 16 34 | } 35 | 36 | def get_converted_joint_list(self, source_joints: []) -> np.array: 37 | joints = np.array(source_joints) 38 | result = np.zeros((joints.shape[0], len(SkeletonConfigRtPose2D.joints), 3), dtype=np.float) 39 | result[:, :, 2] = 2. 
# 2 - absent, 1 visible, 0 - invisible 40 | 41 | for coco_joint, coco_joint_id in self.joints.items(): 42 | internal_joint_id = SkeletonConfigRtPose2D.joints[coco_joint] 43 | assert internal_joint_id != 1, "Neck shouldn't be known yet" 44 | result[:, internal_joint_id, :] = joints[:, coco_joint_id, :] 45 | 46 | neck_internal = SkeletonConfigRtPose2D.joints['Neck'] 47 | r_shoulder_coco = self.joints['RShoulder'] 48 | l_shoulder_coco = self.joints['LShoulder'] 49 | 50 | # no neck in coco database, we calculate it as average of shoulders 51 | # TODO: we use 0 - hidden, 1 visible, 2 absent - it is not coco values they processed by generate_hdf5 52 | both_shoulders_known = (joints[:, l_shoulder_coco, 2] < 2) & (joints[:, r_shoulder_coco, 2] < 2) 53 | result[both_shoulders_known, neck_internal, 0:2] = get_internal_neck_position(joints, both_shoulders_known, 54 | r_shoulder_coco, l_shoulder_coco) 55 | result[both_shoulders_known, neck_internal, 2] = get_internal_neck_visibility(joints, both_shoulders_known, 56 | r_shoulder_coco, l_shoulder_coco) 57 | 58 | return result 59 | -------------------------------------------------------------------------------- /skeletons/joint_converteres/joint_converter_lsp_rtpose2d.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | 5 | from skeletons.joint_converteres.joint_converter_base import JointConverterBase 6 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 7 | 8 | 9 | class JointConverterLspRtPose2D(JointConverterBase): 10 | joints = OrderedDict([ 11 | ("Nose", None), 12 | ("Neck", 12), 13 | ("RShoulder", 8), 14 | ("RElbow", 7), 15 | ("RWrist", 6), 16 | ("LShoulder", 9), 17 | ("LElbow", 10), 18 | ("LWrist", 11), 19 | ("RHip", 2), 20 | ("RKnee", 1), 21 | ("RAnkle", 0), 22 | ("LHip", 3), 23 | ("LKnee", 4), 24 | ("LAnkle", 5), 25 | ("REye", None), 26 | ("LEye", None), 27 | ("REar", None), 28 | ("LEar", None) 29 | ]) 30 | 31 | # LSP Visibility: 0 = Visible, 1 = Invisible 32 | # Always only one annotated person, thus 1 as array rows 33 | def get_converted_joint_list(self, source_joints: []) -> np.array: 34 | joints = np.array(source_joints) 35 | result = np.zeros((1, len(SkeletonConfigRtPose2D.joints), 3), dtype=np.float) 36 | result[:, :, 2] = 2. 
# 2 - absent, 1 visible, 0 - invisible 37 | 38 | for rtpose2d_joint, rtpose2d_joint_id in SkeletonConfigRtPose2D.joints.items(): 39 | joint_num = self.joints[rtpose2d_joint] 40 | if joint_num is not None: # 0 (RAnkle) is a valid LSP index and must not be skipped 41 | result[:, rtpose2d_joint_id, :] = np.expand_dims(joints[joint_num], axis=0) 42 | visibility_lsp = result[:, rtpose2d_joint_id, 2] 43 | result[:, rtpose2d_joint_id, 2] = 1 - visibility_lsp 44 | 45 | return result 46 | -------------------------------------------------------------------------------- /skeletons/joint_setup.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | joints_rtpose2d = OrderedDict([ 4 | ("Nose", 0), 5 | ("Neck", 1), 6 | ("RShoulder", 2), 7 | ("RElbow", 3), 8 | ("RWrist", 4), 9 | ("LShoulder", 5), 10 | ("LElbow", 6), 11 | ("LWrist", 7), 12 | ("RHip", 8), 13 | ("RKnee", 9), 14 | ("RAnkle", 10), 15 | ("LHip", 11), 16 | ("LKnee", 12), 17 | ("LAnkle", 13), 18 | ("REye", 14), 19 | ("LEye", 15), 20 | ("REar", 16), 21 | ("LEar", 17) 22 | ]) 23 | -------------------------------------------------------------------------------- /skeletons/skeleton_config_base.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | 4 | class SkeletonConfigBase: 5 | @property 6 | def joints(self) -> OrderedDict: 7 | raise NotImplementedError 8 | 9 | @property 10 | def limbs(self) -> [[int, int]]: 11 | raise NotImplementedError 12 | 13 | @property 14 | def left_parts(self) -> [str]: 15 | raise NotImplementedError 16 | 17 | @property 18 | def right_parts(self) -> [str]: 19 | raise NotImplementedError 20 | 21 | @property 22 | def important_limbs(self) -> [int]: 23 | raise NotImplementedError 24 | 25 | @property 26 | def important_joints(self) -> [int]: 27 | raise NotImplementedError 28 | 29 | def get_joint_name_by_id(self, joint_id): 30 | return list(self.joints.items())[joint_id][0] -------------------------------------------------------------------------------- /skeletons/skeleton_config_openpose.py: -------------------------------------------------------------------------------- 1 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 2 | 3 | 4 | class SkeletonConfigOpenPose(SkeletonConfigRtPose2D): 5 | def __init__(self): 6 | self.limb_names = [] 7 | __joint_list = list(self.joints.items()) 8 | for limb in self.limbs: 9 | self.limb_names.append("{}-{}-X".format(__joint_list[limb[0]][0], __joint_list[limb[1]][0])) 10 | self.limb_names.append("{}-{}-Y".format(__joint_list[limb[0]][0], __joint_list[limb[1]][0])) 11 | self.limb_names.append("Background-X") 12 | self.limb_names.append("Background-Y") 13 | -------------------------------------------------------------------------------- /skeletons/skeleton_config_rtpose2d.py: -------------------------------------------------------------------------------- 1 | from skeletons.joint_setup import joints_rtpose2d 2 | from skeletons.skeleton_config_base import SkeletonConfigBase 3 | from collections import defaultdict 4 | 5 | 6 | def ltr_parts(parts_dict): 7 | # when we flip the image, left parts become right parts and vice versa. These are the lists of parts to exchange with each other.
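# e.g. after a horizontal flip the "LShoulder" channel has to be swapped with "RShoulder",
# "LElbow" with "RElbow", and so on; both returned lists are kept in matching order so that
# corresponding entries can be exchanged pairwise.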
8 | left_parts = [parts_dict[p] for p in 9 | ["LShoulder", "LElbow", "LWrist", "LHip", "LKnee", "LAnkle", "LEye", "LEar"]] 10 | right_parts = [parts_dict[p] for p in 11 | ["RShoulder", "RElbow", "RWrist", "RHip", "RKnee", "RAnkle", "REye", "REar"]] 12 | return left_parts, right_parts 13 | 14 | 15 | class SkeletonGraphRtPose2D: 16 | def __init__(self): 17 | self.outgoing_limbs = defaultdict(list) 18 | self.incoming_limbs = defaultdict(list) 19 | self.outgoing_joints = defaultdict(list) 20 | self.incoming_joints = defaultdict(list) 21 | 22 | self.edges = defaultdict(list) 23 | self.weights = {} 24 | self.joint_instances = defaultdict(list) 25 | 26 | def add_instances(self, limbs): 27 | for limb in limbs: 28 | joint_a = limb["joint_a"] 29 | joint_a = (joint_a["joint_nr"], joint_a["x"], joint_a["y"]) 30 | joint_b = limb["joint_b"] 31 | joint_b = (joint_b["joint_nr"], joint_b["x"], joint_b["y"]) 32 | self.edges[joint_a].append(joint_b) 33 | self.weights[(joint_a, joint_b)] = limb["matched_score"] 34 | if joint_a not in self.joint_instances[joint_a[0]]: 35 | self.joint_instances[joint_a[0]].append(joint_a) 36 | if joint_b not in self.joint_instances[joint_b[0]]: 37 | self.joint_instances[joint_b[0]].append(joint_b) 38 | 39 | def neighbors(self, id): 40 | return self.edges[id] 41 | 42 | def cost(self, from_node, to_node): 43 | return self.weights[(from_node, to_node)] 44 | 45 | 46 | class SkeletonConfigRtPose2D(SkeletonConfigBase): 47 | important_joints = [] 48 | important_limbs = [6, 7, 8, 9, 10, 11] # Right and Left Neck -> Ankle Connection 49 | 50 | joints = joints_rtpose2d 51 | 52 | limbs = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], 53 | [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], 54 | [0, 15], [15, 17], [2, 16], [5, 17]] 55 | 56 | joint_is_source_in_limb = defaultdict(list) 57 | joint_is_target_in_limb = defaultdict(list) 58 | for joint_name, joint_num in joints.items(): 59 | for limb_nr, limb in enumerate(limbs): 60 | if limb[0] == joint_num: 61 | joint_is_source_in_limb[joint_num].append(limb_nr) 62 | if limb[1] == joint_num: 63 | joint_is_target_in_limb[joint_num].append(limb_nr) 64 | 65 | graph = SkeletonGraphRtPose2D() 66 | for joint_name, joint_num in joints.items(): 67 | for limb_nr, limb in enumerate(limbs): 68 | if limb[0] == joint_num: 69 | graph.outgoing_limbs[joint_name].append(limb) 70 | graph.outgoing_joints[joint_name].append(list(joints.items())[limb[1]]) 71 | if limb[1] == joint_num: 72 | graph.incoming_limbs[joint_name].append(limb) 73 | graph.incoming_joints[joint_name].append(list(joints.items())[limb[0]]) 74 | a = 1 75 | 76 | limb_names = [] 77 | __joint_list = list(joints.items()) 78 | for limb in limbs: 79 | limb_names.append("{}-{}".format(__joint_list[limb[0]][0], __joint_list[limb[1]][0])) 80 | limb_names.append("Background") 81 | 82 | limb_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], 83 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], 84 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [0, 0, 0]] 85 | 86 | left_parts, right_parts = ltr_parts(joints) -------------------------------------------------------------------------------- /training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noboevbo/openpose-pytorch/4bc9cf4c927fdb507d89198724a237800cad9b3e/training/__init__.py 
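A minimal sketch of how the skeleton configuration above is typically consumed, e.g. for drawing limbs (illustrative only; it assumes nothing beyond the classes defined under skeletons/):

    from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D

    config = SkeletonConfigRtPose2D()
    joint_names = list(config.joints.keys())  # joint ids 0..17 follow the insertion order of joints_rtpose2d
    # Each limb is a [source joint id, target joint id] pair; limb_colors is index-aligned with limbs.
    for limb_id, (src, dst) in enumerate(config.limbs):
        print("limb {:2d}: {:>9s} -> {:<9s} color={}".format(
            limb_id, joint_names[src], joint_names[dst], config.limb_colors[limb_id]))

The same joint/limb indexing is what visualization.py, the ground truth generators and the joint converters rely on.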
-------------------------------------------------------------------------------- /training/loss_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | 5 | class L2Loss(torch.nn.Module): 6 | def __init__(self, batch_size): 7 | super(L2Loss, self).__init__() 8 | self.batch_size = batch_size 9 | 10 | def forward(self, x: Variable, y: Variable, weights: Variable = None): 11 | if weights is not None: 12 | val = (x-y) * weights[:x.data.shape[0], :, :, :] # Slice by shape[n,..] for batch size (last batch < batch_size) 13 | else: 14 | val = x-y 15 | l = torch.sum(val ** 2) / self.batch_size / 2 16 | return l -------------------------------------------------------------------------------- /training/tensorboard_logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | from collections import OrderedDict 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | import cv2 7 | import torch 8 | 9 | from util_img import get_img_from_network_output 10 | from visualization import get_network_train_output 11 | 12 | try: 13 | from StringIO import StringIO # Python 2.7 14 | except ImportError: 15 | from io import BytesIO # Python 3.x 16 | 17 | 18 | class TensorboardLogger(object): 19 | def __init__(self, log_dir): 20 | """Create a summary writer logging to log_dir.""" 21 | self.writer = tf.summary.FileWriter(log_dir) 22 | 23 | def scalar_summary(self, tag, value, step): 24 | """Log a scalar variable.""" 25 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 26 | self.writer.add_summary(summary, step) 27 | 28 | def image_summary(self, tag, image_dict, step): 29 | """Log a list of images.""" 30 | 31 | img_summaries = [] 32 | for i, (key, img) in enumerate(image_dict.items()): 33 | # Write the image to a string 34 | s = cv2.imencode(".png", img)[1].tostring() 35 | 36 | # Create an Image object 37 | img_sum = tf.Summary.Image(encoded_image_string=s, 38 | height=img.shape[0], 39 | width=img.shape[1]) 40 | # Create a Summary value 41 | img_summaries.append(tf.Summary.Value(tag='%s_%s' % (tag, key), image=img_sum)) 42 | 43 | # Create and write Summary 44 | summary = tf.Summary(value=img_summaries) 45 | self.writer.add_summary(summary, step) 46 | 47 | 48 | def histo_summary(self, tag, values, step, bins=1000): 49 | """Log a histogram of the tensor of values.""" 50 | 51 | # Create a histogram using numpy 52 | counts, bin_edges = np.histogram(values, bins=bins) 53 | 54 | # Fill the fields of the histogram proto 55 | hist = tf.HistogramProto() 56 | hist.min = float(np.min(values)) 57 | hist.max = float(np.max(values)) 58 | hist.num = int(np.prod(values.shape)) 59 | hist.sum = float(np.sum(values)) 60 | hist.sum_squares = float(np.sum(values ** 2)) 61 | 62 | # Drop the start of the first bin 63 | bin_edges = bin_edges[1:] 64 | 65 | # Add bin edges and counts 66 | for edge in bin_edges: 67 | hist.bucket_limit.append(edge) 68 | for c in counts: 69 | hist.bucket.append(c) 70 | 71 | # Create and write Summary 72 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 73 | self.writer.add_summary(summary, step) 74 | self.writer.flush() 75 | 76 | 77 | def log_tensorboard_train_details(logger: TensorboardLogger, iteration_total, outputs: OrderedDict, losses, total_loss): 78 | info = {'1_loss_total': total_loss.data[0]} 79 | count = 0 80 | for stage_nr, 
stage_layers in outputs.items(): 81 | for name, _ in stage_layers.items(): 82 | loss_name = "loss_s{}_{}".format(stage_nr, name) 83 | info[loss_name] = losses[count] 84 | count += 1 85 | 86 | for tag, value in info.items(): 87 | logger.scalar_summary(tag, value, iteration_total) 88 | 89 | 90 | def log_tensorboard_net_params(logger: TensorboardLogger, iteration_total, net: torch.nn.Module): 91 | for tag, value in net.named_parameters(): 92 | tag = tag.replace('.', '/') 93 | logger.histo_summary(tag, value.data.cpu().numpy(), iteration_total) 94 | if value.grad is not None: 95 | logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), iteration_total) 96 | 97 | 98 | def log_tensorboard_map_imgs(logger: TensorboardLogger, iteration_total, img_var, joint_maps_gt_var, limb_maps_gt_var, 99 | outputs: OrderedDict): 100 | # TODO: Add a summary img (grid with img, limb maps) 101 | img_dict = {'original': get_img_from_network_output(img_var[0].data.cpu().numpy())} 102 | for stage_nr, stage_layers in outputs.items(): 103 | limb_maps = None 104 | joint_maps = None 105 | for name, value in stage_layers.items(): 106 | if name == "limb_map": 107 | limb_maps = value[0] 108 | if name == "joint_map": 109 | joint_maps = value[0] 110 | img = get_network_train_output(joint_maps_gt_var[0].data.cpu().numpy(), limb_maps_gt_var[0].data.cpu().numpy(), 111 | joint_maps.data.cpu().numpy(), limb_maps.data.cpu().numpy(), 112 | get_img_from_network_output(img_var[0].data.cpu().numpy())) 113 | img_dict['stage{}'.format(stage_nr)] = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC) 114 | 115 | logger.image_summary('maps', img_dict, iteration_total) 116 | 117 | -------------------------------------------------------------------------------- /training/train_openpose.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | 4 | from config import cfg 5 | from datasets.coco_dataset import CocoDataset 6 | from models import model_openpose 7 | from skeletons.gt_generators.gt_generator_openpose import GroundTruthGeneratorOpenPose 8 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose 9 | from training.train_prod import train 10 | from training.train_utils import get_losses, fix_layers_weights 11 | 12 | network_model_handler = model_openpose.OpenPoseModelHandler() 13 | network = network_model_handler.get_train_model() 14 | network_model_handler.load_state_dict(network) 15 | fix_layers_weights(network, "stage[2-6]_[1-9]_(joint|limb)_maps") 16 | skeleton_config = SkeletonConfigOpenPose() 17 | gt_generator = GroundTruthGeneratorOpenPose(network, skeleton_config) 18 | 19 | 20 | train_dataset = CocoDataset([cfg.dataset.train_hdf5], skeleton_config, gt_generator, 21 | network, augment=True) 22 | sim_dataset = CocoDataset(["/media/USERNAME/Store1/sim_train_18_04_17_ITSC.h5"], skeleton_config, gt_generator, 23 | network, augment=True) 24 | train_sets = torch.utils.data.ConcatDataset([train_dataset, sim_dataset]) 25 | train_loader = DataLoader(train_sets, cfg.train.batch_size, shuffle=True) 26 | 27 | train(network, train_loader, get_losses, fix_regex="stage[2-6]_[1-9]_(joint|limb)_maps") 28 | -------------------------------------------------------------------------------- /training/train_prod.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import matplotlib 4 | matplotlib.use('Agg') 5 | 6 | import torch 7 | import torch.optim as optim 8 | from 
torch.autograd import Variable 9 | from torch.backends import cudnn 10 | from torch.optim.lr_scheduler import LambdaLR 11 | from torch.utils.data import DataLoader 12 | 13 | from config import cfg 14 | from models.model_base import NetworkModelBase 15 | from models.model_handler_base import NetworkModelHandlerBase 16 | from training.loss_functions import L2Loss 17 | from training.tensorboard_logger import TensorboardLogger, log_tensorboard_train_details, log_tensorboard_net_params, \ 18 | log_tensorboard_map_imgs 19 | from training.train_utils import get_per_parameter_optimizer_settings, get_learning_rate_decay_lambdas 20 | 21 | 22 | def train(network: NetworkModelBase, data_loader_train: DataLoader, get_losses_func, loss_weights_tuple: () = None, fix_regex=None): 23 | logger = TensorboardLogger(cfg.train.log_dir) 24 | cudnn.benchmark = True 25 | 26 | lr_per_parameter = get_per_parameter_optimizer_settings(network.named_parameters(), fix_regex) 27 | 28 | optimizer = optim.SGD(lr_per_parameter, 29 | lr=cfg.train.learning_rate, 30 | momentum=cfg.train.momentum, 31 | weight_decay=cfg.train.weight_decay) 32 | 33 | scheduler = LambdaLR(optimizer, lr_lambda=get_learning_rate_decay_lambdas(len(data_loader_train))) 34 | criterion = L2Loss(cfg.train.batch_size).cuda() 35 | 36 | for epoch in range(0, cfg.train.checkpoint_epoch): 37 | scheduler.step() 38 | 39 | for epoch in range(cfg.train.checkpoint_epoch, 90): 40 | print("Begin train epoch: {}".format(epoch)) 41 | train_epoch(data_loader_train, network, criterion, optimizer, epoch, logger, get_losses_func, loss_weights_tuple) 42 | scheduler.step() 43 | save_checkpoint(network, epoch) 44 | 45 | 46 | def save_checkpoint(network, epoch): 47 | checkpoint_model_path = os.path.join(cfg.train.checkpoint_model_base_dir, 'checkpoint_{}.pth'.format(epoch)) 48 | torch.save(network.state_dict(), checkpoint_model_path) 49 | cfg.train.update_checkpoint(checkpoint_model_path, epoch, None) 50 | 51 | 52 | def train_epoch(train_loader, network, criterion, optimizer, epoch, logger, get_losses_func, loss_weights_tuple: ()): 53 | num_previous_iterations = epoch * len(train_loader) 54 | num_samples = len(train_loader) 55 | percentage_0_1 = int(num_samples * 0.001) 56 | percentage_10 = int(num_samples * 0.1) 57 | percentage_25 = int(num_samples * 0.25) 58 | for iteration, data in enumerate(train_loader): 59 | print("epoch {} [{}/{}]".format(epoch, iteration, num_samples)) 60 | image_var = Variable(data['image'].cuda()) 61 | joint_map_gt_var = Variable(data['joint_map_gt'].cuda()) 62 | limb_map_gt_var = Variable(data['limb_map_gt'].cuda()) 63 | joint_map_mask_var = Variable(data['joint_map_masks'].cuda()) 64 | limb_map_mask_var = Variable(data['limb_map_masks'].cuda()) 65 | optimizer.zero_grad() # zero the gradient buffer 66 | output = network(image_var, joint_map_mask_var, limb_map_mask_var, epoch) 67 | 68 | losses = get_losses_func(criterion, output, (joint_map_gt_var, limb_map_gt_var), loss_weights_tuple) 69 | 70 | total_loss = sum(losses) 71 | total_loss.backward() 72 | optimizer.step() 73 | 74 | if iteration % percentage_0_1 == 0: 75 | log_tensorboard_train_details(logger, num_previous_iterations + iteration, output, losses, total_loss) 76 | if iteration % percentage_10 == 0: 77 | log_tensorboard_net_params(logger, num_previous_iterations + iteration, network) 78 | if iteration % percentage_25 == 0: 79 | log_tensorboard_map_imgs(logger, num_previous_iterations + iteration, image_var, joint_map_gt_var, 80 | limb_map_gt_var, 81 | output) 82 | 83 | 84 | # Validation 85 
| 86 | # def validate_network(network, val_loader, criterion, best_model_loss, train_iteration, logger: TensorboardLogger, get_losses_func): 87 | # network.eval() 88 | # iteration_losses = AverageMeter() 89 | # for iteration, data in enumerate(val_loader): 90 | # image_var = Variable(data['image'].cuda()) 91 | # joint_map_gt_var = Variable(data['joint_map_gt'].cuda()) 92 | # limb_map_gt_var = Variable(data['limb_map_gt'].cuda()) 93 | # joint_map_mask_var = Variable(data['joint_map_masks'].cuda()) 94 | # limb_map_mask_var = Variable(data['limb_map_masks'].cuda()) 95 | # 96 | # output = network(image_var, joint_map_mask_var, limb_map_mask_var) 97 | # 98 | # losses = get_losses_func(criterion, output, joint_map_gt_var, limb_map_gt_var) 99 | # 100 | # total_loss = sum(losses) 101 | # iteration_losses.update(total_loss.data[0], image_var.size(0)) 102 | # 103 | # if best_model_loss: 104 | # is_best = iteration_losses.avg < best_model_loss 105 | # best_model = min(best_model_loss, iteration_losses.avg) 106 | # else: 107 | # is_best = True 108 | # best_model = iteration_losses.avg 109 | # logger.scalar_summary('val_loss', iteration_losses.avg, train_iteration) 110 | # 111 | # network.train() 112 | # return is_best, best_model 113 | 114 | 115 | def get_network_model(network_model_handler: NetworkModelHandlerBase): 116 | network_model = network_model_handler.get_train_model() 117 | if cfg.train.checkpoint_epoch == 0: # Fresh training 118 | print("Load pretrained feature extractor weights") 119 | network_model = network_model_handler.load_pretrained_feature_extractor_parameters(network_model) 120 | else: 121 | print("Load from checkpoint: {}".format(cfg.train.checkpoint_model_path)) 122 | network_model = torch.load(cfg.train.checkpoint_model_path) 123 | return network_model 124 | -------------------------------------------------------------------------------- /training/train_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import math 6 | 7 | from torch.autograd import Variable 8 | from torch.utils.data import DataLoader 9 | 10 | from config import cfg 11 | from skeletons.skeleton_config_base import SkeletonConfigBase 12 | import numpy as np 13 | 14 | layer_weight_suffix = ".weight" 15 | layer_bias_suffix = ".bias" 16 | 17 | 18 | def get_parameter_groups(named_parameters, fix_regex): 19 | """ 20 | When using MultiGPU (DataParallel) the parameters are named module.[...] 
that 21 | :param named_parameters: 22 | :return: 23 | """ 24 | parameter_groups = {"feature_extractor_weights": [], "feature_extractor_bias": [], 25 | "stage1_weights": [], "stage1_bias": [], 26 | "stageN_weights": [], "stageN_bias": []} 27 | for parameter in named_parameters: 28 | layer_name = parameter[0] 29 | if fix_regex and not re.match(fix_regex, layer_name): 30 | continue 31 | # Parameters - Stage 1 32 | if re.match("(module.)?stage1_\d_.*", layer_name): 33 | if layer_name.endswith(layer_weight_suffix): 34 | parameter_groups["stage1_weights"].append(parameter[1]) 35 | else: 36 | parameter_groups["stage1_bias"].append(parameter[1]) 37 | # Parameters - Stage 2+ 38 | elif re.match("(module.)?stage[2-9][0-9]*_\d_.*", layer_name): 39 | if layer_name.endswith(layer_weight_suffix): 40 | parameter_groups["stageN_weights"].append(parameter[1]) 41 | else: 42 | parameter_groups["stageN_bias"].append(parameter[1]) 43 | # Parameters - Feature Extractor 44 | else: 45 | if layer_name.endswith(layer_weight_suffix): 46 | parameter_groups["feature_extractor_weights"].append(parameter[1]) 47 | else: 48 | parameter_groups["feature_extractor_bias"].append(parameter[1]) 49 | return parameter_groups 50 | 51 | 52 | def get_per_parameter_optimizer_settings(named_parameters, fix_regex=None): 53 | parameter_groups = get_parameter_groups(named_parameters, fix_regex) 54 | return [{'params': parameter_groups["feature_extractor_weights"], 'lr': cfg.train.learning_rate * 1., 'weight_decay': cfg.train.weight_decay}, 55 | {'params': parameter_groups["feature_extractor_bias"], 'lr': cfg.train.learning_rate * 2., 'weight_decay': 0}, 56 | {'params': parameter_groups["stage1_weights"], 'lr': cfg.train.learning_rate * 1., 'weight_decay': cfg.train.weight_decay}, 57 | {'params': parameter_groups["stage1_bias"], 'lr': cfg.train.learning_rate * 2., 'weight_decay': 0}, 58 | {'params': parameter_groups["stageN_weights"], 'lr': cfg.train.learning_rate * 4., 'weight_decay': cfg.train.weight_decay}, 59 | {'params': parameter_groups["stageN_bias"], 'lr': cfg.train.learning_rate * 8., 'weight_decay': 0}, 60 | ] 61 | 62 | 63 | def learning_rate_step_decay(epoch, iterations_per_epoch, base_learning_rate): 64 | steps = epoch * iterations_per_epoch * cfg.train.batch_size 65 | # TODO: Stepsize from cfg -> Calculate live 66 | lrate = base_learning_rate * math.pow(cfg.train.gamma, math.floor(steps/cfg.train.stepsize)) 67 | print("Epoch:", epoch, "Learning rate:", lrate) 68 | return lrate 69 | 70 | 71 | def get_learning_rate_decay_lambdas(num_training_samples): 72 | ipe = iterations_per_epoch = num_training_samples // cfg.train.batch_size 73 | """ 74 | Returns a learning rate decay function for each parameter group (get_per_parameter_optimizer_settings) 75 | """ 76 | return [ 77 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 1.), # FeatureExtrac.Weight 78 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 2.), # FeatureExtrac.Bias 79 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 1.), # Stage1.Weight 80 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 2.), # Stage1.Bias 81 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 4.), # StageN.Weight 82 | lambda epoch: learning_rate_step_decay(epoch, ipe, cfg.train.learning_rate * 8.), # StageN.Bias 83 | ] 84 | 85 | 86 | def fix_layers_weights(network: torch.nn.Module, fix_layer_regex): 87 | named_params = list(network.named_parameters()) 88 | 
count = len(named_params) 89 | for param in named_params: 90 | layer_name = param[0] 91 | if not re.match(fix_layer_regex, layer_name): 92 | value = param[1] 93 | value.requires_grad = False 94 | count -= 1 95 | print("Params with grad: {}".format(count)) 96 | 97 | 98 | def get_loss_weights(data_loader: DataLoader, skeleton_config: SkeletonConfigBase): 99 | sample = next(iter(data_loader)) 100 | 101 | sample_limb_gt = sample['limb_map_gt'].numpy() 102 | loss_weights_limbs = np.ones_like(sample_limb_gt) 103 | for limb in skeleton_config.important_limbs: 104 | loss_weights_limbs[:, limb, :, :] = loss_weights_limbs[:, limb, :, :] * 2 105 | 106 | sample_joint_gt = sample['joint_map_gt'].numpy() 107 | loss_weights_joints = np.ones_like(sample_joint_gt) 108 | for joint in skeleton_config.important_joints: 109 | loss_weights_joints[:, joint, :, :] = loss_weights_joints[:, joint, :, :] * 2 110 | 111 | return Variable(torch.from_numpy(loss_weights_limbs).cuda()), Variable(torch.from_numpy(loss_weights_joints).cuda()) 112 | 113 | 114 | def get_losses(criterion, output: OrderedDict, ground_truth_tuple: (), loss_weight_tuple: () = None) -> []: 115 | """ 116 | Creates a loss for each output and maps it to the corresponding joint map / limb map. 117 | Iterates in steps of two to set the criterion for the joint map / limb map for each output stage 118 | """ 119 | joint_map_gt_var = ground_truth_tuple[0] 120 | limb_map_gt_var = ground_truth_tuple[1] 121 | loss_weights_limbs, loss_weights_joints = None, None 122 | if loss_weight_tuple: 123 | loss_weights_limbs, loss_weights_joints = loss_weight_tuple 124 | losses = [] 125 | for stage, stage_layers in output.items(): 126 | for layer_name, layer_value in stage_layers.items(): 127 | gt = joint_map_gt_var if layer_name == "joint_map" else limb_map_gt_var 128 | weight = loss_weights_joints if layer_name == "joint_map" else loss_weights_limbs 129 | losses.append(criterion(layer_value, gt, weight)) 130 | return losses -------------------------------------------------------------------------------- /util_img.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from config import cfg 5 | 6 | 7 | def normalize(img: np.array): 8 | img = img / 255.0 9 | img = img - 0.5 10 | return img 11 | 12 | 13 | def denormalize(img: np.array): 14 | img = img + 0.5 15 | img *= 255.0 16 | return img 17 | 18 | 19 | def get_img_from_network_output(img: np.ndarray) -> np.ndarray: 20 | img = denormalize(img) 21 | img = img.astype(np.uint8) 22 | return np.transpose(img, (1, 2, 0)) 23 | 24 | 25 | def get_combined_maps(maps: np.ndarray) -> np.ndarray: 26 | combined_map = np.transpose(maps, (1, 2, 0)) 27 | return np.amax(combined_map, axis=2) 28 | 29 | 30 | def get_upscaled_map(map_data: np.ndarray, original_img: np.ndarray, input_img: np.ndarray, pad: []) -> np.ndarray: 31 | tmp_map = cv2.resize(map_data, (0, 0), fx=cfg.general.stride, fy=cfg.general.stride, interpolation=cv2.INTER_CUBIC) 32 | tmp_map = tmp_map[:input_img.shape[0] - pad[2], :input_img.shape[1] - pad[3]] 33 | return cv2.resize(tmp_map, (original_img.shape[1], original_img.shape[0]), interpolation=cv2.INTER_CUBIC) 34 | 35 | 36 | def get_upscaled_maps(maps: np.ndarray, original_img: np.ndarray, input_img: np.ndarray, pad: []) -> np.ndarray: 37 | num_maps = maps.shape[0] 38 | output_maps = np.empty((num_maps, cfg.general.input_width, cfg.general.input_height)) 39 | for i in range(num_maps): 40 | output_maps[i] = get_upscaled_map(maps[i, :, :], 
original_img, input_img, pad) 41 | return output_maps 42 | 43 | 44 | def get_img_padded_as_box(img: np.ndarray): 45 | new_img = img.copy() 46 | if img.shape[0] == img.shape[1]: 47 | return new_img 48 | elif img.shape[0] > img.shape[1]: 49 | size_diff = img.shape[0] - img.shape[1] 50 | top = 0 51 | bottom = 0 52 | left = 0 53 | right = 0 54 | if size_diff % 2 == 0: 55 | left = right = int(size_diff / 2) 56 | else: 57 | left = int(size_diff / 2) 58 | right = size_diff - left 59 | return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,value=[255, 255, 255]) 60 | else: 61 | size_diff = img.shape[1] - img.shape[0] 62 | top = 0 63 | bottom = 0 64 | left = 0 65 | right = 0 66 | if size_diff % 2 == 0: 67 | top = bottom = int(size_diff / 2) 68 | else: 69 | top = int(size_diff / 2) 70 | bottom = size_diff - top 71 | return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,value=[255, 255, 255]) -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import cv2 4 | 5 | from config import cfg 6 | import concurrent.futures 7 | 8 | 9 | def get_pad_column(img, column='left', pad_color=None): 10 | if column == 'left': 11 | column_val = img[:,:1:] 12 | elif column == 'right': 13 | column_val = img[:,-1:,:] 14 | elif column == 'top': 15 | column_val = img[:1,:,:] 16 | else: # bottom 17 | column_val = img[-1:,:,:] 18 | if pad_color: # else just use the values from the column 19 | column_val = column_val * 0 # Set color values to zero 20 | column_val = column_val + np.array(pad_color, dtype=np.uint8) # Fill with defined color values 21 | return column_val 22 | 23 | 24 | def pad_by_stride(img, stride, pad_color): 25 | h = img.shape[0] 26 | w = img.shape[1] 27 | 28 | pad = 4 * [None] 29 | pad[0] = 0 # up 30 | pad[1] = 0 # left 31 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 32 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 33 | 34 | img_padded = img 35 | pad_up = np.tile(get_pad_column(img_padded, 'up', pad_color), (pad[0], 1, 1)) 36 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 37 | pad_left = np.tile(get_pad_column(img_padded, 'left', pad_color), (1, pad[1], 1)) 38 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 39 | pad_down = np.tile(get_pad_column(img_padded, 'down', pad_color), (pad[2], 1, 1)) 40 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 41 | pad_right = np.tile(get_pad_column(img_padded, 'right', pad_color), (1, pad[3], 1)) 42 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 43 | 44 | return img_padded, pad 45 | 46 | 47 | def measure_time(method): 48 | def timed(*args, **kw): 49 | if not cfg.general.debug_timers: 50 | return method(*args, **kw) 51 | ts = time.time() 52 | result = method(*args, **kw) 53 | te = time.time() 54 | 55 | print('%r %2.5f sec' % (method.__name__, te-ts)) 56 | return result 57 | 58 | return timed 59 | 60 | 61 | def get_map_as_numpy(in_map): 62 | if cfg.network.use_gpu == 1: 63 | return in_map.cpu().numpy() 64 | else: 65 | return in_map.numpy() 66 | 67 | 68 | def __get_avg_map(idx, result, original_img, maps_name, interpolation): 69 | start_time = time.time() 70 | maps = get_map_as_numpy(result[maps_name].data[0]) 71 | #print("{}: {}".format("Get From GPU", time.time() - start_time)) 72 | start_time = time.time() 73 | pad = result["pad"] 74 | imageToTest_padded = 
result["img_to_test_padded"] 75 | heatmap = np.transpose(maps, (1, 2, 0)) 76 | heatmap = cv2.resize(heatmap, (0, 0), fx=cfg.general.stride, fy=cfg.general.stride, 77 | interpolation=interpolation) 78 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 79 | heatmap = cv2.resize(heatmap, (original_img.shape[1], original_img.shape[0]), interpolation=interpolation) 80 | heatmap = np.transpose(heatmap, (2, 0, 1)) 81 | #print("{}: {}".format("Actual calc", time.time() - start_time)) 82 | return idx, heatmap 83 | 84 | 85 | def get_upsampled_maps(img_results: dict, maps_name, interpolation=cv2.INTER_CUBIC): 86 | original_img = img_results["original_img"] 87 | num_maps = img_results["results"][0][maps_name].data.shape[1] 88 | maps_average = np.zeros((len(img_results["results"]), num_maps, original_img.shape[0], original_img.shape[1])) 89 | 90 | with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: 91 | futures = [executor.submit(__get_avg_map, idx, result, original_img, maps_name, interpolation=interpolation) 92 | for idx, result in enumerate(img_results["results"])] 93 | for idx, future in enumerate(concurrent.futures.as_completed(futures)): 94 | idx, result = future.result() 95 | maps_average[idx] = result 96 | 97 | return maps_average 98 | 99 | 100 | def get_average_map_from_upscaled_maps(img: np.ndarray): 101 | # This step may take a while because it needs to wait for the gpu operations to finish. 102 | # Could use torch.cuda.synchronize() after network output to wait for sync directly after the output 103 | img = np.mean(img, 0) 104 | img = np.transpose(img, (1, 2, 0)) 105 | return img 106 | 107 | 108 | def debug_additional_timer(name, start_time): 109 | if cfg.general.additional_debug_timers: 110 | print("{}: {}".format(name, time.time() - start_time)) 111 | 112 | 113 | def get_num_params(model): 114 | """ 115 | Returns the number of parameters of a model 116 | """ 117 | num_parameters=0 118 | for p in list(model.parameters()): 119 | nn=1 120 | for s in list(p.size()): 121 | nn = nn*s 122 | num_parameters += nn 123 | return num_parameters 124 | 125 | 126 | def getEquidistantPoints(p1, p2, num_samples): 127 | """ 128 | Returns num_samples points between p1 / p2 evenly distributed 129 | :param p1: 130 | :param p2: 131 | :param num_samples: 132 | :return: 133 | """ 134 | return list(zip(np.linspace(p1[0], p2[0], num_samples), np.linspace(p1[1], p2[1], num_samples))) 135 | -------------------------------------------------------------------------------- /utils/util_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | from skeletons.joint_converteres.joint_converter_coco_rtpose2d import JointConverterCocoRtPose2D 7 | from skeletons.skeleton_config_base import SkeletonConfigBase 8 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 9 | 10 | # TODO: Remove duplicated code with parametsrs (JointConfig and joints to ignore) 11 | 12 | 13 | def get_joints_for_json_export(humans: {}, skeleton_config: SkeletonConfigBase): 14 | humans_joints = [] 15 | for human in humans: 16 | human_joints = {} 17 | for limb_dict in human["limbs"]: 18 | if limb_dict is None: 19 | continue 20 | limb_joints = [limb_dict["joint_a"], limb_dict["joint_b"]] 21 | for joint in limb_joints: 22 | score = (limb_dict["matched_score"] + joint["score"]) / 2 23 | joint_name = 
skeleton_config.get_joint_name_by_id(joint["joint_nr"]) 24 | joint = { 25 | "id": SkeletonConfigRtPose2D.joints[joint_name], 26 | "x": joint["x"], 27 | "y": joint["y"], 28 | "score": score 29 | } 30 | if joint["id"] not in human_joints: 31 | human_joints[joint["id"]] = joint 32 | humans_joints.append(human_joints) 33 | return humans_joints 34 | 35 | 36 | def get_result_json(image_id, humans: []): 37 | jsons_x = [] 38 | for human in humans: 39 | json_data = {} 40 | json_data["image_id"] = image_id 41 | keypoint_list = [] 42 | for i in range(len(SkeletonConfigRtPose2D.joints)): 43 | if i in human: 44 | joint = human[i] 45 | keypoint_list.append(int(joint["x"])) 46 | keypoint_list.append(int(joint["y"])) 47 | keypoint_list.append(int(joint["score"])) 48 | else: 49 | keypoint_list.append(0) 50 | keypoint_list.append(0) 51 | keypoint_list.append(0) 52 | json_data["keypoints"] = keypoint_list 53 | score = 0 54 | for joint in human.values(): 55 | score += joint["score"] 56 | score = score / len(human) 57 | json_data["score"] = score 58 | 59 | jsons_x.append(json_data) 60 | return jsons_x 61 | 62 | 63 | def get_coco_joints_for_evaluation(humans: {}, skeleton_config: SkeletonConfigBase): 64 | humans_joints = [] 65 | for human in humans: 66 | human_joints = {} 67 | for limb_dict in human["limbs"]: 68 | if limb_dict is None: 69 | continue 70 | limb_joints = [limb_dict["joint_a"], limb_dict["joint_b"]] 71 | for joint in limb_joints: 72 | score = (limb_dict["matched_score"] + joint["score"]) / 2 73 | joint_name = skeleton_config.get_joint_name_by_id(joint["joint_nr"]) 74 | if joint_name == "Neck": 75 | continue #TODO: Handle better / generic. Neck is not in coco 76 | joint = { 77 | "coco_id": JointConverterCocoRtPose2D.joints[joint_name], 78 | "x": joint["x"], 79 | "y": joint["y"], 80 | "score": score 81 | } 82 | if joint["coco_id"] not in human_joints: 83 | human_joints[joint["coco_id"]] = joint 84 | humans_joints.append(human_joints) 85 | return humans_joints 86 | 87 | 88 | def get_coco_result_json(image_id, humans: []): 89 | jsons_x = [] 90 | for human in humans: 91 | json_data = {} 92 | json_data["image_id"] = image_id 93 | json_data["category_id"] = 1 94 | keypoint_list = [] 95 | for i in range(len(JointConverterCocoRtPose2D.joints)): 96 | if i in human: 97 | joint = human[i] 98 | keypoint_list.append(int(joint["x"])) 99 | keypoint_list.append(int(joint["y"])) 100 | keypoint_list.append(int(joint["score"])) 101 | else: 102 | keypoint_list.append(0) 103 | keypoint_list.append(0) 104 | keypoint_list.append(0) 105 | json_data["keypoints"] = keypoint_list 106 | score = 0 107 | for joint in human.values(): 108 | score += joint["score"] 109 | score = score / len(human) 110 | json_data["score"] = score 111 | 112 | jsons_x.append(json_data) 113 | return jsons_x 114 | 115 | 116 | def evaluate(result_json_file_path, img_ids_calced = None): 117 | annFile = '/media/USERNAME/Data/Datasets/COCO/annotations/person_keypoints_val2017.json' 118 | cocoGt = COCO(annFile) 119 | 120 | anns = json.load(open(result_json_file_path)) 121 | annsImgIds = [ann['image_id'] for ann in anns] 122 | 123 | assert set(annsImgIds) == (set(annsImgIds) & set(cocoGt.getImgIds())), \ 124 | 'Results do not correspond to current coco set' 125 | 126 | resFile = result_json_file_path 127 | cocoDt = cocoGt.loadRes(resFile) 128 | 129 | #imgIds = imgIds[0:100] 130 | # imgId = imgIds[np.random.randint(5)] 131 | 132 | # running evaluation 133 | cocoEval = COCOeval(cocoGt, cocoDt, "keypoints") 134 | cocoEval.params.imgIds = img_ids_calced if 
img_ids_calced else sorted(cocoGt.getImgIds()) 135 | cocoEval.evaluate() 136 | cocoEval.accumulate() 137 | cocoEval.summarize() 138 | 139 | 140 | if __name__ == "__main__": 141 | evaluate("/media/USERNAME/Data/rtpose2d/evaluations/caffe_openpose.json") -------------------------------------------------------------------------------- /utils/util_joint_map.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | 3 | import numpy as np 4 | from scipy.ndimage import gaussian_filter 5 | 6 | from utils import util 7 | from config import cfg 8 | 9 | 10 | @util.measure_time 11 | def get_peaks(heatmap, joints): 12 | all_peaks = {} 13 | peak_counter = 0 14 | # execute the part calculation parallel len(joints) - 1 because background 15 | with concurrent.futures.ThreadPoolExecutor() as executor: 16 | futures = [executor.submit(get_joint_peaks, heatmap, part) 17 | for part in range(len(joints))] 18 | for idx, future in enumerate(concurrent.futures.as_completed(futures)): 19 | part, peaks = future.result() 20 | id = range(peak_counter, peak_counter + len(peaks)) 21 | joint_peaks = [] 22 | for idx, peak in enumerate(peaks): 23 | joint_peaks.append( 24 | {'joint_nr': part, 'coords': [peak[0], peak[1]], 'x': peak[0], 'y': peak[1], 'score': peak[2], 25 | 'id': id[idx]}) 26 | all_peaks[part] = joint_peaks 27 | peak_counter += len(joint_peaks) 28 | 29 | return all_peaks 30 | 31 | 32 | def get_joint_peaks(average_joint_maps, part): 33 | part_heatmap_ori = average_joint_maps[:, :, part] 34 | part_heatmap = gaussian_filter(part_heatmap_ori, sigma=3) 35 | 36 | map_left = np.zeros(part_heatmap.shape) 37 | map_left[1:, :] = part_heatmap[:-1, :] 38 | map_right = np.zeros(part_heatmap.shape) 39 | map_right[:-1, :] = part_heatmap[1:, :] 40 | map_up = np.zeros(part_heatmap.shape) 41 | map_up[:, 1:] = part_heatmap[:, :-1] 42 | map_down = np.zeros(part_heatmap.shape) 43 | map_down[:, :-1] = part_heatmap[:, 1:] 44 | 45 | is_local_peak_list = np.logical_and.reduce( 46 | (part_heatmap >= map_left, part_heatmap >= map_right, 47 | part_heatmap >= map_up, part_heatmap >= map_down, 48 | part_heatmap > cfg.network.heatmap_thresh) 49 | ) 50 | 51 | peaks = list(zip(np.nonzero(is_local_peak_list)[1], np.nonzero(is_local_peak_list)[0])) 52 | peaks_with_score = [x + (part_heatmap_ori[x[1], x[0]],) for x in peaks] 53 | 54 | return part, peaks_with_score 55 | -------------------------------------------------------------------------------- /utils/util_paf_map.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import defaultdict 3 | 4 | import numpy as np 5 | 6 | from utils import util 7 | from config import cfg 8 | 9 | 10 | @util.measure_time 11 | def get_limbs(paf_results, joint_positions, original_img, limb_cfg, limb_paf_mapping): 12 | limbs_all = defaultdict(list) 13 | 14 | for k in range(len(limb_paf_mapping)): 15 | score_mid = paf_results[:, :, [x for x in limb_paf_mapping[k]]] 16 | candidates_a = joint_positions[limb_cfg[k][0]] 17 | candidates_b = joint_positions[limb_cfg[k][1]] 18 | num_candidates_a = len(candidates_a) 19 | num_candidates_b = len(candidates_b) 20 | if num_candidates_a != 0 and num_candidates_b != 0: 21 | limb_candidates = get_limb_candidates(candidates_a, candidates_b, score_mid, 22 | original_img) 23 | 24 | limbs = 0 25 | used_joint_as = [] 26 | used_joint_bs = [] 27 | for limb_candidate in limb_candidates: 28 | if limb_candidate['joint_a_idx'] not in used_joint_as and 
limb_candidate['joint_b_idx'] not in used_joint_bs: 29 | used_joint_as.append(limb_candidate['joint_a_idx']) 30 | used_joint_bs.append(limb_candidate['joint_b_idx']) 31 | limbs_all[k].append({ 32 | 'limb_nr': k, 33 | 'limb': limb_cfg[k], 34 | 'joint_a': limb_candidate['joint_a'], 35 | 'joint_b': limb_candidate['joint_b'], 36 | 'limb_score': limb_candidate['limb_score'], 37 | 'matched_score': limb_candidate['matched_score'] 38 | }) 39 | limbs += 1 40 | if limbs >= min(num_candidates_a, num_candidates_b): 41 | break 42 | else: 43 | # TODO: Handle limb Ks missing joints somehow? 44 | continue 45 | return limbs_all 46 | 47 | 48 | def get_limb_candidates(candidates_a, candidates_b, score_mid, original_img): 49 | """ 50 | Returns limb candidates between (joint) candidates_a and (joint) candidates_b 51 | """ 52 | limb_candidates = [] 53 | for i in range(len(candidates_a)): 54 | for j in range(len(candidates_b)): 55 | joint_a = candidates_a[i]['coords'] 56 | joint_b = candidates_b[j]['coords'] 57 | paf_x = np.squeeze(score_mid[:, :, :1], axis=2) 58 | paf_y = np.squeeze(score_mid[:, :, 1:], axis=2) 59 | 60 | limb_score, sample_scores = get_limb_score(original_img, joint_a, joint_b, paf_x, paf_y) 61 | if limb_score <= 0: 62 | continue 63 | samples_over_thresh = np.nonzero(sample_scores > cfg.network.paf_thresh_sample_score)[0] 64 | enough_samples_over_thresh = len(samples_over_thresh) > cfg.network.paf_samples_over_thresh * len( 65 | sample_scores) 66 | min_joint_score_reached = limb_score > 0 67 | if enough_samples_over_thresh and min_joint_score_reached: 68 | limb_candidates.append({ 69 | 'joint_a_idx': i, 70 | 'joint_b_idx': j, 71 | 'joint_a': candidates_a[i], 72 | 'joint_b': candidates_b[j], 73 | 'limb_score': limb_score, 74 | 'matched_score': limb_score + candidates_a[i]['score'] + candidates_b[j]['score'] 75 | } 76 | ) 77 | return sorted(limb_candidates, key=lambda x: x['limb_score'], reverse=True) 78 | 79 | 80 | def get_limb_score(img, point_a, point_b, paf_x, paf_y): 81 | """ 82 | Calculates a score for a limb between the given points p1 and p2. Score is calculated by the line integral which 83 | measures the effect of the part affinity fields along the given joint. 
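    The score is averaged over cfg.network.paf_num_samples points sampled along the line between the two joints; joint pairs farther apart than half the image height receive an additional distance penalty.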
84 | """ 85 | num_samples = cfg.network.paf_num_samples 86 | x1, y1 = point_a[0], point_a[1] 87 | x2, y2 = point_b[0], point_b[1] 88 | 89 | distance_x, distance_y = x2 - x1, y2 - y1 90 | distance_joints = math.sqrt(distance_x ** 2 + distance_y ** 2) 91 | 92 | if distance_joints < 1e-4: 93 | return 0.0, np.zeros([10], dtype=np.float32) 94 | 95 | vx, vy = distance_x / distance_joints, distance_y / distance_joints 96 | xs = np.around(np.linspace(x1, x2, num=num_samples)).astype(np.uint32) 97 | ys = np.around(np.linspace(y1, y2, num=num_samples)).astype(np.uint32) 98 | 99 | paf_xs = np.zeros(num_samples) 100 | paf_ys = np.zeros(num_samples) 101 | for idx, (mx, my) in enumerate(zip(xs, ys)): 102 | paf_xs[idx] = paf_x[my][mx] 103 | paf_ys[idx] = paf_y[my][mx] 104 | 105 | sample_scores = paf_xs * vx + paf_ys * vy 106 | d_punishment = min(0.5 * img.shape[0] / distance_joints - 1, 0) # Punish joint distances > img_height/2 107 | line_integral = sum(sample_scores) / num_samples + d_punishment 108 | return line_integral, sample_scores 109 | 110 | # Ground Truth 111 | -------------------------------------------------------------------------------- /utils/util_predict.py: -------------------------------------------------------------------------------- 1 | import time 2 | from concurrent.futures import ThreadPoolExecutor 3 | 4 | import numpy as np 5 | 6 | from models.model_base import NetworkModelBase 7 | from skeletons.skeleton_config_base import SkeletonConfigBase 8 | from utils import util, util_paf_map 9 | from utils.util_joint_map import get_peaks 10 | from utils.util_skeleton import get_humans_from_limbs 11 | 12 | 13 | def get_avg_map_mulpr(img_result): 14 | upscaled_limb_maps = util.get_upsampled_maps(img_result, "limb_maps") 15 | return util.get_average_map_from_upscaled_maps(upscaled_limb_maps) 16 | 17 | 18 | def get_human_data(model: NetworkModelBase, img_result: dict, original_img: np.ndarray, skeleton_config: SkeletonConfigBase): 19 | with ThreadPoolExecutor() as executor: 20 | future = executor.submit(get_avg_map_mulpr, img_result) 21 | start_time = time.time() 22 | upscaled_joint_maps = util.get_upsampled_maps(img_result, "joint_maps") 23 | average_joint_maps = util.get_average_map_from_upscaled_maps(upscaled_joint_maps) 24 | print("{}: {}".format("Upscale joints", time.time() - start_time)) 25 | 26 | joint_positions = get_peaks(average_joint_maps, skeleton_config.joints) 27 | 28 | average_limb_maps = future.result() 29 | print("{}: {}".format("Upscale Limbs / Get Peaks", time.time() - start_time)) 30 | 31 | limbs = util_paf_map.get_limbs(average_limb_maps, joint_positions, original_img, skeleton_config.limbs, 32 | model.limb_paf_mapping) 33 | humans = get_humans_from_limbs(limbs) 34 | return joint_positions, limbs, humans -------------------------------------------------------------------------------- /utils/util_skeleton.py: -------------------------------------------------------------------------------- 1 | from config import cfg 2 | from skeletons.skeleton_config_rtpose2d import SkeletonConfigRtPose2D 3 | 4 | 5 | def is_joint_from_limb_in_human(human, limb, limb_candidate): 6 | return human["joints"][limb[0]] == limb_candidate["joint_a"] or human["joints"][limb[1]] == limb_candidate["joint_b"] 7 | 8 | 9 | def get_empty_human_dict(num_joints, num_limbs): 10 | human = { 11 | "score": 0, 12 | "num_joints": 0, 13 | "joints": [None] * num_joints, 14 | "limbs": [None] * num_limbs 15 | } 16 | return human 17 | 18 | 19 | def are_joints_in_both_humans(human_a, human_b): 20 | for joint_idx, 
joint in enumerate(human_b["joints"]): 21 | if joint is not None and human_a["joints"][joint_idx] is not None: 22 | return True 23 | return False 24 | 25 | 26 | def get_merged_humans(human_a, human_b): 27 | for joint_idx, joint in enumerate(human_b["joints"]): 28 | if joint is None: 29 | continue 30 | if human_a["joints"][joint_idx] is not None: 31 | raise RuntimeError("Merge conflict, joint exists in both humans") 32 | human_a["joints"][joint_idx] = joint 33 | 34 | for limb_idx, limb in enumerate(human_b["limbs"]): 35 | if limb is None: 36 | continue 37 | if human_a["limbs"][limb_idx] is not None: 38 | raise RuntimeError("Merge conflict, limb exists in both humans")# 39 | human_a["limbs"][limb_idx] = limb 40 | 41 | human_a["score"] += human_b["score"] 42 | human_a["num_joints"] += human_b["num_joints"] 43 | return human_a 44 | 45 | 46 | def get_humans_from_limbs(limbs): 47 | # last number in each row is the total parts number of that person 48 | # the second last number in each row is the score of the overall configuration 49 | human_list = [] # Humans n, 20 50 | 51 | for limb_nr, limb_candidates in limbs.items(): 52 | limb = SkeletonConfigRtPose2D.limbs[limb_nr] 53 | for i, limb_candidate in enumerate(limb_candidates): 54 | found = 0 55 | subset_idx = [-1, 1] 56 | for j in range(len(human_list)): 57 | if is_joint_from_limb_in_human(human_list[j], limb, limb_candidate): 58 | subset_idx[found] = j 59 | found += 1 60 | if found == 1: 61 | j = subset_idx[0] 62 | if human_list[j]["joints"][limb[1]] != limb_candidate["joint_b"]: 63 | human_list[j]["joints"][limb[1]] = limb_candidate["joint_b"] 64 | human_list[j]["limbs"][limb_candidate["limb_nr"]] = limb_candidate 65 | human_list[j]["num_joints"] += 1 66 | human_list[j]["score"] += limb_candidate["limb_score"] + limb_candidate["joint_b"]["score"] 67 | 68 | elif found == 2: 69 | j1, j2 = subset_idx 70 | #print("found = 2") 71 | if not are_joints_in_both_humans(human_list[j1], human_list[j2]): 72 | human_list[j1] = get_merged_humans(human_list[j1], human_list[j2]) 73 | del human_list[j2] 74 | else: # as like found == 1 75 | human_list[j]["joints"][limb[1]] = limb_candidate["joint_b"] 76 | human_list[j]["limbs"][limb_candidate["limb_nr"]] = limb_candidate 77 | human_list[j]["num_joints"] += 1 78 | human_list[j]["score"] += limb_candidate["limb_score"] + limb_candidate["joint_b"]["score"] 79 | elif not found: 80 | row = get_empty_human_dict(len(SkeletonConfigRtPose2D.joints), len(SkeletonConfigRtPose2D.limbs)) 81 | row["joints"][limb[0]] = limb_candidate["joint_a"] 82 | row["joints"][limb[1]] = limb_candidate["joint_b"] 83 | row["limbs"][limb_candidate["limb_nr"]] = limb_candidate 84 | row["num_joints"] = 2 85 | row["score"] = limb_candidate["matched_score"] 86 | human_list.append(row) 87 | 88 | # delete some rows of subset which has few parts occur 89 | deleteIdx = [] 90 | for i in range(len(human_list)): 91 | if human_list[i]["num_joints"] < cfg.network.skeleton_min_limbs or human_list[i]["score"] / \ 92 | human_list[i]["num_joints"] < cfg.network.skeleton_limb_score: 93 | deleteIdx.append(i) 94 | return [x for i, x in enumerate(human_list) if i not in deleteIdx] 95 | -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- 1 | import math 2 | from io import BytesIO 3 | 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from util_img import get_combined_maps, get_upscaled_maps 9 | 10 | 11 | # 
Heatmap Plotting 12 | def plot_map(in_map: np.ndarray, diverging: bool = False, is_255: bool = False): 13 | cmap = plt.cm.seismic if diverging else plt.cm.afmhot 14 | if is_255: 15 | plt.imshow(in_map, cmap=cmap, vmin=0, vmax=255) 16 | else: 17 | plt.imshow(in_map, cmap=cmap) 18 | plt.colorbar() 19 | 20 | 21 | def plot_map_with_bg(original_img: np.ndarray, in_map: np.ndarray, diverging: bool = False, is_255: bool = False): 22 | cmap = plt.cm.seismic if diverging else plt.cm.afmhot 23 | plt.imshow(original_img, alpha=0.5) 24 | if is_255: 25 | plt.imshow(in_map, cmap=cmap, alpha=0.5, vmin=0, vmax=255) 26 | else: 27 | plt.imshow(in_map, cmap=cmap, alpha=0.5) 28 | plt.colorbar() 29 | 30 | 31 | # Wrapper 32 | 33 | 34 | #Variable to nd.array -> varname[0].data.cpu().numpy() 35 | def visualize_network(original_img: np.ndarray, cropped_img: np.ndarray, joint_maps: np.ndarray, limb_maps: np.ndarray, 36 | img_size: int, window_id=None): 37 | joint_map = get_combined_maps(joint_maps) 38 | limb_map = get_combined_maps(limb_maps) 39 | visualize_heatmaps(original_img, cropped_img, joint_map, limb_map, img_size, window_id) 40 | 41 | 42 | # Network Outputs 43 | 44 | def visualize_heatmaps(original_img: np.ndarray, input_img: np.ndarray, joint_map: np.ndarray, limb_map: np.ndarray, 45 | img_size: int, window_id=None): 46 | fig = plt.figure(window_id, figsize=(11, 11), dpi=100) 47 | 48 | plt.suptitle('Results for img size: {}'.format(img_size)) 49 | 50 | # Original Image 51 | fig_original = fig.add_subplot(2, 2, 1) 52 | fig_original.set_title('Original') 53 | plt.imshow(original_img) 54 | 55 | # Cropped Image 56 | fig_cropped = fig.add_subplot(2, 2, 2) 57 | fig_cropped.set_title('Input') 58 | plt.imshow(input_img) 59 | 60 | # Joint Map 61 | fig_joint_maps = fig.add_subplot(2, 2, 3) 62 | fig_joint_maps.set_title('Joint Maps') 63 | plot_map_with_bg(original_img, joint_map) 64 | 65 | # Limb Map 66 | fig_limb_maps = fig.add_subplot(2, 2, 4) 67 | fig_limb_maps.set_title('Limb Maps') 68 | plot_map_with_bg(original_img, limb_map) 69 | 70 | plt.show() 71 | 72 | 73 | def build_compare_joint_limb_maps(joint_map_a: np.ndarray, limb_map_a: np.ndarray, joint_map_b: np.ndarray, 74 | limb_map_b: np.ndarray, bg_img=None, is_255: bool = False): 75 | def plt_func(in_map): plot_map_with_bg(bg_img, in_map, is_255=is_255) if bg_img is not None else plot_map(in_map, is_255=is_255) 76 | 77 | plt.cla() 78 | plt.clf() 79 | fig = plt.figure("cpr_maps", figsize=(11, 11), dpi=100) 80 | 81 | fig_joint_map = fig.add_subplot(2, 2, 1) 82 | fig_joint_map.set_title('Joint Map #1') 83 | plt_func(joint_map_a) 84 | 85 | fig_limb_map = fig.add_subplot(2, 2, 2) 86 | fig_limb_map.set_title('Limb Map #1') 87 | plt_func(limb_map_a) 88 | 89 | if joint_map_b is not None: 90 | fig_joint_map2 = fig.add_subplot(2, 2, 3) 91 | fig_joint_map2.set_title('Joint Map #2') 92 | plt_func(joint_map_b) 93 | 94 | if limb_map_b is not None: 95 | fig_limb_map2 = fig.add_subplot(2, 2, 4) 96 | fig_limb_map2.set_title('Limb Map #2') 97 | plt_func(limb_map_b) 98 | 99 | return fig 100 | 101 | 102 | def visualize_compare_joint_limb_maps(joint_map_a: np.ndarray, limb_map_a: np.ndarray, joint_map_b: np.ndarray, 103 | limb_map_b: np.ndarray, bg_img=None): 104 | fig = build_compare_joint_limb_maps(joint_map_a, limb_map_a, joint_map_b, limb_map_b, bg_img) 105 | plt.draw() 106 | plt.pause(0.05) 107 | 108 | 109 | def get_compare_joint_limb_maps(joint_map_a: np.ndarray, limb_map_a: np.ndarray, joint_map_b: np.ndarray, 110 | limb_map_b: np.ndarray, bg_img=None): 111 | fig = 
build_compare_joint_limb_maps(joint_map_a, limb_map_a, joint_map_b, limb_map_b, bg_img) 112 | img_data = BytesIO() 113 | fig.savefig(img_data, format='png') 114 | img_data.seek(0) 115 | file_bytes = np.asarray(bytearray(img_data.read()), dtype=np.uint8) 116 | img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) 117 | return img 118 | 119 | 120 | # Visualize Limbs 121 | 122 | def visualize_limbs(original_img: np.ndarray, joint_positions: [], limb_candidates: [], limbs: [], limb_colors: []): 123 | img = get_limb_image(original_img, joint_positions, limb_candidates, limbs, limb_colors) 124 | cv2.imshow("human_pose", img) 125 | cv2.waitKey(0) 126 | 127 | 128 | def get_limb_image(original_img: np.ndarray, joint_positions: [], limb_candidates: [], limbs: [], limb_colors: []): 129 | canvas = original_img.copy() 130 | for i in range(18): 131 | for j in range(len(joint_positions[i])): 132 | cv2.circle(canvas, tuple(joint_positions[i][j]['coords']), 4, limb_colors[i], thickness=-1) 133 | 134 | stickwidth = 2 135 | 136 | for limb in limb_candidates: 137 | if limb['limb'] in [[2, 16], [5, 17]]: # Ignore the Left/Right Eye to Left/Right Shoulder Joints 138 | continue 139 | color_idx = [i for i, x in enumerate(limbs) if x == limb['limb']] 140 | cur_canvas = canvas.copy() 141 | X = [limb['joint_a']['y'], limb['joint_b']['y']] 142 | Y = [limb['joint_a']['x'], limb['joint_b']['x']] 143 | mX = np.mean(X) 144 | mY = np.mean(Y) 145 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 146 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 147 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 148 | cv2.fillConvexPoly(cur_canvas, polygon, limb_colors[color_idx[0]]) 149 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 150 | return canvas 151 | 152 | 153 | # Human Pose 154 | 155 | def visualize_human_pose(original_img: np.ndarray, joint_positions: [], humans: [], limbs: [], limb_colors: []): 156 | img = get_human_pose_image(original_img, joint_positions, humans, limbs, limb_colors) 157 | cv2.imshow("human_pose", img) 158 | cv2.waitKey(0) 159 | 160 | 161 | def save_human_pose_img(original_img: np.ndarray, joint_positions: [], humans: [], limbs: [], limb_colors: [], 162 | file_path="human_pose.png"): 163 | img = get_human_pose_image(original_img, joint_positions, humans, limbs, limb_colors) 164 | cv2.imwrite(file_path, img) 165 | 166 | 167 | def add_img_title(img, title): 168 | font = cv2.FONT_HERSHEY_SIMPLEX 169 | cv2.putText(img, title, (0, 30), font, 1, (0, 0, 255), 1, cv2.LINE_AA) 170 | 171 | 172 | def get_human_pose_image(original_img: np.ndarray, joint_positions: [], humans: [], limbs: [], limb_colors: []): 173 | canvas = original_img.copy() 174 | add_img_title(canvas, "Humans: {}".format(len(humans))) 175 | for i in range(18): 176 | for j in range(len(joint_positions[i])): 177 | cv2.circle(canvas, tuple(joint_positions[i][j]['coords']), 4, limb_colors[i], thickness=1) 178 | 179 | stickwidth = 4 180 | 181 | for human in humans: 182 | for idx, limb in enumerate(human["limbs"]): 183 | if not limb: 184 | continue 185 | if limb['limb'] in [[2, 16], [5, 17]]: # Ignore the Left/Right Eye to Left/Right Shoulder Joints 186 | continue 187 | # if limb['limb'] not in [[1,5], [1,2]]: 188 | # continue 189 | color_idx = [i for i, x in enumerate(limbs) if x == limb['limb']] 190 | color = limb_colors[color_idx[0]] 191 | # color = [color[2], color[1], color[0]] 192 | #color = [0, 85, 255] 193 | print("{}: {}".format(limb['limb'], color)) 194 | 
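            # Draw the limb as a filled ellipse on a copy of the canvas and blend it back (0.4/0.6) so the skeleton overlay stays translucent.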
cur_canvas = canvas.copy() 195 | X = [limb['joint_a']['y'], limb['joint_b']['y']] 196 | Y = [limb['joint_a']['x'], limb['joint_b']['x']] 197 | cv2.circle(canvas, (limb['joint_a']['x'], limb['joint_a']['y']), 4, color, thickness=-1) 198 | cv2.circle(canvas, (limb['joint_b']['x'], limb['joint_b']['y']), 4, color, thickness=-1) 199 | mX = np.mean(X) 200 | mY = np.mean(Y) 201 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 202 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 203 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 204 | cv2.fillConvexPoly(cur_canvas, polygon, color) 205 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 206 | return canvas 207 | 208 | 209 | # Debug 210 | 211 | def visualize_network_train_output(joint_maps_gt: np.array, limb_maps_gt: np.array, joint_maps: np.array, 212 | limb_maps: np.array, bg_img: np.array = None): 213 | """ 214 | Opens some debug visualizations for network training outputs (compares with ground truth) 215 | """ 216 | if bg_img is not None: 217 | img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) 218 | pad = [0, 0, 0, 0] 219 | upscaled_joint_maps = get_upscaled_maps(joint_maps_gt, img, img, pad) 220 | upscaled_limb_maps = get_upscaled_maps(limb_maps_gt, img, img, pad) 221 | upscaled_joint_maps_2 = get_upscaled_maps(joint_maps, img, img, pad) 222 | upscaled_limb_maps_2 = get_upscaled_maps(limb_maps, img, img, pad) 223 | visualize_compare_combined_joint_limb_maps(upscaled_joint_maps, upscaled_limb_maps, upscaled_joint_maps_2, 224 | upscaled_limb_maps_2, img) 225 | else: 226 | visualize_compare_combined_joint_limb_maps(joint_maps_gt, limb_maps_gt, 227 | joint_maps, limb_maps) 228 | 229 | 230 | def visualize_compare_combined_joint_limb_maps(joint_maps_a: np.array, limb_maps_a: np.array, joint_maps_b: np.array, 231 | limb_maps_b: np.array, bg_img=None): 232 | joint_map = get_combined_maps(joint_maps_a) 233 | limb_map = get_combined_maps(limb_maps_a) 234 | joint_map2 = get_combined_maps(joint_maps_b) 235 | limb_map2 = get_combined_maps(limb_maps_b) 236 | visualize_compare_joint_limb_maps(joint_map, limb_map, joint_map2, limb_map2, bg_img) 237 | 238 | 239 | def get_network_train_output(joint_maps_gt: np.array, limb_maps_gt: np.array, joint_maps: np.array, 240 | limb_maps: np.array, bg_img: np.array = None): 241 | """ 242 | Opens some debug visualizations for network training outputs (compares with ground truth) 243 | """ 244 | if bg_img is not None: 245 | img = cv2.cvtColor(bg_img, cv2.COLOR_BGR2RGB) 246 | pad = [0, 0, 0, 0] 247 | upscaled_joint_maps = get_upscaled_maps(joint_maps_gt, img, img, pad) 248 | upscaled_limb_maps = get_upscaled_maps(limb_maps_gt, img, img, pad) 249 | upscaled_joint_maps_2 = None 250 | upscaled_limb_maps_2 = None 251 | if joint_maps is not None: 252 | upscaled_joint_maps_2 = get_upscaled_maps(joint_maps, img, img, pad) 253 | if limb_maps is not None: 254 | upscaled_limb_maps_2 = get_upscaled_maps(limb_maps, img, img, pad) 255 | return get_compare_combined_joint_limb_maps(upscaled_joint_maps, upscaled_limb_maps, upscaled_joint_maps_2, 256 | upscaled_limb_maps_2, img) 257 | else: 258 | return get_compare_combined_joint_limb_maps(joint_maps_gt, limb_maps_gt, joint_maps, limb_maps) 259 | 260 | 261 | def get_compare_combined_joint_limb_maps(joint_maps_a: np.array, limb_maps_a: np.array, joint_maps_b: np.array, 262 | limb_maps_b: np.array, bg_img=None): 263 | joint_map = get_combined_maps(joint_maps_a) 264 | limb_map = get_combined_maps(limb_maps_a) 
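    # The second pair of maps is optional; it is only combined when provided (e.g. when no network prediction is passed alongside the ground truth).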
265 | joint_map2 = None 266 | limb_map2 = None 267 | if joint_maps_b is not None: 268 | joint_map2 = get_combined_maps(joint_maps_b) 269 | if limb_maps_b is not None: 270 | limb_map2 = get_combined_maps(limb_maps_b) 271 | return get_compare_joint_limb_maps(joint_map, limb_map, joint_map2, limb_map2, bg_img) 272 | 273 | 274 | def visualize_all_maps(maps: np.ndarray, bg_img: np.array = None, map_names: [] = None): 275 | def plt_func(in_map, in_img = None): 276 | plot_map_with_bg(in_img, in_map, is_255=True) if in_img is not None else plot_map(in_map, is_255=True) 277 | tmp_maps = maps 278 | num_maps = tmp_maps.shape[0] 279 | rows = math.ceil(num_maps / 4) 280 | fig = plt.figure("Debug Maps", figsize=(11, 11), dpi=100) 281 | 282 | for map_idx in range(num_maps): 283 | # Heatmap 284 | fig_heatmap = fig.add_subplot(rows, 4, map_idx + 1) 285 | map_name = map_names[map_idx] if map_names else map_idx 286 | fig_heatmap.set_title(map_name) 287 | tmp_map = np.array(tmp_maps[map_idx] * 255, dtype=np.uint8) 288 | plt_func(tmp_map, bg_img) 289 | 290 | plt.tight_layout() 291 | plt.show() 292 | 293 | 294 | def visualize_map(map_data: np.array, bg_img: np.array = None, map_name: str = None): 295 | def plt_func(in_map, in_img = None): 296 | plot_map_with_bg(in_img, in_map) if in_img is not None else plot_map(in_map) 297 | tmp_map = np.array(map_data * 255, dtype=np.uint8) 298 | fig_name = map_name if map_name else "Debug Map" 299 | fig = plt.figure(fig_name, figsize=(11, 11), dpi=100) 300 | 301 | plt_func(tmp_map, bg_img) 302 | plt.tight_layout() 303 | plt.show() 304 | -------------------------------------------------------------------------------- /workbenches/export_rarepose_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import cv2 5 | 6 | from models.model_openpose import OpenPoseModelHandler as ModelHandler 7 | from network import get_network_results 8 | from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose as SkeletonConfig 9 | from utils import util 10 | from utils.util_eval import get_joints_for_json_export, get_result_json 11 | from utils.util_predict import get_human_data 12 | from visualization import save_human_pose_img 13 | 14 | 15 | @util.measure_time 16 | def export(model, original_img, image_id): 17 | export_path = "/media/USERNAME/Data/rtpose2d/exports/rare_pose_dataset/big_sim_fixed_c13" 18 | img_result = get_network_results(model, original_img) 19 | joint_positions, limbs, humans = get_human_data(model, img_result, original_img, skeleton_config) 20 | humans_for_export = get_joints_for_json_export(humans, skeleton_config) 21 | export_json = get_result_json(image_id, humans_for_export) 22 | 23 | save_human_pose_img(original_img, joint_positions, humans, skeleton_config.limbs, skeleton_config.limb_colors, 24 | file_path=os.path.join(export_path, "{}.jpg".format(image_id))) 25 | with open(os.path.join(export_path, "{}.json".format(image_id)), 'w') as outfile: 26 | json.dump(export_json, outfile) 27 | # save_human_pose_img(original_img, joint_positions, humans, SkeletonConfigRtPose2D.limbs, SkeletonConfigRtPose2D.limb_colors) 28 | 29 | 30 | if __name__ == "__main__": 31 | skeleton_config = SkeletonConfig() 32 | jsons = [] 33 | count = 0 34 | model_handler = ModelHandler() 35 | model = model_handler.get_model() 36 | model_handler.load_state_dict(model) 37 | dataset_path = "/media/USERNAME/Data/Datasets/rare_pose_dataset/images" 38 | for file in os.listdir(dataset_path): 39 | if file.endswith(".jpg") or 
file.endswith(".png"): 40 | img_id = os.path.splitext(file)[0] 41 | print("Exporting image {} ({})".format(count, img_id)) 42 | image = cv2.imread(os.path.join(dataset_path, file)) 43 | export(model, image, img_id) 44 | count += 1 45 | --------------------------------------------------------------------------------
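
For reference, a minimal single-image prediction sketch assembled from the helpers above; it mirrors the flow of workbenches/export_rarepose_results.py and utils/util_predict.py. The image path is a placeholder and predict_image.py may wire these calls differently:

import cv2

from models.model_openpose import OpenPoseModelHandler
from network import get_network_results
from skeletons.skeleton_config_openpose import SkeletonConfigOpenPose
from utils.util_predict import get_human_data
from visualization import visualize_human_pose

skeleton_config = SkeletonConfigOpenPose()
model_handler = OpenPoseModelHandler()
model = model_handler.get_model()      # build the OpenPose network
model_handler.load_state_dict(model)   # load the trained weights

original_img = cv2.imread("example.jpg")  # placeholder image path
img_result = get_network_results(model, original_img)
joint_positions, limbs, humans = get_human_data(model, img_result, original_img, skeleton_config)
visualize_human_pose(original_img, joint_positions, humans, skeleton_config.limbs, skeleton_config.limb_colors)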