├── .gitignore ├── LICENSE ├── README.md ├── alphapose ├── __init__.py ├── datasets │ ├── __init__.py │ ├── coco_det.py │ ├── coco_wholebody.py │ ├── coco_wholebody_det.py │ ├── concat_dataset.py │ ├── custom.py │ ├── halpe_136.py │ ├── halpe_136_det.py │ ├── halpe_26.py │ ├── halpe_26_det.py │ ├── halpe_68_noface.py │ ├── halpe_68_noface_det.py │ ├── halpe_coco_wholebody_136.py │ ├── halpe_coco_wholebody_136_det.py │ ├── halpe_coco_wholebody_26.py │ ├── halpe_coco_wholebody_26_det.py │ ├── mpii.py │ ├── mscoco.py │ ├── single_hand.py │ └── single_hand_det.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── criterion.py │ ├── fastpose.py │ ├── fastpose_duc.py │ ├── fastpose_duc_dense.py │ ├── hardnet.py │ ├── hrnet.py │ ├── layers │ │ ├── DUC.py │ │ ├── PixelUnshuffle.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── ShuffleResnet.py │ │ ├── dcn │ │ │ ├── DCN.py │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ ├── deform_pool.py │ │ │ └── src │ │ │ │ ├── deform_conv_cuda.cpp │ │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ │ ├── deform_pool_cuda.cpp │ │ │ │ └── deform_pool_cuda_kernel.cu │ │ └── smpl │ │ │ ├── SMPL.py │ │ │ └── lbs.py │ ├── simple3dposeSMPLWithCam.py │ └── simplepose.py ├── opt.py ├── utils │ ├── __init__.py │ ├── bbox.py │ ├── config.py │ ├── detector.py │ ├── env.py │ ├── file_detector.py │ ├── logger.py │ ├── metrics.py │ ├── pPose_nms.py │ ├── presets │ │ ├── __init__.py │ │ ├── simple_transform.py │ │ └── simple_transform_3d_smpl.py │ ├── registry.py │ ├── render_pytorch3d.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── transforms.py │ ├── vis.py │ ├── webcam_detector.py │ ├── writer.py │ └── writer_smpl.py └── version.py ├── configs ├── coco │ ├── hardnet │ │ ├── 256x192_hard68_lr1e-3_1x.yaml │ │ └── 256x192_hard85_lr1e-3_1x.yaml │ ├── hrnet │ │ └── 256x192_w32_lr1e-3.yaml │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-concat.yaml │ │ ├── 256x192_res50_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-simple.yaml │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── coco_wholebody │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_2x-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml ├── dense_coco │ └── resnet50 │ │ └── 256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml ├── halpe_136 │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── halpe_26 │ └── resnet │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── halpe_68_noface │ └── resnet │ │ └── 256x192_res50_lr1e-3_2x-dcn-combined.yaml ├── halpe_coco_wholebody_136 │ └── resnet │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml ├── single_hand │ └── resnet │ │ └── 256x192_res50_lr1e-3_2x-dcn-regression.yaml └── smpl │ └── 256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml ├── detector ├── apis.py ├── effdet_api.py ├── effdet_cfg.py ├── efficientdet │ ├── 
README.md │ ├── effdet │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── bench.py │ │ ├── config │ │ │ └── config.py │ │ ├── efficientdet.py │ │ ├── helpers.py │ │ └── object_detection │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── region_similarity_calculator.py │ │ │ └── target_assigner.py │ ├── utils.py │ └── weights │ │ └── get_models.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ ├── nms_kernel.cu │ │ ├── soft_nms_cpu.cpp │ │ └── soft_nms_cpu.pyx ├── tracker │ ├── README.md │ ├── __init__.py │ ├── cfg │ │ ├── ccmcpe.json │ │ └── yolov3.cfg │ ├── models.py │ ├── preprocess.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetrack.py │ │ ├── matching.py │ │ └── multitracker.py │ └── utils │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── evaluation.py │ │ ├── io.py │ │ ├── kalman_filter.py │ │ ├── log.py │ │ ├── nms.py │ │ ├── parse_config.py │ │ ├── timer.py │ │ ├── utils.py │ │ └── visualization.py ├── tracker_api.py ├── tracker_cfg.py ├── yolo │ ├── README.md │ ├── __init__.py │ ├── bbox.py │ ├── cam_demo.py │ ├── cfg │ │ ├── tiny-yolo-voc.cfg │ │ ├── yolo-voc.cfg │ │ ├── yolo.cfg │ │ ├── yolov3-spp.cfg │ │ └── yolov3.cfg │ ├── darknet.py │ ├── detect.py │ ├── pallete │ ├── preprocess.py │ ├── util.py │ ├── video_demo.py │ └── video_demo_half.py ├── yolo_api.py ├── yolo_cfg.py ├── yolox │ ├── README.md │ ├── exps │ │ ├── default │ │ │ ├── __init__.py │ │ │ ├── yolov3.py │ │ │ ├── yolox_l.py │ │ │ ├── yolox_m.py │ │ │ ├── yolox_nano.py │ │ │ ├── yolox_s.py │ │ │ ├── yolox_tiny.py │ │ │ └── yolox_x.py │ │ └── example │ │ │ ├── custom │ │ │ ├── nano.py │ │ │ └── yolox_s.py │ │ │ └── yolox_voc │ │ │ └── yolox_voc_s.py │ ├── tools │ │ ├── __init__.py │ │ └── demo.py │ ├── utils │ │ ├── __init__.py │ │ └── preprocess.py │ └── yolox │ │ ├── __init__.py │ │ ├── exp │ │ ├── __init__.py │ │ ├── base_exp.py │ │ ├── build.py │ │ ├── default │ │ │ └── __init__.py │ │ └── yolox_base.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── cocoeval │ │ │ ├── cocoeval.cpp │ │ │ └── cocoeval.h │ │ ├── fast_coco_eval_api.py │ │ └── jit_ops.py │ │ ├── models │ │ ├── __init__.py │ │ ├── build.py │ │ ├── darknet.py │ │ ├── losses.py │ │ ├── network_blocks.py │ │ ├── yolo_fpn.py │ │ ├── yolo_head.py │ │ ├── yolo_pafpn.py │ │ └── yolox.py │ │ ├── tools │ │ └── __init__.py │ │ └── utils │ │ ├── __init__.py │ │ ├── boxes.py │ │ └── compat.py ├── yolox_api.py └── yolox_cfg.py ├── docs ├── CrowdPose.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── alphapose_136.gif ├── alphapose_17.gif ├── alphapose_26.gif ├── alphapose_hybrik_smpl.gif ├── contributors.md ├── crowdpose.gif ├── faq.md ├── logo.jpg ├── logs │ ├── fast_421_res152_256x192.log │ ├── fast_421_res50-shuffle_256x192.log │ ├── fast_dcn_res50_256x192.log │ ├── fast_res50_256x192.log │ ├── hrnet_w32_256x192.log │ └── simple_res50_256x192.log ├── output.md ├── pose.gif ├── posetrack.gif ├── posetrack2.gif ├── run.md ├── speed_up.md ├── step1.jpg ├── step2.jpg ├── step3.jpg ├── step4.jpg └── win_install.md ├── examples ├── demo │ ├── 1.jpg │ ├── 2.jpg │ └── 3.jpg ├── list-coco-demo.txt ├── list-coco-minival500.txt └── list-coco-val5000.txt ├── model_files ├── J_regressor_h36m.npy ├── h36m_mean_beta.npy └── smpl_faces.npy ├── pretrained_models └── get_models.sh ├── scripts ├── demo_3d_inference.py ├── demo_api.py ├── demo_inference.py ├── inference.sh ├── inference_3d.sh ├── 
train.py ├── train.sh ├── validate.py └── validate.sh ├── setup.cfg ├── setup.py └── trackers ├── PoseFlow ├── README.md ├── alpha-pose-results-sample.json ├── matching.py ├── parallel_process.py ├── poseflow_infer.py ├── posetrack1.gif ├── posetrack2.gif ├── posetrack_data ├── poseval ├── requirements.txt ├── tracker-baseline.py ├── tracker-general.py └── utils.py ├── README.md ├── ReidModels ├── ResBnLin.py ├── ResNet.py ├── __init__.py ├── backbone │ ├── __init__.py │ ├── googlenet.py │ ├── lrn.py │ └── sqeezenet.py ├── bn_linear.py ├── classification │ ├── __init__.py │ ├── classifier.py │ └── rfcn_cls.py ├── net_utils.py ├── osnet.py ├── osnet_ain.py ├── psroi_pooling │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── psroi_pooling │ │ │ └── __init__.py │ ├── build.py │ ├── functions │ │ ├── __init__.py │ │ └── psroi_pooling.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── psroi_pool.py │ └── src │ │ ├── cuda │ │ ├── psroi_pooling_kernel.cu │ │ └── psroi_pooling_kernel.h │ │ ├── psroi_pooling_cuda.c │ │ └── psroi_pooling_cuda.h ├── reid │ ├── __init__.py │ └── image_part_aligned.py └── resnet_fc.py ├── __init__.py ├── tracker_api.py ├── tracker_cfg.py ├── tracking ├── README.md ├── __init__.py ├── basetrack.py ├── matching.py └── utils │ ├── __init__.py │ ├── io.py │ ├── kalman_filter.py │ ├── nms.py │ ├── parse_config.py │ ├── timer.py │ └── utils.py └── utils ├── basetransforms.py ├── bbox.py ├── io.py ├── kalman_filter.py ├── log.py ├── parse_config.py ├── timer.py ├── transform.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .vscode 105 | .tensorboard 106 | exp/coco* 107 | *.pth 108 | *.weights 109 | exp/json/test_kpt.json 110 | exp/json/test_gt_kpt.json 111 | exp/json/validate_rcnn_kpt.json 112 | exp/json/validate_gt_kpt.json 113 | data/ 114 | tmp/ 115 | exp/json 116 | tmp_*/ 117 | examples/res*/ 118 | data/ 119 | exp/ 120 | data 121 | -------------------------------------------------------------------------------- /alphapose/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /alphapose/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_det import Mscoco_det 2 | from .concat_dataset import ConcatDataset 3 | from .custom import CustomDataset 4 | from .mscoco import Mscoco 5 | from .mpii import Mpii 6 | from .coco_wholebody import coco_wholebody 7 | from .coco_wholebody_det import coco_wholebody_det 8 | from .halpe_26 import Halpe_26 9 | from .halpe_136 import Halpe_136 10 | from .halpe_136_det import Halpe_136_det 11 | from .halpe_26_det import Halpe_26_det 12 | from .halpe_coco_wholebody_26 import Halpe_coco_wholebody_26 13 | from .halpe_coco_wholebody_26_det import Halpe_coco_wholebody_26_det 14 | from .halpe_coco_wholebody_136 import Halpe_coco_wholebody_136 15 | from .halpe_coco_wholebody_136_det import Halpe_coco_wholebody_136_det 16 | from .halpe_68_noface import Halpe_68_noface 17 | from .halpe_68_noface_det import Halpe_68_noface_det 18 | from .single_hand import SingleHand 19 | from .single_hand_det import SingleHand_det 20 | 21 | __all__ = ['CustomDataset', 'ConcatDataset', 'Mpii', 'Mscoco', 'Mscoco_det', \ 22 | 'Halpe_26', 'Halpe_26_det', 'Halpe_136', 'Halpe_136_det', \ 23 | 'Halpe_coco_wholebody_26', 'Halpe_coco_wholebody_26_det', \ 24 | 'Halpe_coco_wholebody_136', 'Halpe_coco_wholebody_136_det', \ 25 | 'Halpe_68_noface', 'Halpe_68_noface_det', 'SingleHand', 'SingleHand_det', \ 26 | 'coco_wholebody', 'coco_wholebody_det'] 27 | -------------------------------------------------------------------------------- /alphapose/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright 
(c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import bisect 7 | 8 | import torch 9 | import torch.utils.data as data 10 | 11 | from alphapose.models.builder import DATASET, build_dataset 12 | 13 | 14 | @DATASET.register_module 15 | class ConcatDataset(data.Dataset): 16 | """Custom Concat dataset. 17 | Annotation file must be in `coco` format. 18 | 19 | Parameters 20 | ---------- 21 | train: bool, default is True 22 | If true, will set as training mode. 23 | dpg: bool, default is False 24 | If true, will activate `dpg` for data augmentation. 25 | skip_empty: bool, default is False 26 | Whether skip entire image if no valid label is found. 27 | cfg: dict, dataset configuration. 28 | """ 29 | 30 | def __init__(self, 31 | train=True, 32 | dpg=False, 33 | skip_empty=True, 34 | **cfg): 35 | 36 | self._cfg = cfg 37 | self._subset_cfg_list = cfg['SET_LIST'] 38 | self._preset_cfg = cfg['PRESET'] 39 | self._mask_id = [item['MASK_ID'] for item in self._subset_cfg_list] 40 | 41 | self.num_joints = self._preset_cfg['NUM_JOINTS'] 42 | 43 | self._subsets = [] 44 | self._subset_size = [0] 45 | for _subset_cfg in self._subset_cfg_list: 46 | subset = build_dataset(_subset_cfg, preset_cfg=self._preset_cfg, train=train) 47 | self._subsets.append(subset) 48 | self._subset_size.append(len(subset)) 49 | self.cumulative_sizes = self.cumsum(self._subset_size) 50 | 51 | def __getitem__(self, idx): 52 | assert idx >= 0 53 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 54 | dataset_idx -= 1 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx] 56 | 57 | sample = self._subsets[dataset_idx][sample_idx] 58 | img, label, label_mask, img_id, bbox = sample 59 | 60 | K = label.shape[0] # num_joints from `_subsets[dataset_idx]` 61 | expend_label = torch.zeros((self.num_joints, *label.shape[1:]), dtype=label.dtype) 62 | expend_label_mask = torch.zeros((self.num_joints, *label_mask.shape[1:]), dtype=label_mask.dtype) 63 | expend_label[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label 64 | expend_label_mask[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label_mask 65 | 66 | return img, expend_label, expend_label_mask, img_id, bbox 67 | 68 | def __len__(self): 69 | return self.cumulative_sizes[-1] 70 | 71 | @staticmethod 72 | def cumsum(sequence): 73 | r, s = [], 0 74 | for e in sequence: 75 | r.append(e + s) 76 | s += e 77 | return r 78 | -------------------------------------------------------------------------------- /alphapose/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .fastpose import FastPose 2 | from .fastpose_duc import FastPose_DUC 3 | from .hrnet import PoseHighResolutionNet 4 | from .simplepose import SimplePose 5 | from .fastpose_duc_dense import FastPose_DUC_Dense 6 | from .hardnet import HarDNetPose 7 | from .simple3dposeSMPLWithCam import Simple3DPoseBaseSMPLCam 8 | from .criterion import L1JointRegression 9 | 10 | __all__ = ['FastPose', 'SimplePose', 'PoseHighResolutionNet', 11 | 'FastPose_DUC', 'FastPose_DUC_Dense', 'HarDNetPose', 12 | 'Simple3DPoseBaseSMPLCam', 13 | 'L1JointRegression'] 14 | -------------------------------------------------------------------------------- /alphapose/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from alphapose.utils import Registry, build_from_cfg, 
retrieve_from_cfg 4 | 5 | 6 | SPPE = Registry('sppe') 7 | LOSS = Registry('loss') 8 | DATASET = Registry('dataset') 9 | 10 | 11 | def build(cfg, registry, default_args=None): 12 | if isinstance(cfg, list): 13 | modules = [ 14 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 15 | ] 16 | return nn.Sequential(*modules) 17 | else: 18 | return build_from_cfg(cfg, registry, default_args) 19 | 20 | 21 | def build_sppe(cfg, preset_cfg, **kwargs): 22 | default_args = { 23 | 'PRESET': preset_cfg, 24 | } 25 | for key, value in kwargs.items(): 26 | default_args[key] = value 27 | return build(cfg, SPPE, default_args=default_args) 28 | 29 | 30 | def build_loss(cfg): 31 | return build(cfg, LOSS) 32 | 33 | 34 | def build_dataset(cfg, preset_cfg, **kwargs): 35 | exec(f'from ..datasets import {cfg.TYPE}') 36 | default_args = { 37 | 'PRESET': preset_cfg, 38 | } 39 | for key, value in kwargs.items(): 40 | default_args[key] = value 41 | return build(cfg, DATASET, default_args=default_args) 42 | 43 | 44 | def retrieve_dataset(cfg): 45 | exec(f'from ..datasets import {cfg.TYPE}') 46 | return retrieve_from_cfg(cfg, DATASET) 47 | -------------------------------------------------------------------------------- /alphapose/models/fastpose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.DUC import DUC 10 | from .layers.SE_Resnet import SEResnet 11 | 12 | 13 | @SPPE.register_module 14 | class FastPose(nn.Module): 15 | 16 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 17 | super(FastPose, self).__init__() 18 | self._preset_cfg = cfg['PRESET'] 19 | if 'CONV_DIM' in cfg.keys(): 20 | self.conv_dim = cfg['CONV_DIM'] 21 | else: 22 | self.conv_dim = 128 23 | if 'DCN' in cfg.keys(): 24 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 25 | dcn = cfg['DCN'] 26 | self.preact = SEResnet( 27 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 28 | else: 29 | self.preact = SEResnet(f"resnet{cfg['NUM_LAYERS']}") 30 | 31 | # Imagenet pretrain model 32 | import torchvision.models as tm # noqa: F401,F403 33 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 34 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 35 | 36 | model_state = self.preact.state_dict() 37 | state = {k: v for k, v in x.state_dict().items() 38 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 39 | model_state.update(state) 40 | self.preact.load_state_dict(model_state) 41 | 42 | self.suffle1 = nn.PixelShuffle(2) 43 | self.duc1 = DUC(512, 1024, upscale_factor=2, norm_layer=norm_layer) 44 | if self.conv_dim == 256: 45 | self.duc2 = DUC(256, 1024, upscale_factor=2, norm_layer=norm_layer) 46 | else: 47 | self.duc2 = DUC(256, 512, upscale_factor=2, norm_layer=norm_layer) 48 | self.conv_out = nn.Conv2d( 49 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 50 | 51 | def forward(self, x): 52 | out = self.preact(x) 53 | out = self.suffle1(out) 54 | out = self.duc1(out) 55 | out = self.duc2(out) 56 | 57 | out = self.conv_out(out) 58 | return out 59 | 60 | def _initialize(self): 61 | for m in self.conv_out.modules(): 62 | if isinstance(m, nn.Conv2d): 63 | # nn.init.kaiming_normal_(m.weight, 
mode='fan_out', nonlinearity='relu') 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | nn.init.constant_(m.bias, 0) 68 | -------------------------------------------------------------------------------- /alphapose/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class DUC(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, inplanes, planes, 16 | upscale_factor=2, norm_layer=nn.BatchNorm2d): 17 | super(DUC, self).__init__() 18 | self.conv = nn.Conv2d( 19 | inplanes, planes, kernel_size=3, padding=1, bias=False) 20 | self.bn = norm_layer(planes, momentum=0.1) 21 | self.relu = nn.ReLU(inplace=True) 22 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = self.bn(x) 27 | x = self.relu(x) 28 | x = self.pixel_shuffle(x) 29 | return x 30 | -------------------------------------------------------------------------------- /alphapose/models/layers/PixelUnshuffle.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class PixelUnshuffle(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, downscale_factor=2): 16 | super(PixelUnshuffle, self).__init__() 17 | self._r = downscale_factor 18 | 19 | def forward(self, x): 20 | b, c, h, w = x.shape 21 | out_c = c * (self._r * self._r) 22 | out_h = h // self._r 23 | out_w = w // self._r 24 | 25 | x_view = x.contiguous().view(b, c, out_h, self._r, out_w, self._r) 26 | x_prime = x_view.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_c, out_h, out_w) 27 | 28 | return x_prime 29 | -------------------------------------------------------------------------------- /alphapose/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | from torch import nn 7 | 8 | 9 | class SELayer(nn.Module): 10 | def __init__(self, channel, reduction=1): 11 | super(SELayer, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Linear(channel, channel // reduction), 15 | nn.ReLU(inplace=True), 16 | nn.Linear(channel // reduction, channel), 17 | nn.Sigmoid() 18 | ) 19 | 20 | def forward(self, x): 21 | b, c, _, _ = x.size() 22 | y = self.avg_pool(x).view(b, c) 23 | y = self.fc(y).view(b, c, 1, 1) 24 | return x * y 25 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/DCN.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from . import DeformConv, ModulatedDeformConv 9 | 10 | 11 | class DCN(nn.Module): 12 | ''' 13 | Initialize: inplanes, planes, upscale_factor 14 | OUTPUT: (planes // upscale_factor^2) * ht * wd 15 | ''' 16 | 17 | def __init__(self, inplanes, planes, dcn, 18 | kernel_size, stride=1, 19 | padding=0, bias=False): 20 | super(DCN, self).__init__() 21 | fallback_on_stride = dcn.get('FALLBACK_ON_STRIDE', False) 22 | self.with_modulated_dcn = dcn.get('MODULATED', False) 23 | if fallback_on_stride: 24 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, stride=stride, 25 | padding=padding, bias=bias) 26 | else: 27 | self.deformable_groups = dcn.get('DEFORM_GROUP', 1) 28 | if not self.with_modulated_dcn: 29 | conv_op = DeformConv 30 | offset_channels = 18 31 | else: 32 | conv_op = ModulatedDeformConv 33 | offset_channels = 27 34 | 35 | self.conv_offset = nn.Conv2d( 36 | inplanes, 37 | self.deformable_groups * offset_channels, 38 | kernel_size=kernel_size, 39 | stride=stride, 40 | padding=padding) 41 | self.conv = conv_op( 42 | inplanes, 43 | planes, 44 | kernel_size=kernel_size, 45 | stride=stride, 46 | padding=padding, 47 | deformable_groups=self.deformable_groups, 48 | bias=bias) 49 | 50 | def forward(self, x): 51 | if self.with_modulated_dcn: 52 | offset_mask = self.conv_offset(x) 53 | offset = offset_mask[:, :18 * self.deformable_groups, :, :] 54 | mask = offset_mask[:, -9 * self.deformable_groups:, :, :] 55 | mask = mask.sigmoid() 56 | out = self.conv(x, offset, mask) 57 | else: 58 | offset = self.conv_offset(x) 59 | out = self.conv(x, offset) 60 | 61 | return out 62 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | from .DCN import DCN 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling', 'DCN' 13 | ] 14 | 
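Editor's sketch (not a repository file), assuming the deform_conv CUDA extension under layers/dcn/src has been built: it constructs the DCN wrapper defined in DCN.py above with a config dict whose keys (MODULATED, DEFORM_GROUP, FALLBACK_ON_STRIDE) mirror the DCN block of the ResNet YAML configs later in this listing.

import torch
from alphapose.models.layers.dcn import DCN

# Keys follow the DCN section of the coco/resnet *-dcn.yaml configs shown below.
dcn_cfg = {'MODULATED': False, 'DEFORM_GROUP': 1, 'FALLBACK_ON_STRIDE': False}

# 3x3 deformable conv: conv_offset predicts the sampling offsets internally.
layer = DCN(inplanes=256, planes=256, dcn=dcn_cfg, kernel_size=3, padding=1)

# The forward pass needs the compiled deform_conv_cuda extension and a GPU.
if torch.cuda.is_available():
    out = layer.cuda()(torch.randn(1, 256, 64, 48, device='cuda'))  # -> (1, 256, 64, 48)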
-------------------------------------------------------------------------------- /alphapose/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg, retrieve_from_cfg 2 | 3 | __all__ = [ 4 | 'Registry', 'build_from_cfg', 'retrieve_from_cfg' 5 | ] 6 | -------------------------------------------------------------------------------- /alphapose/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | 4 | 5 | def update_config(config_file): 6 | with open(config_file) as f: 7 | config = edict(yaml.load(f, Loader=yaml.FullLoader)) 8 | return config 9 | -------------------------------------------------------------------------------- /alphapose/utils/env.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import os 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def init_dist(opt): 12 | """Initialize distributed computing environment.""" 13 | opt.ngpus_per_node = torch.cuda.device_count() 14 | 15 | torch.cuda.set_device(opt.gpu) 16 | 17 | if opt.launcher == 'pytorch': 18 | _init_dist_pytorch(opt) 19 | elif opt.launcher == 'mpi': 20 | _init_dist_mpi(opt) 21 | elif opt.launcher == 'slurm': 22 | _init_dist_slurm(opt) 23 | else: 24 | raise ValueError('Invalid launcher type: {}'.format(opt.launcher)) 25 | 26 | 27 | def _init_dist_pytorch(opt, **kwargs): 28 | """Set up environment.""" 29 | # TODO: use local_rank instead of rank % num_gpus 30 | opt.rank = opt.rank * opt.ngpus_per_node + opt.gpu 31 | opt.world_size = opt.world_size 32 | dist.init_process_group(backend=opt.dist_backend, init_method=opt.dist_url, 33 | world_size=opt.world_size, rank=opt.rank) 34 | print(f"{opt.dist_url}, ws:{opt.world_size}, rank:{opt.rank}") 35 | 36 | if opt.rank % opt.ngpus_per_node == 0: 37 | opt.log = True 38 | else: 39 | opt.log = False 40 | 41 | 42 | def _init_dist_slurm(opt, port=23348, **kwargs): 43 | """Set up slurm environment.""" 44 | proc_id = int(os.environ['SLURM_PROCID']) 45 | ntasks = int(os.environ['SLURM_NTASKS']) 46 | node_list = os.environ['SLURM_NODELIST'] 47 | num_gpus = torch.cuda.device_count() 48 | torch.cuda.set_device(proc_id % num_gpus) 49 | if '[' in node_list: 50 | beg = node_list.find('[') 51 | pos1 = node_list.find('-', beg) 52 | if pos1 < 0: 53 | pos1 = 1000 54 | pos2 = node_list.find(',', beg) 55 | if pos2 < 0: 56 | pos2 = 1000 57 | node_list = node_list[:min(pos1, pos2)].replace('[', '') 58 | addr = node_list[8:].replace('-', '.') 59 | os.environ['MASTER_PORT'] = str(port) 60 | os.environ['MASTER_ADDR'] = addr 61 | os.environ['WORLD_SIZE'] = str(ntasks) 62 | os.environ['RANK'] = str(proc_id) 63 | 64 | opt.ngpus_per_node = num_gpus 65 | opt.rank = int(proc_id) 66 | opt.rank = proc_id * num_gpus + opt.gpu 67 | opt.world_size = int(ntasks) * num_gpus 68 | 69 | print(f"tcp://{node_list}:{port}, ws:{opt.world_size}, rank:{opt.rank}, proc_id:{proc_id}") 70 | dist.init_process_group(backend=opt.dist_backend, 71 | init_method=f'tcp://{node_list}:{port}', 72 | world_size=opt.world_size, 73 | rank=opt.rank) 74 | if opt.rank == 0: 75 | opt.log = True 76 | else: 77 | opt.log = False 78 | 79 | 80 | def 
_init_dist_mpi(backend, **kwargs): 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /alphapose/utils/logger.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def board_writing(writer, loss, acc, iterations, dataset='Train'): 11 | writer.add_scalar( 12 | '{}/Loss'.format(dataset), loss, iterations) 13 | writer.add_scalar( 14 | '{}/acc'.format(dataset), acc, iterations) 15 | 16 | 17 | def debug_writing(writer, outputs, labels, inputs, iterations): 18 | tmp_tar = torch.unsqueeze(labels.cpu().data[0], dim=1) 19 | # tmp_out = torch.unsqueeze(outputs.cpu().data[0], dim=1) 20 | 21 | tmp_inp = inputs.cpu().data[0] 22 | tmp_inp[0] += 0.406 23 | tmp_inp[1] += 0.457 24 | tmp_inp[2] += 0.480 25 | 26 | tmp_inp[0] += torch.sum(F.interpolate(tmp_tar, scale_factor=4, mode='bilinear'), dim=0)[0] 27 | tmp_inp.clamp_(0, 1) 28 | 29 | writer.add_image('Data/input', tmp_inp, iterations) 30 | -------------------------------------------------------------------------------- /alphapose/utils/presets/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_transform import SimpleTransform 2 | from .simple_transform_3d_smpl import SimpleTransform3DSMPL 3 | 4 | __all__ = ['SimpleTransform', 'SimpleTransform3DSMPL'] 5 | -------------------------------------------------------------------------------- /alphapose/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | def __repr__(self): 11 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 12 | self._name, list(self._module_dict.keys())) 13 | return format_str 14 | 15 | @property 16 | def name(self): 17 | return self._name 18 | 19 | @property 20 | def module_dict(self): 21 | return self._module_dict 22 | 23 | def get(self, key): 24 | return self._module_dict.get(key, None) 25 | 26 | def _register_module(self, module_class): 27 | """Register a module. 28 | 29 | Args: 30 | module (:obj:`nn.Module`): Module to be registered. 31 | """ 32 | if not inspect.isclass(module_class): 33 | raise TypeError('module must be a class, but got {}'.format( 34 | type(module_class))) 35 | module_name = module_class.__name__ 36 | if module_name in self._module_dict: 37 | raise KeyError('{} is already registered in {}'.format( 38 | module_name, self.name)) 39 | self._module_dict[module_name] = module_class 40 | 41 | def register_module(self, cls): 42 | self._register_module(cls) 43 | return cls 44 | 45 | 46 | def build_from_cfg(cfg, registry, default_args=None): 47 | """Build a module from config dict. 48 | 49 | Args: 50 | cfg (dict): Config dict. It should at least contain the key "type". 51 | registry (:obj:`Registry`): The registry to search the type from. 52 | default_args (dict, optional): Default initialization arguments. 53 | 54 | Returns: 55 | obj: The constructed object. 
56 | """ 57 | assert isinstance(cfg, dict) and 'TYPE' in cfg 58 | assert isinstance(default_args, dict) or default_args is None 59 | args = cfg.copy() 60 | obj_type = args.pop('TYPE') 61 | 62 | if isinstance(obj_type, str): 63 | obj_cls = registry.get(obj_type) 64 | if obj_cls is None: 65 | raise KeyError('{} is not in the {} registry'.format( 66 | obj_type, registry.name)) 67 | elif inspect.isclass(obj_type): 68 | obj_cls = obj_type 69 | else: 70 | raise TypeError('type must be a str or valid type, but got {}'.format( 71 | type(obj_type))) 72 | if default_args is not None: 73 | for name, value in default_args.items(): 74 | args.setdefault(name, value) 75 | return obj_cls(**args) 76 | 77 | 78 | def retrieve_from_cfg(cfg, registry): 79 | """Retrieve a module class from config dict. 80 | 81 | Args: 82 | cfg (dict): Config dict. It should at least contain the key "type". 83 | registry (:obj:`Registry`): The registry to search the type from. 84 | 85 | Returns: 86 | class: The class. 87 | """ 88 | assert isinstance(cfg, dict) and 'TYPE' in cfg 89 | args = cfg.copy() 90 | obj_type = args.pop('TYPE') 91 | 92 | if isinstance(obj_type, str): 93 | obj_cls = registry.get(obj_type) 94 | if obj_cls is None: 95 | raise KeyError('{} is not in the {} registry'.format( 96 | obj_type, registry.name)) 97 | elif inspect.isclass(obj_type): 98 | obj_cls = obj_type 99 | else: 100 | raise TypeError('type must be a str or valid type, but got {}'.format( 101 | type(obj_type))) 102 | 103 | return obj_cls 104 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import roi_align, RoIAlign 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . 
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale=1, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = out_size 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, _pair(self.out_size), 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), 
#x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /alphapose/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Thu Jul 21 17:10:51 2022 3 | 4 | __version__ = '0.6.0+29ace8c' 5 | short_version = '0.6.0' 6 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 
'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard85_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 85 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hrnet/256x192_w32_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'PoseHighResolutionNet' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_LAYERS: 50 39 | FINAL_CONV_KERNEL: 1 40 | PRETRAINED_LAYERS: ['*'] 41 | STAGE2: 42 | NUM_MODULES: 1 43 | NUM_BRANCHES: 2 44 | NUM_BLOCKS: [4, 4] 45 | NUM_CHANNELS: [32, 64] 46 | BLOCK: 'BASIC' 47 | FUSE_METHOD: 'SUM' 48 | STAGE3: 49 | NUM_MODULES: 4 50 | NUM_BRANCHES: 3 51 | NUM_BLOCKS: [4, 4, 4] 52 | NUM_CHANNELS: [32, 64, 128] 53 | BLOCK: 'BASIC' 54 | FUSE_METHOD: 'SUM' 55 | STAGE4: 56 | NUM_MODULES: 3 57 | NUM_BRANCHES: 4 58 | NUM_BLOCKS: [4, 4, 4, 4] 59 | NUM_CHANNELS: [32, 64, 128, 256] 60 | BLOCK: 'BASIC' 61 | 
FUSE_METHOD: 'SUM' 62 | LOSS: 63 | TYPE: 'MSELoss' 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 270 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 170 80 | - 200 81 | DPG_MILESTONE: 210 82 | DPG_STEP: 83 | - 230 84 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 152 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 74 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-concat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'ConcatDataset' 4 | SET_LIST: 5 | - TYPE: 'Mscoco' 6 | MASK_ID: 0 7 | ROOT: './data/coco/' 8 | IMG_PREFIX: 'train2017' 9 | ANN: 'annotations/person_keypoints_train2017.json' 10 | AUG: 11 | FLIP: true 12 | ROT_FACTOR: 40 13 | SCALE_FACTOR: 0.3 14 | NUM_JOINTS_HALF_BODY: 8 15 | PROB_HALF_BODY: -1 16 | - TYPE: 'Mpii' 17 | MASK_ID: 17 18 | ROOT: './data/mpii/' 19 | IMG_PREFIX: 'images' 20 | ANN: 'annot_mpii.json' 21 | AUG: 22 | FLIP: true 23 | ROT_FACTOR: 40 24 | SCALE_FACTOR: 0.3 25 | NUM_JOINTS_HALF_BODY: 8 26 | PROB_HALF_BODY: -1 27 | VAL: 28 | TYPE: 'Mscoco' 29 | ROOT: './data/coco/' 30 | IMG_PREFIX: 'val2017' 31 | ANN: 'annotations/person_keypoints_val2017.json' 32 | TEST: 33 | TYPE: 'Mscoco_det' 34 | ROOT: './data/coco/' 35 | IMG_PREFIX: 'val2017' 36 | DET_FILE: './exp/json/test_det_yolo.json' 37 | ANN: 'annotations/person_keypoints_val2017.json' 38 | DATA_PRESET: 39 | TYPE: 'simple' 40 | SIGMA: 2 41 | NUM_JOINTS: 33 42 | IMAGE_SIZE: 43 | - 256 44 | - 192 45 | HEATMAP_SIZE: 46 | - 64 47 | - 48 48 | MODEL: 49 | TYPE: 'FastPose' 50 | PRETRAINED: '' 51 | TRY_LOAD: '' 52 | 
NUM_DECONV_FILTERS: 53 | - 256 54 | - 256 55 | - 256 56 | NUM_LAYERS: 50 57 | LOSS: 58 | TYPE: 'MSELoss' 59 | DETECTOR: 60 | NAME: 'yolo' 61 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 62 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 63 | NMS_THRES: 0.6 64 | CONFIDENCE: 0.05 65 | TRAIN: 66 | WORLD_SIZE: 4 67 | BATCH_SIZE: 32 68 | BEGIN_EPOCH: 0 69 | END_EPOCH: 200 70 | OPTIMIZER: 'adam' 71 | LR: 0.001 72 | LR_FACTOR: 0.1 73 | LR_STEP: 74 | - 90 75 | - 120 76 | DPG_MILESTONE: 140 77 | DPG_STEP: 78 | - 160 79 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'shuffle' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-simple.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'SimplePose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | 
CONFIDENCE: 0.1 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | DCN: 44 | MODULATED: false 45 | DEFORM_GROUP: 1 46 | FALLBACK_ON_STRIDE: false 47 | STAGE_WITH_DCN: 48 | - false 49 | - true 50 | - true 51 | - true 52 | LOSS: 53 | TYPE: 'MSELoss' 54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 32 63 | BEGIN_EPOCH: 0 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 
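Editor's sketch (not a repository file): how a config such as the 256x192_res50_lr1e-3_2x-dcn.yaml above is loaded with update_config and turned into a pose model through the registry-based build_sppe shown earlier. Building this DCN variant assumes the deformable-conv extension is compiled, and FastPose will fetch ImageNet ResNet weights via torchvision on first use.

from alphapose.utils.config import update_config
from alphapose.models import builder

# Parse the YAML into an EasyDict (see alphapose/utils/config.py above).
cfg = update_config('configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml')

# cfg.MODEL.TYPE ('FastPose') is looked up in the SPPE registry; the class is
# instantiated with the remaining MODEL keys plus PRESET=cfg.DATA_PRESET.
model = builder.build_sppe(cfg.MODEL, preset_cfg=cfg.DATA_PRESET)
model.eval()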
-------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 17 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 32 57 | BEGIN_EPOCH: 0 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | 
IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | BACKBONE: 'se-resnet' 38 | PRETRAINED: '' 39 | TRY_LOAD: '' 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 152 45 | CONV_DIM: 256 46 | FINAL_CONV_KERNEL: 1 47 | STAGE1: 48 | NUM_CONV: 4 49 | STAGE2: 50 | NUM_CONV: 2 51 | STAGE3: 52 | NUM_CONV: 1 53 | LOSS: 54 | TYPE: 'L1JointRegression' 55 | NORM_TYPE: 'sigmoid' 56 | OUTPUT_3D: False 57 | DETECTOR: 58 | NAME: 'yolo' 59 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 60 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 61 | NMS_THRES: 0.6 62 | CONFIDENCE: 0.05 63 | TRAIN: 64 | WORLD_SIZE: 4 65 | BATCH_SIZE: 32 66 | BEGIN_EPOCH: 0 67 | END_EPOCH: 270 68 | OPTIMIZER: 'adam' 69 | LR: 0.001 70 | LR_FACTOR: 0.1 71 | LR_STEP: 72 | - 170 73 | - 200 74 | DPG_MILESTONE: 210 75 | DPG_STEP: 76 | - 230 77 | - 250 78 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'Combined' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | LOSS_1: 50 | TYPE: 'MSELoss' 51 | LOSS_2: 52 | TYPE: 'L1JointRegression' 53 | NORM_TYPE: 'sigmoid' 54 | OUTPUT_3D: False 55 | DETECTOR: 56 | NAME: 'yolo' 57 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 58 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 59 | NMS_THRES: 0.6 60 | CONFIDENCE: 0.05 61 | TRAIN: 62 | WORLD_SIZE: 4 63 | BATCH_SIZE: 32 64 | BEGIN_EPOCH: 0 65 | END_EPOCH: 270 66 | OPTIMIZER: 'adam' 67 | LR: 0.001 68 | LR_FACTOR: 0.1 69 | LR_STEP: 70 | - 170 71 | - 200 72 | DPG_MILESTONE: 210 73 | DPG_STEP: 74 | - 230 75 | - 250 76 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: 
-------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 320 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 190 80 | - 220 81 | DPG_MILESTONE: 230 82 | DPG_STEP: 83 | - 260 84 | - 280 85 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | 
END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 48 57 | BEGIN_EPOCH: 5 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 69 | -------------------------------------------------------------------------------- /configs/dense_coco/resnet50/256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'densecoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017_dense.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 16 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'densecoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017_dense.json' 18 | TEST: 19 | TYPE: 'densecoco' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'test2017' 22 | ANN: 'annotations/person_keypoints_val2017_dense.json' 23 | DATA_PRESET: 24 | TYPE: 'simple' 25 | SIGMA: 2 26 | NUM_JOINTS: 17 27 | NUM_JOINTS_DENSE: 49 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC_Dense' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'MSELoss' 62 | DETECTOR: 63 | NAME: 'yolo' 64 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 65 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 66 | NMS_THRES: 0.6 67 | CONFIDENCE: 0.05 68 | TRAIN: 69 | 
WORLD_SIZE: 4 70 | BATCH_SIZE: 32 71 | BEGIN_EPOCH: 0 72 | END_EPOCH: 200 73 | OPTIMIZER: 'adam' 74 | LR: 0.001 75 | LR_FACTOR: 0.1 76 | LR_STEP: 77 | - 90 78 | - 120 79 | DPG_MILESTONE: 140 80 | DPG_STEP: 81 | - 160 82 | - 190 -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | BACKBONE: 'se-resnet' 38 | PRETRAINED: '' 39 | TRY_LOAD: '' 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 152 45 | CONV_DIM: 256 46 | FINAL_CONV_KERNEL: 1 47 | STAGE1: 48 | NUM_CONV: 4 49 | STAGE2: 50 | NUM_CONV: 2 51 | STAGE3: 52 | NUM_CONV: 1 53 | LOSS: 54 | TYPE: 'L1JointRegression' 55 | NORM_TYPE: 'sigmoid' 56 | OUTPUT_3D: False 57 | DETECTOR: 58 | NAME: 'yolo' 59 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 60 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 61 | NMS_THRES: 0.6 62 | CONFIDENCE: 0.05 63 | TRAIN: 64 | WORLD_SIZE: 4 65 | BATCH_SIZE: 32 66 | BEGIN_EPOCH: 0 67 | END_EPOCH: 270 68 | OPTIMIZER: 'adam' 69 | LR: 0.001 70 | LR_FACTOR: 0.1 71 | LR_STEP: 72 | - 170 73 | - 200 74 | DPG_MILESTONE: 210 75 | DPG_STEP: 76 | - 230 77 | - 250 78 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | 
NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 48 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 320 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 190 80 | - 220 81 | DPG_MILESTONE: 230 82 | DPG_STEP: 83 | - 260 84 | - 280 85 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | 
NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | CONV_DIM: 256 44 | LOSS: 45 | TYPE: 'MSELoss' 46 | DETECTOR: 47 | NAME: 'yolo' 48 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 49 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 50 | NMS_THRES: 0.6 51 | CONFIDENCE: 0.05 52 | TRAIN: 53 | WORLD_SIZE: 4 54 | BATCH_SIZE: 32 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 270 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 170 62 | - 200 63 | DPG_MILESTONE: 210 64 | DPG_STEP: 65 | - 230 66 | - 250 67 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 11 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 48 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 50 
61 | - 70 62 | DPG_MILESTONE: 90 63 | DPG_STEP: 64 | - 110 65 | - 130 66 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 26 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 26 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 
66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_68_noface' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_68_noface' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_68_noface_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 68 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 5 72 | BATCH_SIZE: 52 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 120 75 | OPTIMIZER: 'adam' 76 | LR: 0.0001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 10 80 | - 30 81 | 
DPG_MILESTONE: 40 82 | DPG_STEP: 83 | - 60 84 | - 90 -------------------------------------------------------------------------------- /configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_coco_wholebody_136' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 8 18 | PROB_HALF_BODY: 0.3 19 | VAL: 20 | TYPE: 'Halpe_136' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'Halpe_136_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'Combined' 33 | SIGMA: 2 34 | NUM_JOINTS: 136 35 | IMAGE_SIZE: 36 | - 256 37 | - 192 38 | HEATMAP_SIZE: 39 | - 64 40 | - 48 41 | MODEL: 42 | TYPE: 'FastPose' 43 | PRETRAINED: '' 44 | TRY_LOAD: '' 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_LAYERS: 50 50 | CONV_DIM: 256 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'Combined' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | LOSS_1: 65 | TYPE: 'MSELoss' 66 | LOSS_2: 67 | TYPE: 'L1JointRegression' 68 | NORM_TYPE: 'sigmoid' 69 | OUTPUT_3D: False 70 | DETECTOR: 71 | NAME: 'yolo' 72 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 73 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 74 | NMS_THRES: 0.6 75 | CONFIDENCE: 0.05 76 | TRAIN: 77 | WORLD_SIZE: 4 78 | BATCH_SIZE: 48 79 | BEGIN_EPOCH: 0 80 | END_EPOCH: 270 81 | OPTIMIZER: 'adam' 82 | LR: 0.001 83 | LR_FACTOR: 0.1 84 | LR_STEP: 85 | - 170 86 | - 200 87 | DPG_MILESTONE: 210 88 | DPG_STEP: 89 | - 230 90 | - 250 91 | -------------------------------------------------------------------------------- /configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_coco_wholebody_136' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 8 18 | PROB_HALF_BODY: 0.3 19 | VAL: 20 | TYPE: 'Halpe_136' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'Halpe_136_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'L1JointRegression' 33 | SIGMA: 2 34 | NUM_JOINTS: 136 35 | IMAGE_SIZE: 36 | - 256 37 | - 192 38 | HEATMAP_SIZE: 39 | - 64 40 | - 48 41 | MODEL: 42 | TYPE: 'FastPose' 43 | PRETRAINED: '' 44 | TRY_LOAD: '' 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_LAYERS: 50 50 | LOSS: 51 | TYPE: 'L1JointRegression' 52 | NORM_TYPE: 'sigmoid' 53 | OUTPUT_3D: False 
54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 48 63 | BEGIN_EPOCH: 5 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 75 | -------------------------------------------------------------------------------- /configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'SingleHand' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 0 18 | PROB_HALF_BODY: 0.0 19 | VAL: 20 | TYPE: 'SingleHand' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'SingleHand_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'L1JointRegression' 33 | NORM_TYPE: 'sigmoid' 34 | SIGMA: 2 35 | NUM_JOINTS: 21 36 | IMAGE_SIZE: 37 | - 256 38 | - 192 39 | HEATMAP_SIZE: 40 | - 64 41 | - 48 42 | MODEL: 43 | TYPE: 'FastPose' 44 | PRETRAINED: '' 45 | TRY_LOAD: '' 46 | NUM_DECONV_FILTERS: 47 | - 256 48 | - 256 49 | - 256 50 | CONV_DIM: 256 51 | NUM_LAYERS: 50 52 | DCN: 53 | MODULATED: false 54 | DEFORM_GROUP: 1 55 | FALLBACK_ON_STRIDE: false 56 | STAGE_WITH_DCN: 57 | - false 58 | - true 59 | - true 60 | - true 61 | LOSS: 62 | TYPE: 'L1JointRegression' 63 | NORM_TYPE: 'sigmoid' 64 | OUTPUT_3D: False 65 | DETECTOR: 66 | NAME: 'yolo' 67 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 68 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 69 | NMS_THRES: 0.6 70 | CONFIDENCE: 0.05 71 | TRAIN: 72 | WORLD_SIZE: 6 73 | BATCH_SIZE: 48 74 | BEGIN_EPOCH: 0 75 | END_EPOCH: 100 76 | OPTIMIZER: 'adam' 77 | LR: 0.001 78 | LR_FACTOR: 0.1 79 | LR_STEP: 80 | - 15 81 | - 35 82 | DPG_MILESTONE: 50 83 | DPG_STEP: 84 | - 65 85 | - 80 -------------------------------------------------------------------------------- /configs/smpl/256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATASET: 'mix_smpl' 3 | SET_LIST: 4 | - ROOT: './data/h36m/' 5 | TEST_SET: 'Sample_20_test_Human36M_smpl' 6 | TRAIN_SET: 'Sample_5_train_Human36M_smpl_leaf_twist' 7 | - ROOT: './data/coco/' 8 | TRAIN_SET: 'train2017' 9 | - ROOT: './data/3dhp/' 10 | TRAIN_SET: 'train_v2' 11 | PROTOCOL: 2 12 | FLIP: True 13 | ROT_FACTOR: 30 14 | SCALE_FACTOR: 0.3 15 | NUM_JOINTS_HALF_BODY: 8 16 | PROB_HALF_BODY: -1 17 | COLOR_FACTOR: 0.2 18 | OCCLUSION: True 19 | DATA_PRESET: 20 | TYPE: 'simple_smpl' 21 | SIGMA: 2 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | HEATMAP_SIZE: 26 | - 64 27 | - 64 28 | MODEL: 29 | TYPE: 'Simple3DPoseBaseSMPLCam' 30 | PRETRAINED: '' 31 | TRY_LOAD: '' 32 | FOCAL_LENGTH: 1000 33 | IMAGE_SIZE: 34 | - 256 35 | - 256 36 | HEATMAP_SIZE: 37 | - 64 38 | - 64 39 | NUM_JOINTS: 29 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 34 45 | EXTRA: 46 | 
SIGMA: 2 47 | BACKBONE: 'resnet' 48 | CROP: 'padding' 49 | AUGMENT: 'none' 50 | PRESET: 'simple_smpl_3d' 51 | DEPTH_DIM: 64 52 | POST: 53 | NORM_TYPE: 'softmax' 54 | LOSS: 55 | TYPE: 'L1LossDimSMPL' 56 | ELEMENTS: 57 | BETA_WEIGHT: 1 58 | BETA_REG_WEIGHT: 0 59 | PHI_REG_WEIGHT: 0.0001 60 | LEAF_REG_WEIGHT: 0 61 | TWIST_WEIGHT: 0.01 62 | THETA_WEIGHT: 0.01 63 | UVD24_WEIGHT: 1 64 | XYZ24_WEIGHT: 0 65 | XYZ_SMPL24_WEIGHT: 0 66 | XYZ_SMPL17_WEIGHT: 0 67 | VERTICE_WEIGHT: 0 68 | TEST: 69 | HEATMAP2COORD: 'coord' 70 | TRAIN: 71 | WORLD_SIZE: 8 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 200 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 90 80 | - 120 81 | DPG_MILESTONE: 140 82 | DPG_STEP: 83 | - 160 84 | - 190 85 | -------------------------------------------------------------------------------- /detector/apis.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Chao Xu (xuchao.19962007@sjtu.edu.cn) 4 | # ----------------------------------------------------- 5 | 6 | """API of detector""" 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | def get_detector(opt=None): 11 | if opt.detector == 'yolo': 12 | from detector.yolo_api import YOLODetector 13 | from detector.yolo_cfg import cfg 14 | return YOLODetector(cfg, opt) 15 | elif 'yolox' in opt.detector: 16 | from detector.yolox_api import YOLOXDetector 17 | from detector.yolox_cfg import cfg 18 | if opt.detector.lower() == 'yolox': 19 | opt.detector = 'yolox-x' 20 | cfg.MODEL_NAME = opt.detector.lower() 21 | cfg.MODEL_WEIGHTS = f'detector/yolox/data/{opt.detector.lower().replace("-", "_")}.pth' 22 | return YOLOXDetector(cfg, opt) 23 | elif opt.detector == 'tracker': 24 | from detector.tracker_api import Tracker 25 | from detector.tracker_cfg import cfg 26 | return Tracker(cfg, opt) 27 | elif opt.detector.startswith('efficientdet_d'): 28 | from detector.effdet_api import EffDetDetector 29 | from detector.effdet_cfg import cfg 30 | return EffDetDetector(cfg, opt) 31 | else: 32 | raise NotImplementedError 33 | 34 | 35 | class BaseDetector(ABC): 36 | def __init__(self): 37 | pass 38 | 39 | @abstractmethod 40 | def image_preprocess(self, img_name): 41 | pass 42 | 43 | @abstractmethod 44 | def images_detection(self, imgs, orig_dim_list): 45 | pass 46 | 47 | @abstractmethod 48 | def detect_one_img(self, img_name): 49 | pass 50 | -------------------------------------------------------------------------------- /detector/effdet_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | 5 | cfg.NMS_THRES = 0.6 # 0.6(0.713) 0.5(0.707) 6 | cfg.CONFIDENCE = 0.2 # 0.15 0.1 7 | cfg.NUM_CLASSES = 80 8 | cfg.MAX_DETECTIONS = 200 # 100 9 | -------------------------------------------------------------------------------- /detector/efficientdet/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a EfficientDet Object Detector 2 | 3 | Forked and modified from https://github.com/rwightman/efficientdet-pytorch, many thanks! 
4 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | from .bench import DetBenchEval, DetBenchTrain 3 | from .config.config import get_efficientdet_config 4 | from .helpers import load_checkpoint, load_pretrained -------------------------------------------------------------------------------- /detector/efficientdet/effdet/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from collections import OrderedDict 4 | try: 5 | from torch.hub import load_state_dict_from_url 6 | except ImportError: 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | 10 | def load_checkpoint(model, checkpoint_path): 11 | if checkpoint_path and os.path.isfile(checkpoint_path): 12 | print("=> Loading checkpoint '{}'".format(checkpoint_path)) 13 | checkpoint = torch.load(checkpoint_path) 14 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 15 | new_state_dict = OrderedDict() 16 | for k, v in checkpoint['state_dict'].items(): 17 | if k.startswith('module'): 18 | name = k[7:] # remove `module.` 19 | else: 20 | name = k 21 | new_state_dict[name] = v 22 | model.load_state_dict(new_state_dict) 23 | else: 24 | model.load_state_dict(checkpoint) 25 | print("=> Loaded checkpoint '{}'".format(checkpoint_path)) 26 | else: 27 | print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) 28 | raise FileNotFoundError() 29 | 30 | 31 | def load_pretrained(model, url, filter_fn=None, strict=True): 32 | if not url: 33 | print("=> Warning: Pretrained model URL is empty, using random initialization.") 34 | return 35 | state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') 36 | if filter_fn is not None: 37 | state_dict = filter_fn(state_dict) 38 | model.load_state_dict(state_dict, strict=strict) 39 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection 2 | 3 | All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | # Object detection data loaders and libraries are mostly based on RetinaNet: 16 | # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet 17 | -------------------------------------------------------------------------------- /detector/efficientdet/weights/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/efficientdet/weights/get_models.sh -------------------------------------------------------------------------------- /detector/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /detector/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets.to('cpu') 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 27 | auto order = order_t.data_ptr<int64_t>(); 28 | auto x1 = x1_t.data_ptr<scalar_t>(); 29 | auto y1 = y1_t.data_ptr<scalar_t>(); 30 | auto x2 = x2_t.data_ptr<scalar_t>(); 31 | auto y2 = y2_t.data_ptr<scalar_t>(); 32 | auto areas = areas_t.data_ptr<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } 72 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } 18 | -------------------------------------------------------------------------------- /detector/tracker/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | MOT Tracker adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work!
3 | 4 | #### Getting started 5 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 6 | 7 | Enable tracking by setting the detector as tracker: `--detector tracker` -------------------------------------------------------------------------------- /detector/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/cfg/ccmcpe.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/wangzd/datasets/MOT", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.train", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "caltech":"./data/caltech.10k.val", 15 | "cuhksysu":"./data/cuhksysu.val", 16 | "prw":"./data/prw.val" 17 | }, 18 | "test": 19 | { 20 | "mot19":"./data/mot19.train", 21 | "caltech":"./data/caltech.val", 22 | "citypersons":"./data/citypersons.val" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /detector/tracker/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, img_size=(1088, 608), color=(127.5, 127.5, 127.5)): 20 | # resize a rectangular image to a padded rectangular 21 | height=img_size[1] 22 | width=img_size[0] 23 | shape = img.shape[:2] # shape = [height, width] 24 | ratio = min(float(height)/shape[0], float(width)/shape[1]) 25 | new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height] 26 | dw = (width - new_shape[0]) / 2 # width padding 27 | dh = (height - new_shape[1]) / 2 # height padding 28 | top, bottom = round(dh - 0.1), round(dh + 0.1) 29 | left, right = round(dw - 0.1), round(dw + 0.1) 30 | img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border 31 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular 32 | return img 33 | 34 | 35 | def prep_image(img, img_size=(1088, 608)): 36 | """ 37 | Prepare image for inputting to the neural network. 38 | 39 | Returns a Variable 40 | """ 41 | 42 | orig_im = cv2.imread(img) 43 | dim = orig_im.shape[1], orig_im.shape[0] 44 | img = (letterbox_image(orig_im, img_size)) 45 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 46 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 47 | return img_, orig_im, dim 48 | 49 | 50 | def prep_frame(img, img_size=(1088, 608)): 51 | """ 52 | Prepare image for inputting to the neural network. 
53 | 54 | Returns a Variable 55 | """ 56 | 57 | orig_im = img 58 | dim = orig_im.shape[1], orig_im.shape[0] 59 | img = (letterbox_image(orig_im, img_size)) 60 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 61 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 62 | return img_, orig_im, dim 63 | 64 | -------------------------------------------------------------------------------- /detector/tracker/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /detector/tracker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/utils/__init__.py -------------------------------------------------------------------------------- /detector/tracker/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /detector/tracker/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /detector/tracker/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /detector/tracker/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /detector/tracker/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 35 | text_thickness = 1 if text_scale > 1.1 else 1 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 
71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /detector/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/tracker/cfg/yolov3.cfg' 5 | cfg.WEIGHTS = 'detector/tracker/data/jde.1088x608.uncertainty.pt' 6 | cfg.IMG_SIZE = (1088, 608) 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.4 9 | cfg.BUFFER_SIZE = 30 # frame buffer -------------------------------------------------------------------------------- /detector/yolo/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a YOLO v3 Object Detector 2 | 3 | Forked from https://github.com/ayooshkathuria/pytorch-yolo-v3 4 | -------------------------------------------------------------------------------- /detector/yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/yolo/__init__.py -------------------------------------------------------------------------------- /detector/yolo/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 
101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/yolo/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import cv2 8 | from util import * 9 | import argparse 10 | import os 11 | import os.path as osp 12 | from darknet import Darknet 13 | from preprocess import prep_image, inp_to_image 14 | import pandas as pd 15 | import random 16 | import pickle as pkl 17 | import itertools 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | scales = "1,2,3" 23 | images = "imgs/messi.jpg" 24 | batch_size = 1 25 | confidence = 0.5 26 | nms_thesh = 0.4 27 | 28 | CUDA = torch.cuda.is_available() 29 | 30 | num_classes = 80 31 | classes = load_classes('data/coco.names') 32 | 33 | #Set up the neural network 34 | print("Loading network.....") 35 | model = Darknet("cfg/yolov3-spp.cfg") 36 | model.load_weights("yolov3-spp.weights") 37 | print("Network successfully loaded") 38 | 39 | model.net_info["height"] = "608" 40 | inp_dim = int(model.net_info["height"]) 41 | assert inp_dim % 32 == 0 42 | assert inp_dim > 32 43 | 44 | #If there's a GPU availible, put the model on GPU 45 | if CUDA: 46 | model.cuda() 47 | 48 | #Set the model in evaluation mode 49 | model.eval() 50 | 51 | #Detection phase 52 | try: 53 | imlist = [] 54 | imlist.append(osp.join(osp.realpath('.'), images)) 55 | except FileNotFoundError: 56 | print ("No file or directory with the name {}".format(images)) 57 | exit() 58 | 59 | batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))])) 60 | im_batches = [x[0] for x in batches] 61 | orig_ims = [x[1] for x in batches] 62 | im_dim_list = [x[2] for x in batches] 63 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) 64 | 65 | if CUDA: 66 | im_dim_list = im_dim_list.cuda() 67 | 68 | 69 | for batch in im_batches: 70 | #load the image 71 | if CUDA: 72 | batch = batch.cuda() 73 | with torch.no_grad(): 74 | prediction = model(Variable(batch), CUDA) 75 | 76 | prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh) 77 | output = prediction 78 | 79 | if CUDA: 80 | torch.cuda.synchronize() 81 | 82 | try: 83 | output 84 | except NameError: 85 | print("No detections were made") 86 | exit() 87 | print(im_dim_list.shape) 88 | im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long()) 89 | 90 | scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1) 91 | 92 | 93 | output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2 94 | output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2 95 | 96 | output[:,1:5] /= scaling_factor 97 | 98 | for i in range(output.shape[0]): 99 | output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0]) 100 | output[i, [2,4]] = 
torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1]) 101 | 102 | print(output) 103 | print(output.shape) 104 | -------------------------------------------------------------------------------- /detector/yolo/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/yolo/pallete -------------------------------------------------------------------------------- /detector/yolo/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, inp_dim): 20 | '''resize image with unchanged aspect ratio using padding''' 21 | img_w, img_h = img.shape[1], img.shape[0] 22 | w, h = inp_dim 23 | new_w = int(img_w * min(w / img_w, h / img_h)) 24 | new_h = int(img_h * min(w / img_w, h / img_h)) 25 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 26 | 27 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 28 | 29 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 30 | 31 | return canvas 32 | 33 | 34 | def prep_image(img, inp_dim): 35 | """ 36 | Prepare image for inputting to the neural network. 37 | 38 | Returns a Variable 39 | """ 40 | 41 | orig_im = cv2.imread(img) 42 | dim = orig_im.shape[1], orig_im.shape[0] 43 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 44 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 45 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 46 | return img_, orig_im, dim 47 | 48 | 49 | def prep_frame(img, inp_dim): 50 | """ 51 | Prepare image for inputting to the neural network. 
52 | 53 | Returns a Variable 54 | """ 55 | 56 | orig_im = img 57 | dim = orig_im.shape[1], orig_im.shape[0] 58 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 59 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 60 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 61 | return img_, orig_im, dim 62 | 63 | 64 | def prep_image_pil(img, network_dim): 65 | orig_im = Image.open(img) 66 | img = orig_im.convert('RGB') 67 | dim = img.size 68 | img = img.resize(network_dim) 69 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 70 | img = img.view(*network_dim, 3).transpose(0, 1).transpose(0, 2).contiguous() 71 | img = img.view(1, 3, *network_dim) 72 | img = img.float().div(255.0) 73 | return (img, orig_im, dim) 74 | 75 | 76 | def inp_to_image(inp): 77 | inp = inp.cpu().squeeze() 78 | inp = inp * 255 79 | try: 80 | inp = inp.data.numpy() 81 | except RuntimeError: 82 | inp = inp.numpy() 83 | inp = inp.transpose(1, 2, 0) 84 | 85 | inp = inp[:, :, ::-1] 86 | return inp 87 | -------------------------------------------------------------------------------- /detector/yolo_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' 5 | cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' 6 | cfg.INP_DIM = 608 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.1 9 | cfg.NUM_CLASSES = 80 10 | -------------------------------------------------------------------------------- /detector/yolox/README.md: -------------------------------------------------------------------------------- 1 | # An implementation of PyTorch version YOLOX 2 | 3 | Forked and modified from https://github.com/Megvii-BaseDetection/YOLOX 4 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from detector.yolox.yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | 26 | if "model" not in self.__dict__: 27 | from detector.yolox.yolox.models import YOLOX, YOLOFPN, YOLOXHead 28 | 29 | backbone = YOLOFPN() 30 | head = YOLOXHead( 31 | self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu" 32 | ) 33 | self.model = YOLOX(backbone, head) 34 | self.model.apply(init_yolo) 35 | self.model.head.initialize_biases(1e-2) 36 | 37 | return self.model 38 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from detector.yolox.yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 0.33 16 | self.width = 0.25 17 | self.input_size = (416, 416) 18 | self.random_size = (10, 20) 19 | self.mosaic_scale = (0.5, 1.5) 20 | self.test_size = (416, 416) 21 | self.mosaic_prob = 0.5 22 | self.enable_mixup = False 23 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 24 | 25 | def get_model(self, sublinear=False): 26 | def init_yolo(M): 27 | for m in M.modules(): 28 | if isinstance(m, nn.BatchNorm2d): 29 | m.eps = 1e-3 30 | m.momentum = 0.03 31 | 32 | if "model" not in self.__dict__: 33 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 34 | 35 | in_channels = [256, 512, 1024] 36 | # NANO model use depthwise = True, which is main difference. 
37 | backbone = YOLOPAFPN( 38 | self.depth, 39 | self.width, 40 | in_channels=in_channels, 41 | act=self.act, 42 | depthwise=True, 43 | ) 44 | head = YOLOXHead( 45 | self.num_classes, 46 | self.width, 47 | in_channels=in_channels, 48 | act=self.act, 49 | depthwise=True, 50 | ) 51 | self.model = YOLOX(backbone, head) 52 | 53 | self.model.apply(init_yolo) 54 | self.model.head.initialize_biases(1e-2) 55 | return self.model 56 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.input_size = (416, 416) 16 | self.mosaic_scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/example/custom/nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 0.33 16 | self.width = 0.25 17 | self.input_size = (416, 416) 18 | self.mosaic_scale = (0.5, 1.5) 19 | self.random_size = (10, 20) 20 | self.test_size = (416, 416) 21 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 22 | self.enable_mixup = False 23 | 24 | # Define yourself dataset path 25 | self.data_dir = "datasets/coco128" 26 | self.train_ann = "instances_train2017.json" 27 | self.val_ann = "instances_val2017.json" 28 | 29 | self.num_classes = 71 30 | 31 | def get_model(self, sublinear=False): 32 | def init_yolo(M): 33 | for m in M.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | 38 | if "model" not in self.__dict__: 39 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 40 | 41 | in_channels = [256, 512, 1024] 42 | # NANO model use depthwise = True, which is main difference. 43 | backbone = YOLOPAFPN( 44 | self.depth, self.width, in_channels=in_channels, depthwise=True 45 | ) 46 | head = YOLOXHead( 47 | self.num_classes, self.width, in_channels=in_channels, depthwise=True 48 | ) 49 | self.model = YOLOX(backbone, head) 50 | 51 | self.model.apply(init_yolo) 52 | self.model.head.initialize_biases(1e-2) 53 | return self.model 54 | -------------------------------------------------------------------------------- /detector/yolox/exps/example/custom/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | import os 5 | 6 | from yolox.exp import Exp as MyExp 7 | 8 | 9 | class Exp(MyExp): 10 | def __init__(self): 11 | super(Exp, self).__init__() 12 | self.depth = 0.33 13 | self.width = 0.50 14 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 15 | 16 | # Define yourself dataset path 17 | self.data_dir = "datasets/coco128" 18 | self.train_ann = "instances_train2017.json" 19 | self.val_ann = "instances_val2017.json" 20 | 21 | self.num_classes = 71 22 | 23 | self.max_epoch = 300 24 | self.data_num_workers = 4 25 | self.eval_interval = 1 26 | -------------------------------------------------------------------------------- /detector/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | -------------------------------------------------------------------------------- /detector/yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .preprocess import prep_image, prep_frame 2 | -------------------------------------------------------------------------------- /detector/yolox/utils/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | def preproc(img, input_size, swap=(2, 0, 1)): 9 | if len(img.shape) == 3: 10 | padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 11 | else: 12 | padded_img = np.ones(input_size, dtype=np.uint8) * 114 13 | 14 | r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) 15 | resized_img = cv2.resize( 16 | img, 17 | (int(img.shape[1] * r), int(img.shape[0] * r)), 18 | interpolation=cv2.INTER_LINEAR, 19 | ).astype(np.uint8) 20 | padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img 21 | padded_img = padded_img.transpose(swap) 22 | padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) 23 | return padded_img, r 24 | 25 | 26 | def prep_image(img, img_size): 27 | """ 28 | Prepare image for inputting to the neural network. 29 | 30 | Returns a Variable 31 | """ 32 | 33 | orig_im = cv2.imread(img) 34 | dim = orig_im.shape[1], orig_im.shape[0] 35 | img_, _ = preproc(orig_im, img_size) 36 | img_ = torch.from_numpy(img_).unsqueeze(0).float() 37 | 38 | return img_, orig_im, dim 39 | 40 | 41 | def prep_frame(img, img_size): 42 | """ 43 | Prepare image for inputting to the neural network. 44 | 45 | Returns a Variable 46 | """ 47 | 48 | orig_im = img 49 | dim = orig_im.shape[1], orig_im.shape[0] 50 | img_, _ = preproc(orig_im, img_size) 51 | img_ = torch.from_numpy(img_).unsqueeze(0).float() 52 | 53 | return img_, orig_im, dim 54 | -------------------------------------------------------------------------------- /detector/yolox/yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | __version__ = "0.3.0" 5 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/base_exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
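Before the experiment classes below, a short sketch of how the `preproc` letterbox from `detector/yolox/utils/preprocess.py` above pairs with its scale ratio when mapping detections back to the original frame (illustrative values; not part of the library):

```python
import numpy as np
from detector.yolox.utils.preprocess import preproc  # assumes the repo root is on sys.path

frame = np.zeros((720, 1280, 3), dtype=np.uint8)    # H x W x C original frame
inp, r = preproc(frame, (640, 640))                  # CHW float32 padded to 640x640, r = min(640/720, 640/1280) = 0.5

# A box predicted on the padded input maps back to original pixels by dividing by r.
box_on_input = np.array([100.0, 80.0, 300.0, 240.0])  # hypothetical x1, y1, x2, y2
box_on_frame = box_on_input / r
```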
4 | 5 | import ast 6 | import pprint 7 | from abc import ABCMeta, abstractmethod 8 | from typing import Dict 9 | 10 | # from tabulate import tabulate 11 | 12 | import torch 13 | from torch.nn import Module 14 | 15 | # from detector.yolox.yolox.utils import LRScheduler 16 | 17 | 18 | class BaseExp(metaclass=ABCMeta): 19 | """Basic class for any experiment.""" 20 | 21 | def __init__(self): 22 | self.seed = None 23 | self.output_dir = "./YOLOX_outputs" 24 | self.print_interval = 100 25 | self.eval_interval = 10 26 | 27 | @abstractmethod 28 | def get_model(self) -> Module: 29 | pass 30 | 31 | @abstractmethod 32 | def get_data_loader( 33 | self, batch_size: int, is_distributed: bool 34 | ) -> Dict[str, torch.utils.data.DataLoader]: 35 | pass 36 | 37 | @abstractmethod 38 | def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: 39 | pass 40 | 41 | @abstractmethod 42 | def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs): 43 | pass 44 | 45 | @abstractmethod 46 | def get_evaluator(self): 47 | pass 48 | 49 | @abstractmethod 50 | def eval(self, model, evaluator, weights): 51 | pass 52 | 53 | def __repr__(self): 54 | from tabulate import tabulate 55 | 56 | table_header = ["keys", "values"] 57 | exp_table = [ 58 | (str(k), pprint.pformat(v)) 59 | for k, v in vars(self).items() 60 | if not k.startswith("_") 61 | ] 62 | return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") 63 | 64 | def merge(self, cfg_list): 65 | assert len(cfg_list) % 2 == 0 66 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 67 | # only update value with same key 68 | if hasattr(self, k): 69 | src_value = getattr(self, k) 70 | src_type = type(src_value) 71 | if src_value is not None and src_type != type(v): 72 | try: 73 | v = src_type(v) 74 | except Exception: 75 | v = ast.literal_eval(v) 76 | setattr(self, k, v) 77 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import importlib 6 | import os 7 | import sys 8 | 9 | 10 | def get_exp_by_file(exp_file): 11 | try: 12 | sys.path.append(os.path.dirname(exp_file)) 13 | current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) 14 | exp = current_exp.Exp() 15 | except Exception: 16 | raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) 17 | return exp 18 | 19 | 20 | def get_exp_by_name(exp_name): 21 | exp = exp_name.replace("-", "_") # convert string like "yolox-s" to "yolox_s" 22 | module_name = ".".join(["detector", "yolox", "yolox", "exp", "default", exp]) 23 | exp_object = importlib.import_module(module_name).Exp() 24 | return exp_object 25 | 26 | 27 | def get_exp(exp_file=None, exp_name=None): 28 | """ 29 | get Exp object by file or name. If exp_file and exp_name 30 | are both provided, get Exp by exp_file. 31 | 32 | Args: 33 | exp_file (str): file path of experiment. 34 | exp_name (str): name of experiment. "yolo-s", 35 | """ 36 | assert ( 37 | exp_file is not None or exp_name is not None 38 | ), "plz provide exp file or exp name." 
39 | if exp_file is not None: 40 | return get_exp_by_file(exp_file) 41 | else: 42 | return get_exp_by_name(exp_name) 43 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # This file is used for package installation and find default exp file 6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default" 12 | 13 | if _EXP_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _ExpFinder(importlib.abc.MetaPathFinder): 18 | def find_spec(self, name, path, target=None): 19 | if not name.startswith("detector.yolox.yolox.exp.default"): 20 | return 21 | project_name = name.split(".")[-1] + ".py" 22 | target_file = _EXP_PATH / project_name 23 | if not target_file.is_file(): 24 | return 25 | return importlib.util.spec_from_file_location(name, target_file) 26 | 27 | sys.meta_path.append(_ExpFinder()) 28 | -------------------------------------------------------------------------------- /detector/yolox/yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # import torch first to make jit op work without `ImportError of libc10.so` 6 | import torch # noqa 7 | 8 | from .jit_ops import FastCOCOEvalOp, JitOp 9 | 10 | try: 11 | from .fast_coco_eval_api import COCOeval_opt 12 | except ImportError: # exception will be raised when users build yolox from source 13 | pass 14 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
4 | 5 | from .build import * 6 | from .darknet import CSPDarknet, Darknet 7 | from .losses import IOUloss 8 | from .yolo_fpn import YOLOFPN 9 | from .yolo_head import YOLOXHead 10 | from .yolo_pafpn import YOLOPAFPN 11 | from .yolox import YOLOX 12 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | from torch import nn 6 | from torch.hub import load_state_dict_from_url 7 | 8 | __all__ = [ 9 | "create_yolox_model", 10 | "yolox_nano", 11 | "yolox_tiny", 12 | "yolox_s", 13 | "yolox_m", 14 | "yolox_l", 15 | "yolox_x", 16 | "yolov3", 17 | ] 18 | 19 | _CKPT_ROOT_URL = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download" 20 | _CKPT_FULL_PATH = { 21 | "yolox-nano": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_nano.pth", 22 | "yolox-tiny": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_tiny.pth", 23 | "yolox-s": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_s.pth", 24 | "yolox-m": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_m.pth", 25 | "yolox-l": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_l.pth", 26 | "yolox-x": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_x.pth", 27 | "yolov3": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_darknet.pth", 28 | } 29 | 30 | 31 | def create_yolox_model( 32 | name: str, pretrained: bool = True, num_classes: int = 80, device=None 33 | ) -> nn.Module: 34 | """creates and loads a YOLOX model 35 | 36 | Args: 37 | name (str): name of model. for example, "yolox-s", "yolox-tiny". 38 | pretrained (bool): load pretrained weights into the model. Default to True. 39 | num_classes (int): number of model classes. Defalut to 80. 40 | device (str): default device to for model. Defalut to None. 
41 | 42 | Returns: 43 | YOLOX model (nn.Module) 44 | """ 45 | from yolox.exp import get_exp, Exp 46 | 47 | if device is None: 48 | device = "cuda:0" if torch.cuda.is_available() else "cpu" 49 | device = torch.device(device) 50 | 51 | assert ( 52 | name in _CKPT_FULL_PATH 53 | ), f"user should use one of value in {_CKPT_FULL_PATH.keys()}" 54 | exp: Exp = get_exp(exp_name=name) 55 | exp.num_classes = num_classes 56 | yolox_model = exp.get_model() 57 | if pretrained and num_classes == 80: 58 | weights_url = _CKPT_FULL_PATH[name] 59 | ckpt = load_state_dict_from_url(weights_url, map_location="cpu") 60 | if "model" in ckpt: 61 | ckpt = ckpt["model"] 62 | yolox_model.load_state_dict(ckpt) 63 | 64 | yolox_model.to(device) 65 | return yolox_model 66 | 67 | 68 | def yolox_nano(pretrained=True, num_classes=80, device=None): 69 | return create_yolox_model("yolox-nano", pretrained, num_classes, device) 70 | 71 | 72 | def yolox_tiny(pretrained=True, num_classes=80, device=None): 73 | return create_yolox_model("yolox-tiny", pretrained, num_classes, device) 74 | 75 | 76 | def yolox_s(pretrained=True, num_classes=80, device=None): 77 | return create_yolox_model("yolox-s", pretrained, num_classes, device) 78 | 79 | 80 | def yolox_m(pretrained=True, num_classes=80, device=None): 81 | return create_yolox_model("yolox-m", pretrained, num_classes, device) 82 | 83 | 84 | def yolox_l(pretrained=True, num_classes=80, device=None): 85 | return create_yolox_model("yolox-l", pretrained, num_classes, device) 86 | 87 | 88 | def yolox_x(pretrained=True, num_classes=80, device=None): 89 | return create_yolox_model("yolox-x", pretrained, num_classes, device) 90 | 91 | 92 | def yolov3(pretrained=True, num_classes=80, device=None): 93 | return create_yolox_model("yolov3", pretrained, num_classes, device) 94 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved.
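A quick numeric sanity check of the `IOUloss` module defined in `losses.py` just below (illustrative only; assumes the repo root is importable):

```python
import torch
from detector.yolox.yolox.models.losses import IOUloss

# Boxes are (cx, cy, w, h). pred covers [-1, 1] x [-1, 1]; target covers [0, 2] x [-1, 1].
pred = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
target = torch.tensor([[1.0, 0.0, 2.0, 2.0]])

# Intersection = 1 * 2 = 2, union = 4 + 4 - 2 = 6, IoU = 1/3, loss = 1 - (1/3)^2 ~= 0.889
loss = IOUloss(reduction="none", loss_type="iou")(pred, target)
print(loss)  # tensor([0.8889])
```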
4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class IOUloss(nn.Module): 10 | def __init__(self, reduction="none", loss_type="iou"): 11 | super(IOUloss, self).__init__() 12 | self.reduction = reduction 13 | self.loss_type = loss_type 14 | 15 | def forward(self, pred, target): 16 | assert pred.shape[0] == target.shape[0] 17 | 18 | pred = pred.view(-1, 4) 19 | target = target.view(-1, 4) 20 | tl = torch.max( 21 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 22 | ) 23 | br = torch.min( 24 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 25 | ) 26 | 27 | area_p = torch.prod(pred[:, 2:], 1) 28 | area_g = torch.prod(target[:, 2:], 1) 29 | 30 | en = (tl < br).type(tl.type()).prod(dim=1) 31 | area_i = torch.prod(br - tl, 1) * en 32 | area_u = area_p + area_g - area_i 33 | iou = (area_i) / (area_u + 1e-16) 34 | 35 | if self.loss_type == "iou": 36 | loss = 1 - iou**2 37 | elif self.loss_type == "giou": 38 | c_tl = torch.min( 39 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 40 | ) 41 | c_br = torch.max( 42 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 43 | ) 44 | area_c = torch.prod(c_br - c_tl, 1) 45 | giou = iou - (area_c - area_u) / area_c.clamp(1e-16) 46 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 47 | 48 | if self.reduction == "mean": 49 | loss = loss.mean() 50 | elif self.reduction == "sum": 51 | loss = loss.sum() 52 | 53 | return loss 54 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv 10 | 11 | 12 | class YOLOFPN(nn.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=53, 20 | in_features=["dark3", "dark4", "dark5"], 21 | ): 22 | super().__init__() 23 | 24 | self.backbone = Darknet(depth) 25 | self.in_features = in_features 26 | 27 | # out 1 28 | self.out1_cbl = self._make_cbl(512, 256, 1) 29 | self.out1 = self._make_embedding([256, 512], 512 + 256) 30 | 31 | # out 2 32 | self.out2_cbl = self._make_cbl(256, 128, 1) 33 | self.out2 = self._make_embedding([128, 256], 256 + 128) 34 | 35 | # upsample 36 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 37 | 38 | def _make_cbl(self, _in, _out, ks): 39 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 40 | 41 | def _make_embedding(self, filters_list, in_filters): 42 | m = nn.Sequential( 43 | *[ 44 | self._make_cbl(in_filters, filters_list[0], 1), 45 | self._make_cbl(filters_list[0], filters_list[1], 3), 46 | self._make_cbl(filters_list[1], filters_list[0], 1), 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): 54 | with open(filename, "rb") as f: 55 | state_dict = torch.load(f, map_location="cpu") 56 | print("loading pretrained weights...") 57 | self.backbone.load_state_dict(state_dict) 58 | 59 | def forward(self, inputs): 60 | """ 61 | Args: 62 | inputs (Tensor): input image. 63 | 64 | Returns: 65 | Tuple[Tensor]: FPN output features.. 
66 | """ 67 | # backbone 68 | out_features = self.backbone(inputs) 69 | x2, x1, x0 = [out_features[f] for f in self.in_features] 70 | 71 | # yolo branch 1 72 | x1_in = self.out1_cbl(x0) 73 | x1_in = self.upsample(x1_in) 74 | x1_in = torch.cat([x1_in, x1], 1) 75 | out_dark4 = self.out1(x1_in) 76 | 77 | # yolo branch 2 78 | x2_in = self.out2_cbl(out_dark4) 79 | x2_in = self.upsample(x2_in) 80 | x2_in = torch.cat([x2_in, x2], 1) 81 | out_dark3 = self.out2(x2_in) 82 | 83 | outputs = (out_dark3, out_dark4, x0) 84 | return outputs 85 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import torch.nn as nn 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(nn.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x, targets=None): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | 32 | if self.training: 33 | assert targets is not None 34 | loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( 35 | fpn_outs, targets, x 36 | ) 37 | outputs = { 38 | "total_loss": loss, 39 | "iou_loss": iou_loss, 40 | "l1_loss": l1_loss, 41 | "conf_loss": conf_loss, 42 | "cls_loss": cls_loss, 43 | "num_fg": num_fg, 44 | } 45 | else: 46 | outputs = self.head(fpn_outs) 47 | 48 | return outputs 49 | -------------------------------------------------------------------------------- /detector/yolox/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # This file is used for package installation. Script of train/eval/export will be available. 6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _TOOLS_PATH = Path(__file__).resolve().parent.parent.parent / "tools" 12 | 13 | if _TOOLS_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _PathFinder(importlib.abc.MetaPathFinder): 18 | def find_spec(self, name, path, target=None): 19 | if not name.startswith("yolox.tools."): 20 | return 21 | project_name = name.split(".")[-1] + ".py" 22 | target_file = _TOOLS_PATH / project_name 23 | if not target_file.is_file(): 24 | return 25 | return importlib.util.spec_from_file_location(name, target_file) 26 | 27 | sys.meta_path.append(_PathFinder()) 28 | -------------------------------------------------------------------------------- /detector/yolox/yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
4 | 5 | from .boxes import * 6 | from .compat import meshgrid 7 | -------------------------------------------------------------------------------- /detector/yolox/yolox/utils/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | 6 | _TORCH_VER = [int(x) for x in torch.__version__.split(".")[:2]] 7 | 8 | __all__ = ["meshgrid"] 9 | 10 | 11 | def meshgrid(*tensors): 12 | if _TORCH_VER >= [1, 10]: 13 | return torch.meshgrid(*tensors, indexing="ij") 14 | else: 15 | return torch.meshgrid(*tensors) 16 | -------------------------------------------------------------------------------- /detector/yolox_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.MODEL_NAME = "yolox-x" 5 | cfg.MODEL_WEIGHTS = "detector/yolox/data/yolox_x.pth" 6 | cfg.INP_DIM = 640 7 | cfg.CONF_THRES = 0.1 8 | cfg.NMS_THRES = 0.6 9 | -------------------------------------------------------------------------------- /docs/CrowdPose.md: -------------------------------------------------------------------------------- 1 | ## [CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark](https://arxiv.org/abs/1812.00324) *(accepted to CVPR2019)* 2 | 3 |

4 | 5 |

6 | 7 | ## Introduction 8 | Our proposed method surpasses the state-of-the-art methods on [CrowdPose](https://arxiv.org/abs/1812.00324) dataset by **5** mAP and results on MSCOCO dataset demonstrate the generalization ability of our method (comparatively **0.8** mAP higher). Images in our proposed CrowdPose dataset have a uniform distribution of *Crowd Index* among \[0, 1\]. 9 | 10 | ## Code 11 | We provide [evaluation tools](https://github.com/Jeff-sjtu/CrowdPose) for CrowdPose dataset. Our evaluation tools is developed based on [@cocodataset/cocoapi](https://github.com/cocodataset/cocoapi). The source code of our model is integrated into [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch). 12 | 13 | ## Quick Start 14 | Run with `matching` option to use the matching algorithm in CrowdPose. 15 | 16 | - **Input dir**: Run AlphaPose for all images in a folder with: 17 | ``` 18 | # pytorch branch 19 | python3 demo.py --indir ${img_directory} --outdir examples/res --matching 20 | ``` 21 | 22 | ## Dataset 23 | [Train + Validation + Test Images](https://drive.google.com/file/d/1VprytECcLtU4tKP32SYi_7oDRbw7yUTL/view?usp=sharing) (Google Drive) 24 | 25 | [Annotations](https://drive.google.com/open?id=196vsma1uuLLCcUt1NrXp1K8PBU6tVH8w) (Google Drive) 26 | 27 | ## Results 28 | 29 | **Results on CrowdPose Validation:** 30 | 31 | *Compare with state-of-the-art methods* 32 |
33 | 34 | | Method | AP @0.5:0.95 | AP @0.5 | AP @0.75 | AR @0.5:0.95 | AR @0.5 | AR @0.75 | 35 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:| 36 | | Detectron (Mask R-CNN) | 57.2 | 83.5 | 60.3 | 65.9 | 89.3 | 69.4 | 37 | | Simple Pose (Xiao *et al.*) | 60.8 | 81.4 | 65.7 | 67.3 | 86.3 | 71.8 | 38 | | **Ours** | **66.0** | **84.2** | **71.5** | **72.7** | **89.5** | **77.5** | 39 | 40 |
41 | 42 | *Compare with open-source systems* 43 |
44 | 45 | | Method | AP @*Easy* | AP @*Medium* | AP @*Hard* | FPS | 46 | |:-------|:-----:|:-------:|:-------:|:-------:| 47 | | OpenPose (CMU-Pose) | 62.7 | 48.7 | 32.3 | 5.3 | 48 | | Detectron (Mask R-CNN) | 69.4 | 57.9 | 45.8 | 2.9 | 49 | | **Ours** | **75.5** | **66.3** | **57.4** | **10.1** | 50 | 51 |
52 | 53 | **Results on MSCOCO Validation:** 54 |
55 | 56 | | Method | AP @0.5:0.95 | AR @0.5:0.95 | 57 | |:-------|:-----:|:-------:| 58 | | Detectron (Mask R-CNN) | 64.8 | 71.1 | 59 | | Simple Pose (Xiao *et al.*) | 69.8 | 74.1 | 60 | | **AlphaPose** | **70.9** | **76.4** | 61 | 62 |
63 | 64 | ## Contributors 65 | CrowdPose is authored by [Jiefeng Li](http://jeff-leaf.site/), [Can Wang](https://github.com/Canwang-sjtu), [Hao Zhu](https://github.com/BernieZhu), [Yihuan Mao](), [Hao-Shu Fang](https://fang-haoshu.github.io/), and [Cewu Lu](http://www.mvig.org/). 66 | -------------------------------------------------------------------------------- /docs/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | ## Flags 4 | Checkout the [run.md](./run.md) for all flags. 5 | 6 | ## Example Inference 7 | - **Input dir**: Run AlphaPose for all images in a folder with: 8 | ``` bash 9 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} 10 | ``` 11 | - **Choose a different detector**: Default detector is yolov3-spp, it works pretty well, if you want to use yolox series, remember to download their weight according to our installation readme. Options include [yolox-x|yolox-l|yolox-m|yolox-s|yolox-darknet]: 12 | ``` bash 13 | python scripts/demo_inference.py --detector yolox-x --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} 14 | ``` 15 | - **Video**: Run AlphaPose for a video and save the rendered video with: 16 | ``` bash 17 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --save_video 18 | ``` 19 | - **Webcam**: Run AlphaPose using default webcam and visualize the results with: 20 | ``` bash 21 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --outdir examples/res --vis --webcam 0 22 | ``` 23 | - **Input list**: Run AlphaPose for images in a list and save the rendered images with: 24 | ``` bash 25 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --save_img 26 | ``` 27 | - **Only-cpu/Multi-gpus**: Run AlphaPose for images in a list by cpu only or multi gpus: 28 | ``` bash 29 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --gpus ${-1(cpu only)/0,1,2,3(multi-gpus)} 30 | ``` 31 | - **Re-ID Track(Experimental)**: Run AlphaPose for tracking persons in a video by human re-id algorithm: 32 | ``` bash 33 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --pose_track --save_video 34 | ``` 35 | - **Simple Track(Experimental)**: Run AlphaPose for tracking persons in a video by MOT tracking algorithm: 36 | ``` bash 37 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --detector tracker --save_video 38 | ``` 39 | - **Pose Flow(not ready)**: Run AlphaPose for tracking persons in a video by embedded PoseFlow algorithm: 40 | ``` bash 41 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --pose_flow --save_video 42 | ``` 43 | 44 | 45 | ## Options 46 | - **Note**: If you meet OOM(out of memory) problem, decreasing the pose estimation batch until the program can run on your computer: 47 | ``` bash 48 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir 
examples/res --detbatch 1 --posebatch 30 49 | ``` 50 | - **Getting more accurate**: You can use larger input for pose network to improve performance e.g.: 51 | ``` 52 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} --flip 53 | ``` 54 | - **Speeding up**: Checkout the [speed_up.md](./speed_up.md) for more details. 55 | 56 | ## Output format 57 | Checkout the [output.md](./output.md) for more details. 58 | -------------------------------------------------------------------------------- /docs/alphapose_136.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_136.gif -------------------------------------------------------------------------------- /docs/alphapose_17.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_17.gif -------------------------------------------------------------------------------- /docs/alphapose_26.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_26.gif -------------------------------------------------------------------------------- /docs/alphapose_hybrik_smpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_hybrik_smpl.gif -------------------------------------------------------------------------------- /docs/contributors.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Authors and Contributors 2 | ==================================== 3 | 4 | 5 | 6 | ### Authors 7 | AlphaPose is authored by [Hao-Shu Fang\*](https://fang-haoshu.github.io/), [Jiefeng Li\*](https://jeff-leaf.site/), Hongyang Tang, Chao Xu, Haoyi Zhu, [Yuliang Xiu](http://xiuyuliang.cn/), Yong-Lu Li, and [Cewu Lu](www.mvig.org). Cewu Lu is corresponding author. 8 | 9 | 10 | 11 | ### Contributors 12 | We would also like to thank the following people who have highly contributed to AlphaPose: 13 | 14 | [Hongyang Tang](): AlphaPose Tracking module developer 15 | 16 | [Chao Xu](): AlphaPose pytorch 0.3.0 version contributor 17 | 18 | [Haoyi Zhu](): AlphaPose whole-body keypoints developer 19 | 20 | [Chenxi Wang](): AlphaPose mxnet version main developer 21 | 22 | [Chongwei Liu](): AlphaPose c++ version developer 23 | 24 | [Ruiheng Chang](): AlphaPose master version(old) detection module 25 | -------------------------------------------------------------------------------- /docs/crowdpose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/crowdpose.gif -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Frequently Asked Question (FAQ) 2 | ============================================ 3 | 4 | ## FAQ 5 | 1. 
[Can't open webcam or video file](#Can't-open-webcam-or-video-file) 6 | 7 | ## FAQ 8 | ### Can't open webcam or video file 9 | **Q:** - I can't open the webcam or video file. 10 | 11 | **A**: Try re-installing `opencv-python` with version >= 3.3.1.11 by 12 | ``` 13 | pip3 uninstall opencv_python 14 | pip3 install opencv_python --user 15 | ``` 16 | Many people meet this problem at https://github.com/opencv/opencv/issues/8471. The solution I use is 17 | ``` 18 | sudo cp /build/lib/python3/cv2.cpython-35m-x86_64-linux-gnu.so /usr/local/lib/python3.5/dist-packages/cv2/cv2.cpython-35m-x86_64-linux-gnu.so 19 | ``` 20 | The idea is to replace the cv2.so library provided by pypi with the one compiled from sources. You can check for more info at https://github.com/opencv/opencv/issues/8471. 21 | 22 | ### Can't open webcam 23 | **Q:** - I can't open the webcam with the latest `opencv-python`. 24 | 25 | **A**: Check if your device is valid by 26 | ``` 27 | ls /dev/video* 28 | ``` 29 | Usually you can find `video0`, but if you have a device with another index like `video3`, you can run the program by 30 | ``` 31 | # main branch 32 | python scripts/demo_inference.py --cfg ${CONFIG} --checkpoint ${CKPT} --webcam 3 --outdir ${OUTDIR} --detector yolo --vis 33 | 34 | # pytorch branch 35 | python3 webcam_demo.py --webcam 3 --outdir examples/res --vis 36 | ``` 37 | 38 | ### Program crash 39 | **Q1:** - I get `Killed` when processing heavy tasks, like large videos or images with crowded persons. 40 | 41 | **A**: Your system runs out of CPU memory and kills the program automatically. Please reduce the length of the result buffer by setting the `--qsize` flag. With the default buffer length, more than 70 GB of free CPU memory is recommended for heavy tasks. 42 | 43 | **Q2:** - I get a segmentation fault when processing heavy tasks, like large videos or images with crowded persons. 44 | 45 | **A**: The parallelization module `torch.multiprocessing` is prone to shared memory leaks. Its garbage collection mechanism `torch_shm_manager` may cause a segmentation fault under long-running heavy load. We found this issue when processing large videos with hundreds of persons. To avoid it, you can set the `--sp` flag to use multi-threading instead, which sacrifices a little efficiency for more stability.
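For reference, a combined invocation that applies the memory-related flags mentioned above could look like the following (illustrative; config, checkpoint and video paths are placeholders, and `--qsize 64` is just an example of a reduced buffer length):
```
python scripts/demo_inference.py --cfg ${CONFIG} --checkpoint ${CKPT} --video ${VIDEO} \
    --outdir examples/res --sp --qsize 64 --posebatch 30 --save_video
```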
46 | -------------------------------------------------------------------------------- /docs/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/logo.jpg -------------------------------------------------------------------------------- /docs/pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/pose.gif -------------------------------------------------------------------------------- /docs/posetrack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/posetrack.gif -------------------------------------------------------------------------------- /docs/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/posetrack2.gif -------------------------------------------------------------------------------- /docs/run.md: -------------------------------------------------------------------------------- 1 | AlphaPose Usage & Examples 2 | ==================================== 3 | 4 | Here, we first list the flags and other parameters you can tune. Default parameters work well and you don't need to tune them unless you know what you are doing. 5 | 6 | ## Flags 7 | - `--cfg`: Experiment configure file name 8 | - `--checkpoint`: Experiment checkpoint file name 9 | - `--sp`: Run the program using a single process. Windows users need to turn this flag on. 10 | - `--detector`: Detector you can use, yolo/tracker. For [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX), you should specify the model, i.e. yolox-l or yolox-x, etc. 11 | - `--indir`: Directory of the input images. All the images in the directory will be processed. 12 | - `--list`: A text file list for the input images 13 | - `--image`: Read single image and process. 14 | - `--video`: Read video and process the video frame by frame. 15 | - `--outdir`: Output directory to store the pose estimation results. 16 | - `--vis`: If turned-on, it will render the results and visualize them. 17 | - `--save_img`: If turned-on, it will render the results and save them as images in $outdir/vis. 18 | - `--save_video`: If turned-on, it will render the results and save them as a video. 19 | - `--vis_fast`: If turned on, it will use faster rendering method. Default is false. 20 | - `--format`: The format of the saved results. By default, it will save the output in COCO-like format. Alternative options are 'cmu' and 'open', which saves the results in the format of CMU-Pose or OpenPose. For more details, see [output.md](output.md) 21 | 22 | - `--detbatch`: Batch size for the detection network. 23 | - `--posebatch`: Maximum batch size for the pose estimation network. If you met OOM problem, decrease this value until it fit in the memory. 24 | - `--flip`: Enable flip testing. Can increase the accuracy. 25 | - `--min_box_area`: Min box area to filter out, you can set it like 100 to filter out small people. 26 | - `--gpus`: Choose which cuda device to use by index and input comma to use multi gpus, e.g. 0,1,2,3. 
(input -1 for CPU only) 27 | 28 | - `--pose_track`: Enable the tracking pipeline with human re-ID features; it is currently the best-performing pose tracker 29 | - `--pose_flow`: This flag will be deprecated. It enables the old tracking version, PoseFlow. 30 | 31 | All the flags are available here: [link](../scripts/demo_inference.py#L22) 32 | 33 | 34 | ## Parameters 35 | 1. The yolo detector config is [here](../detector/yolo_cfg.py) 36 | - `CONFIDENCE`: Confidence threshold for human detection. Lowering the value can improve the final accuracy but decreases the speed. Default is 0.05. 37 | - `NMS_THRES`: NMS threshold for human detection. Increasing the value can improve the final accuracy but decreases the speed. Default is 0.6. 38 | - `INP_DIM`: The input size of the detection network. It should be a multiple of 32. Default is 608. Increasing it may improve the accuracy. 39 | -------------------------------------------------------------------------------- /docs/speed_up.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Speeding Up 2 | ============================================ 3 | 4 | 5 | 1. When running AlphaPose on a video, you can speed it up by increasing the confidence threshold, lowering the NMS threshold, and lowering the input resolution of the detector in `detector/yolo_cfg.py`: 6 | ``` 7 | cfg.NMS_THRES = 0.45 8 | cfg.CONFIDENCE = 0.5 9 | cfg.INP_DIM = 420 10 | ``` 11 | It may miss some people though. 12 | 13 | 2. Increase the detection and pose batch sizes by setting the `--detbatch` and `--posebatch` flags if you have large GPU memory. 14 | -------------------------------------------------------------------------------- /docs/step1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step1.jpg -------------------------------------------------------------------------------- /docs/step2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step2.jpg -------------------------------------------------------------------------------- /docs/step3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step3.jpg -------------------------------------------------------------------------------- /docs/step4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step4.jpg -------------------------------------------------------------------------------- /docs/win_install.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Windows Installation 2 | ============================================ 3 | 4 | Tested on Win10 with a GTX 1060 5 | 6 | 1. Download and install Git for Windows: https://git-scm.com/download/win 7 | 2. Download and install Python 3 (3.6 or 3.7): https://www.python.org/downloads/ 8 | 3. Download and install the CUDA toolkit: https://developer.nvidia.com/cuda-downloads 9 | 4. Download and install PyTorch: https://pytorch.org/ 10 | Right-click and choose "Git Bash Here" 11 |
12 | (screenshot: step1.jpg) 13 |
14 | Input the command here and press Enter 15 |
16 | (screenshot: step2.jpg) 17 |
18 | 5. Download win-64/intel-openmp-2018.0.0-8.tar.bz2: https://anaconda.org/anaconda/intel-openmp/files 19 | Put the .dll files in Library\bin into a directory, then append that directory's path to the PATH environment variable. 20 | I suggest putting them in C:\Users\<username>\bin, since this directory is already in PATH. 21 | 6. Right-click, choose "Git Bash Here", and then follow the instructions in the README to install AlphaPose (a hedged sketch of the typical commands is shown below) 22 |
23 | (screenshot: step3.jpg) 24 |
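The authoritative install commands are in the repository README (and docs/INSTALL.md); the following is only a hedged sketch of the usual flow inside Git Bash, assuming a standard Python/pip setup:
```
# hedged sketch -- follow the README for the authoritative steps
git clone https://github.com/MVIG-SJTU/AlphaPose.git
cd AlphaPose
pip3 install cython
python setup.py build develop --user
```
If a command differs on your machine (e.g. `python` vs `python3`), follow the README rather than this sketch.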
25 | 7. Have fun! Now you can run AlphaPose from the command line. Try the webcam demo with 26 | ``` 27 | # pytorch branch 28 | python3 webcam_demo.py --vis 29 | ``` 30 | **Note:** For `demo.py` and `video_demo.py`, you need to turn on the `--sp` flag, e.g. 31 | 32 | ``` 33 | # pytorch branch 34 | python3 demo.py --indir ${img_directory} --outdir examples/res --sp 35 | ``` 36 | 37 |
38 | (screenshot: step4.jpg) 39 |
40 | -------------------------------------------------------------------------------- /examples/demo/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/1.jpg -------------------------------------------------------------------------------- /examples/demo/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/2.jpg -------------------------------------------------------------------------------- /examples/demo/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/3.jpg -------------------------------------------------------------------------------- /examples/list-coco-demo.txt: -------------------------------------------------------------------------------- 1 | 000000375530.jpg 2 | 000000244462.jpg 3 | 000000113397.jpg 4 | 000000113408.jpg 5 | 000000375554.jpg 6 | 000000171819.jpg 7 | 000000375566.jpg 8 | 000000244496.jpg 9 | 000000139077.jpg 10 | 000000506656.jpg 11 | 000000375606.jpg 12 | 000000244539.jpg 13 | 000000565045.jpg 14 | 000000113473.jpg 15 | 000000375621.jpg 16 | 000000244550.jpg 17 | 000000492605.jpg 18 | 000000506707.jpg 19 | 000000113493.jpg 20 | 000000215524.jpg 21 | -------------------------------------------------------------------------------- /model_files/J_regressor_h36m.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/J_regressor_h36m.npy -------------------------------------------------------------------------------- /model_files/h36m_mean_beta.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/h36m_mean_beta.npy -------------------------------------------------------------------------------- /model_files/smpl_faces.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/smpl_faces.npy -------------------------------------------------------------------------------- /pretrained_models/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/pretrained_models/get_models.sh -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 7 | 8 | python scripts/demo_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video 14 | -------------------------------------------------------------------------------- /scripts/inference_3d.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 
7 | 8 | python scripts/demo_3d_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video --pose_track 14 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | EXPID=${2:-"alphapose"} 5 | 6 | python ./scripts/train.py \ 7 | --exp-id ${EXPID} \ 8 | --cfg ${CONFIG} 9 | -------------------------------------------------------------------------------- /scripts/validate.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | BATCH=${3:-"64"} 6 | GPUS=${4:-"0,1,2,3"} 7 | 8 | python ./scripts/validate.py \ 9 | --cfg ${CONFIG} \ 10 | --batch ${BATCH} \ 11 | --gpus $GPUS\ 12 | --flip-test \ 13 | --checkpoint ${CKPT} 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | index_url=https://pypi.tuna.tsinghua.edu.cn/simple 3 | -------------------------------------------------------------------------------- /trackers/PoseFlow/parallel_process.py: -------------------------------------------------------------------------------- 1 | # adapted from http://danshiebler.com/2016-09-14-parallel-progress-bar/ 2 | from tqdm import tqdm 3 | from concurrent.futures import ProcessPoolExecutor, as_completed 4 | 5 | def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=3): 6 | """ 7 | A parallel version of the map function with a progress bar. 8 | 9 | Args: 10 | array (array-like): An array to iterate over. 11 | function (function): A python function to apply to the elements of array 12 | n_jobs (int, default=16): The number of cores to use 13 | use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of 14 | keyword arguments to function 15 | front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job. 16 | Useful for catching bugs 17 | Returns: 18 | [function(array[0]), function(array[1]), ...] 19 | """ 20 | #We run the first few iterations serially to catch bugs 21 | if front_num > 0: 22 | front = [function(**a) if use_kwargs else function(*a) for a in array[:front_num]] 23 | #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging. 24 | if n_jobs==1: 25 | return front + [function(**a) if use_kwargs else function(*a) for a in tqdm(array[front_num:])] 26 | #Assemble the workers 27 | with ProcessPoolExecutor(max_workers=n_jobs) as pool: 28 | #Pass the elements of array into function 29 | if use_kwargs: 30 | futures = [pool.submit(function, **a) for a in array[front_num:]] 31 | else: 32 | futures = [pool.submit(function, *a) for a in array[front_num:]] 33 | kwargs = { 34 | 'total': len(futures), 35 | 'unit': 'it', 36 | 'unit_scale': True, 37 | 'leave': True 38 | } 39 | #Print out the progress as tasks complete 40 | for f in tqdm(as_completed(futures), **kwargs): 41 | pass 42 | out = [] 43 | #Get the results from the futures. 
44 | for i, future in enumerate(futures): 45 | try: 46 | out.append(future.result()) 47 | except Exception as e: 48 | out.append(e) 49 | return front + out -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/PoseFlow/posetrack1.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/PoseFlow/posetrack2.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack_data: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/posetrack_data -------------------------------------------------------------------------------- /trackers/PoseFlow/poseval: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/poseval -------------------------------------------------------------------------------- /trackers/PoseFlow/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.5 2 | scipy==1.1.0 3 | opencv_python==3.4.2.16 4 | opencv_contrib_python==3.4.2.16 5 | matplotlib==2.2.2 6 | tqdm==4.23.4 7 | Image==1.5.25 8 | Pillow==5.3.0 9 | munkres==1.0.12 10 | -------------------------------------------------------------------------------- /trackers/README.md: -------------------------------------------------------------------------------- 1 | # Pose Tracking Module for AlphaPose 2 | 3 | AlphaPose currently provides three different tracking methods; you can try each method to see which one works best for you. 4 | 5 | ## 1. Human-ReID based tracking (Recommended) 6 | Currently the best-performing tracking model. Paper coming soon. 7 | 8 | #### Getting started 9 | Download the human re-ID model ([Google Drive](https://drive.google.com/file/d/1myNKfr2cXqiHZVXaaG8ZAq_U2UpeOLfG/view?usp=share_link) or [Baidu Pan, code: 6a8b](https://pan.baidu.com/s/1IoAHehdjJ0ucQl8p_4hfRw)) and place it into `AlphaPose/trackers/weights/`. 10 | 11 | Then simply run AlphaPose with the additional flag `--pose_track` 12 | 13 | You can try a different person re-ID model by modifying `cfg.arch` and `cfg.loadmodel` in `./trackers/tracker_cfg.py`. 14 | 15 | If you want to train your own re-ID model, please refer to this [project](https://github.com/KaiyangZhou/deep-person-reid) 16 | 17 | #### Demo 18 | ``` bash 19 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR} --pose_track 20 | ``` 21 | #### Todo 22 | - [ ] Evaluation Tools for PoseTrack 23 | - [ ] More Models 24 | - [ ] Training code for [PoseTrack Dataset](https://posetrack.net/) 25 | 26 | ## 2. Detector based human tracking 27 | Use a human detector with a tracking module (JDE).
Please refer to [detector/tracker/](../detector/tracker/) 28 | 29 | #### Getting started 30 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 31 | 32 | Enable tracking by setting the detector as tracker: `--detector tracker` 33 | #### Demo 34 | ``` bash 35 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --detector tracker 36 | ``` 37 | 38 | ## 3. PoseFlow human tracking 39 | This tracker is based on our BMVC 2018 paper PoseFlow, for more info please refer to [PoseFlow/README.md](PoseFlow/) 40 | 41 | #### Getting started 42 | 43 | Simply run alphapose with additional flag `--pose_flow` 44 | -------------------------------------------------------------------------------- /trackers/ReidModels/ResBnLin.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | from ReidModels.ResNet import build_resnet_backbone 10 | from ReidModels.bn_linear import BNneckLinear 11 | class SpatialAttn(nn.Module): 12 | """Spatial Attention Layer""" 13 | def __init__(self): 14 | super(SpatialAttn, self).__init__() 15 | 16 | def forward(self, x): 17 | # global cross-channel averaging # e.g. 32,2048,24,8 18 | x = x.mean(1, keepdim=True) # e.g. 32,1,24,8 19 | h = x.size(2) 20 | w = x.size(3) 21 | x = x.view(x.size(0),-1) # e.g. 32,192 22 | z = x 23 | for b in range(x.size(0)): 24 | z[b] /= torch.sum(z[b]) 25 | z = z.view(x.size(0),1,h,w) 26 | return z 27 | class ResModel(nn.Module): 28 | 29 | def __init__(self, n_ID): 30 | super().__init__() 31 | self.backbone = build_resnet_backbone() 32 | self.head = BNneckLinear(n_ID) 33 | self.atten = SpatialAttn() 34 | self.conv1 = nn.Conv2d(17, 17, 1,stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros') 35 | self.pool = nn.AvgPool2d(2, stride=2, padding=0,) 36 | def forward(self, input,posemap,map_weight): 37 | """ 38 | See :class:`ReIDHeads.forward`. 
39 | """ 40 | feat = self.backbone(input) 41 | b,c,h,w = feat.shape 42 | att = self.conv1(torch.mul(posemap,map_weight)) 43 | #print('att-1-size={}'.format(att.shape)) 44 | att = F.relu(att) 45 | att = self.pool(att) 46 | att = self.conv1(att) 47 | #print('att-2-size={}'.format(att.shape)) 48 | att = F.softmax(att) 49 | #print('att-3-size={}'.format(att.shape)) 50 | att = self.atten(att) 51 | #print('att-4-size={}'.format(att.shape)) 52 | att = att.expand(b,c,h,w) 53 | _feat = torch.mul(feat,att) 54 | feat = _feat + feat 55 | return self.head(feat) -------------------------------------------------------------------------------- /trackers/ReidModels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/backbone/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .lrn import SpatialCrossMapLRN 7 | 8 | 9 | class Inception(nn.Module): 10 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 11 | super(Inception, self).__init__() 12 | # 1x1 conv branch 13 | self.b1 = nn.Sequential( 14 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 15 | nn.ReLU(True), 16 | ) 17 | 18 | # 1x1 conv -> 3x3 conv branch 19 | self.b2 = nn.Sequential( 20 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | ) 25 | 26 | # 1x1 conv -> 5x5 conv branch 27 | self.b3 = nn.Sequential( 28 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 29 | nn.ReLU(True), 30 | 31 | nn.Conv2d(n5x5red, n5x5, kernel_size=5, padding=2), 32 | nn.ReLU(True), 33 | ) 34 | 35 | # 3x3 pool -> 1x1 conv branch 36 | self.b4 = nn.Sequential( 37 | nn.MaxPool2d(3, stride=1, padding=1), 38 | 39 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 40 | nn.ReLU(True), 41 | ) 42 | 43 | def forward(self, x): 44 | y1 = self.b1(x) 45 | y2 = self.b2(x) 46 | y3 = self.b3(x) 47 | y4 = self.b4(x) 48 | return torch.cat([y1,y2,y3,y4], 1) 49 | 50 | 51 | class GoogLeNet(nn.Module): 52 | 53 | output_channels = 832 54 | 55 | def __init__(self): 56 | super(GoogLeNet, self).__init__() 57 | self.pre_layers = nn.Sequential( 58 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 59 | nn.ReLU(True), 60 | 61 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 62 | SpatialCrossMapLRN(5), 63 | 64 | nn.Conv2d(64, 64, 1), 65 | nn.ReLU(True), 66 | 67 | nn.Conv2d(64, 192, 3, padding=1), 68 | nn.ReLU(True), 69 | 70 | SpatialCrossMapLRN(5), 71 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 72 | ) 73 | 74 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 75 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 76 | 77 | self.maxpool = nn.MaxPool2d(3, stride=2, ceil_mode=True) 78 | 79 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) 80 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 81 | self.c4 = 
Inception(512, 128, 128, 256, 24, 64, 64) 82 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 83 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 84 | 85 | def forward(self, x): 86 | out = self.pre_layers(x) 87 | out = self.a3(out) 88 | out = self.b3(out) 89 | out = self.maxpool(out) 90 | out = self.a4(out) 91 | out = self.b4(out) 92 | out = self.c4(out) 93 | out = self.d4(out) 94 | out = self.e4(out) 95 | 96 | return out 97 | -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/lrn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.legacy.nn import SpatialCrossMapLRN as SpatialCrossMapLRNOld 3 | from torch.autograd import Function, Variable 4 | from torch.nn import Module 5 | 6 | 7 | # function interface, internal, do not use this one!!! 8 | class SpatialCrossMapLRNFunc(Function): 9 | 10 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 11 | self.size = size 12 | self.alpha = alpha 13 | self.beta = beta 14 | self.k = k 15 | 16 | def forward(self, input): 17 | self.save_for_backward(input) 18 | self.lrn = SpatialCrossMapLRNOld(self.size, self.alpha, self.beta, self.k) 19 | self.lrn.type(input.type()) 20 | return self.lrn.forward(input) 21 | 22 | def backward(self, grad_output): 23 | input, = self.saved_tensors 24 | return self.lrn.backward(input, grad_output) 25 | 26 | 27 | # use this one instead 28 | class SpatialCrossMapLRN(Module): 29 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 30 | super(SpatialCrossMapLRN, self).__init__() 31 | self.size = size 32 | self.alpha = alpha 33 | self.beta = beta 34 | self.k = k 35 | 36 | def forward(self, input): 37 | return SpatialCrossMapLRNFunc(self.size, self.alpha, self.beta, self.k)(input) -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/sqeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision import models 5 | 6 | 7 | class DilationLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, padding='same_padding', dilation=1, bn=False): 9 | super(DilationLayer, self).__init__() 10 | if padding == 'same_padding': 11 | padding = int((kernel_size - 1) / 2 * dilation) 12 | self.Dconv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 13 | padding=padding, dilation=dilation) 14 | self.Drelu = nn.ReLU(inplace=True) 15 | self.Dbn = nn.BatchNorm2d(out_channels) if bn else None 16 | 17 | def forward(self, x): 18 | x = self.Dconv(x) 19 | if self.Dbn is not None: 20 | x = self.Dbn(x) 21 | x = self.Drelu(x) 22 | return x 23 | 24 | 25 | class FeatExtractorSqueezeNetx16(nn.Module): 26 | n_feats = [64, 128, 256, 512] 27 | 28 | def __init__(self, pretrained=True): 29 | 30 | super(FeatExtractorSqueezeNetx16, self).__init__() 31 | print("loading layers from squeezenet1_1...") 32 | sq = models.squeezenet1_1(pretrained=pretrained) 33 | 34 | self.conv1 = nn.Sequential( 35 | sq.features[0], 36 | sq.features[1], 37 | ) 38 | self.conv2 = nn.Sequential( 39 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 40 | sq.features[3], 41 | sq.features[4], 42 | ) 43 | self.conv3 = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 45 | sq.features[6], 46 | sq.features[7], 47 | ) 48 | self.conv4 = nn.Sequential( 49 | nn.MaxPool2d(kernel_size=3, stride=2, 
padding=1), 50 | sq.features[9], 51 | sq.features[10], 52 | sq.features[11], 53 | sq.features[12], 54 | ) 55 | 56 | self.conv1[0].padding = (1, 1) 57 | 58 | def forward(self, x): 59 | x2 = self.conv1(x) 60 | x4 = self.conv2(x2) 61 | x8 = self.conv3(x4) 62 | x16 = self.conv4(x8) 63 | 64 | return x2, x4, x8, x16 65 | -------------------------------------------------------------------------------- /trackers/ReidModels/bn_linear.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | def bn_no_bias(in_features): 11 | bn_layer = nn.BatchNorm1d(in_features) 12 | bn_layer.bias.requires_grad_(False) 13 | return bn_layer 14 | 15 | def weights_init_kaiming(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Linear') != -1: 18 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') 19 | if m.bias is not None: 20 | nn.init.constant_(m.bias, 0.0) 21 | elif classname.find('Conv') != -1: 22 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') 23 | if m.bias is not None: 24 | nn.init.constant_(m.bias, 0.0) 25 | elif classname.find('BatchNorm') != -1: 26 | if m.affine: 27 | nn.init.constant_(m.weight, 1.0) 28 | nn.init.constant_(m.bias, 0.0) 29 | 30 | 31 | def weights_init_classifier(m): 32 | classname = m.__class__.__name__ 33 | if classname.find('Linear') != -1: 34 | nn.init.normal_(m.weight, std=0.001) 35 | if m.bias is not None: 36 | nn.init.constant_(m.bias, 0.0) 37 | 38 | class BNneckLinear(nn.Module): 39 | 40 | def __init__(self, nID): 41 | super().__init__() 42 | self._num_classes = nID 43 | 44 | self.gap = nn.AdaptiveAvgPool2d(1) 45 | self.bnneck = bn_no_bias(2048) 46 | self.bnneck.apply(weights_init_kaiming) 47 | 48 | self.classifier = nn.Linear(2048, self._num_classes, bias=False) 49 | self.classifier.apply(weights_init_classifier) 50 | 51 | def forward(self, features): 52 | """ 53 | See :class:`ReIDHeads.forward`. 
54 | """ 55 | global_features = self.gap(features) 56 | global_features = global_features.view(global_features.shape[0], -1) 57 | bn_features = self.bnneck(global_features) 58 | 59 | if not self.training: 60 | return F.normalize(bn_features) 61 | 62 | pred_class_logits = self.classifier(bn_features) 63 | return global_features, pred_class_logits -------------------------------------------------------------------------------- /trackers/ReidModels/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/classification/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._psroi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | sources = [] 6 | headers = [] 7 | defines = [] 8 | with_cuda = False 9 | 10 | if torch.cuda.is_available(): 11 | print('Including CUDA code.') 12 | sources += ['src/psroi_pooling_cuda.c'] 13 | headers += ['src/psroi_pooling_cuda.h'] 14 | defines += [('WITH_CUDA', None)] 15 | with_cuda = True 16 | 17 | this_file = os.path.dirname(os.path.realpath(__file__)) 18 | print(this_file) 19 | extra_objects = ['src/cuda/psroi_pooling.cu.o'] 20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 21 | 22 | ffi = create_extension( 23 | '_ext.psroi_pooling', 24 | headers=headers, 25 | sources=sources, 26 | define_macros=defines, 27 | relative_to=__file__, 28 | with_cuda=with_cuda, 29 | extra_objects=extra_objects 30 | ) 31 | 32 | if __name__ == '__main__': 33 | ffi.build() 34 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/functions/__init__.py 
-------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/psroi_pooling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import psroi_pooling 4 | 5 | 6 | class PSRoIPoolingFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.group_size = int(group_size) 12 | self.output_dim = int(output_dim) 13 | 14 | self.output = None 15 | self.mappingchannel = None 16 | self.rois = None 17 | self.feature_size = None 18 | 19 | def forward(self, features, rois): 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new().resize_(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_() 24 | mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_().cuda(features.get_device()) 25 | 26 | rtn = psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 27 | self.group_size, self.output_dim, 28 | features, rois, output, mappingchannel) 29 | assert rtn > 0 30 | self.output = output 31 | self.mappingchannel = mappingchannel 32 | self.rois = rois 33 | self.feature_size = features.size() 34 | # print features.max(), features.min() 35 | # print rois.max(), rois.min() 36 | # print output.max(), output.min() 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert (self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | 46 | psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 47 | self.output_dim, 48 | grad_output, self.rois, grad_input, self.mappingchannel) 49 | return grad_input, None 50 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | cd src/cuda 6 | echo "Compiling psroi pooling kernels by nvcc..." 
7 | ${CUDA_PATH}/bin/nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../../ 10 | python build.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/psroi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | import sys 3 | from ..functions.psroi_pooling import PSRoIPoolingFunction 4 | 5 | 6 | class PSRoIPool(Module): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | super(PSRoIPool, self).__init__() 9 | 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.group_size = int(group_size) 14 | self.output_dim = int(output_dim) 15 | 16 | def forward(self, features, rois): 17 | return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, 18 | self.output_dim)(features, rois) 19 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/cuda/psroi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef PS_ROI_POOLING_KERNEL 2 | #define PS_ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int PSROIPoolForwardLauncher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, const int pooled_width, 11 | const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream); 12 | 13 | 14 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/psroi_pooling_kernel.h" 4 | 5 | 6 | 7 | extern THCState* state; 8 | 9 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){ 10 | float* data_in = THCudaTensor_data(state, features); 11 | float* rois_in = THCudaTensor_data(state, rois); 12 | float* output_out = THCudaTensor_data(state, output); 13 | int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel); 14 | //Get # of Rois 15 | int num_rois = THCudaTensor_size(state, rois, 0); 16 | int size_rois = THCudaTensor_size(state, rois, 1); 17 | if (size_rois!=5) 18 
| { 19 | return -1; 20 | } 21 | 22 | //Get # of batch_size 23 | int batch_size = THCudaTensor_size(state, features, 0); 24 | 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | int data_width = THCudaTensor_size(state, features, 3); 27 | int num_channels = THCudaTensor_size(state, features, 1); 28 | 29 | cudaStream_t stream = THCState_getCurrentStream(state); 30 | 31 | // call the gpu kernel for psroi_pooling 32 | PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size, 33 | output_dim, output_out, mappingchannel_out,stream); 34 | return 1; 35 | } 36 | 37 | 38 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 39 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel) 40 | { 41 | float *top_grad_flat = THCudaTensor_data(state, top_grad); 42 | float *rois_flat = THCudaTensor_data(state, rois); 43 | 44 | float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 45 | int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel); 46 | 47 | // Number of ROIs 48 | int num_rois = THCudaTensor_size(state, rois, 0); 49 | int size_rois = THCudaTensor_size(state, rois, 1); 50 | if (size_rois != 5) 51 | { 52 | return -1; 53 | } 54 | // batch size 55 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 56 | 57 | // data height 58 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 59 | // data width 60 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 61 | // Number of channels 62 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 63 | 64 | cudaStream_t stream = THCState_getCurrentStream(state); 65 | 66 | PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width, pooled_height, output_dim, bottom_grad_flat, rois_flat, stream); 67 | return 1; 68 | } 69 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel); 3 | 4 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel); 6 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from utils.log import logger 9 | from ReidModels import net_utils 10 | from ReidModels.reid.image_part_aligned import Model 11 | 12 | 13 | def load_reid_model(): 14 | model = Model(n_parts=8) 15 | model.inp_size = (80, 160) 16 | ckpt = 'data/googlenet_part8_all_xavier_ckpt_56.h5' 17 | 18 | net_utils.load_net(ckpt, model) 19 | logger.info('Load ReID model from {}'.format(ckpt)) 20 | 21 | model = model.cuda() 22 | model.eval() 23 | return 
model 24 | 25 | 26 | def im_preprocess(image): 27 | image = np.asarray(image, np.float32) 28 | image -= np.array([104, 117, 123], dtype=np.float32).reshape(1, 1, -1) 29 | image = image.transpose((2, 0, 1)) 30 | return image 31 | 32 | 33 | def extract_image_patches(image, bboxes): 34 | bboxes = np.round(bboxes).astype(np.int) 35 | bboxes = bbox_utils.clip_boxes(bboxes, image.shape) 36 | patches = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes] 37 | return patches 38 | 39 | 40 | def extract_reid_features(reid_model, image, tlbrs): 41 | if len(tlbrs) == 0: 42 | return torch.FloatTensor() 43 | 44 | patches = extract_image_patches(image, tlbrs) 45 | patches = np.asarray([im_preprocess(cv2.resize(p, reid_model.inp_size)) for p in patches], dtype=np.float32) 46 | 47 | gpu = net_utils.get_device(reid_model) 48 | with torch.no_grad(): 49 | _img = torch.from_numpy(patches) 50 | if gpu: 51 | _img = _img.cuda() 52 | features,id = reid_model(_img).detach() 53 | return features 54 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/image_part_aligned.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.backbone.googlenet import GoogLeNet 6 | 7 | 8 | class Model(nn.Module): 9 | def __init__(self, n_parts=8,n_ID=300): 10 | super(Model, self).__init__() 11 | self.n_parts = n_parts 12 | self.nID = n_ID 13 | 14 | self.feat_conv = GoogLeNet() 15 | self.conv_input_feat = nn.Conv2d(self.feat_conv.output_channels, 512, 1) 16 | # part net 17 | self.conv_att = nn.Conv2d(512, self.n_parts, 1) 18 | 19 | for i in range(self.n_parts): 20 | setattr(self, 'linear_feature{}'.format(i+1), nn.Linear(512, 64)) 21 | self.id_classifer = nn.Linear(512,self.nID) 22 | def forward(self, x): 23 | feature = self.feat_conv(x) 24 | feature = self.conv_input_feat(feature) 25 | 26 | att_weights = torch.sigmoid(self.conv_att(feature)) 27 | 28 | linear_feautres = [] 29 | for i in range(self.n_parts): 30 | masked_feature = feature * torch.unsqueeze(att_weights[:, i], 1) 31 | pooled_feature = F.avg_pool2d(masked_feature, masked_feature.size()[2:4]) 32 | linear_feautres.append( 33 | getattr(self, 'linear_feature{}'.format(i+1))(pooled_feature.view(pooled_feature.size(0), -1)) 34 | ) 35 | 36 | concat_features = torch.cat(linear_feautres, 1) 37 | normed_feature = concat_features / torch.clamp(torch.norm(concat_features, 2, 1, keepdim=True), min=1e-6) 38 | out = self.id_classifer(normed_feature) 39 | return normed_feature,out 40 | -------------------------------------------------------------------------------- /trackers/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def track(tracker,args,orig_img,inps,boxes,hm,cropped_boxes,im_name,scores): 4 | hm = hm.cpu().data.numpy() 5 | online_targets = tracker.update(orig_img,inps,boxes,hm,cropped_boxes,im_name,scores,_debug=False) 6 | new_boxes,new_scores,new_ids,new_hm,new_crop = [],[],[],[],[] 7 | for t in online_targets: 8 | tlbr = t.tlbr 9 | tid = t.track_id 10 | thm = t.pose 11 | tcrop = t.crop_box 12 | tscore = t.detscore 13 | new_boxes.append(tlbr) 14 | new_crop.append(tcrop) 15 | new_hm.append(thm) 16 | new_ids.append(tid) 17 | new_scores.append(tscore) 18 | 19 | new_hm = torch.Tensor(new_hm).to(args.device) 20 | return new_boxes,new_scores,new_ids,new_hm,new_crop 21 | 
-------------------------------------------------------------------------------- /trackers/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | cfg = edict() 3 | cfg.nid = 1000 4 | cfg.arch = "osnet_ain" # "osnet" or "res50-fc512" 5 | cfg.loadmodel = "trackers/weights/osnet_ain_x1_0_msmt17_256x128_amsgrad_ep50_lr0.0015_coslr_b64_fb10_softmax_labsmth_flip_jitter.pth" 6 | cfg.frame_rate = 30 7 | cfg.track_buffer = 240 8 | cfg.conf_thres = 0.5 9 | cfg.nms_thres = 0.4 10 | cfg.iou_thres = 0.5 11 | -------------------------------------------------------------------------------- /trackers/tracking/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | Track Association part adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | -------------------------------------------------------------------------------- /trackers/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/tracking/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /trackers/tracking/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/tracking/utils/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /trackers/tracking/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/tracking/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /trackers/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /trackers/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 
44 | self.duration = 0. 45 | 46 | --------------------------------------------------------------------------------