├── .gitignore ├── LICENSE ├── README.md ├── alphapose ├── __init__.py ├── datasets │ ├── __init__.py │ ├── coco_det.py │ ├── coco_wholebody.py │ ├── coco_wholebody_det.py │ ├── concat_dataset.py │ ├── custom.py │ ├── halpe_136.py │ ├── halpe_136_det.py │ ├── halpe_26.py │ ├── halpe_26_det.py │ ├── halpe_68_noface.py │ ├── halpe_68_noface_det.py │ ├── halpe_coco_wholebody_136.py │ ├── halpe_coco_wholebody_136_det.py │ ├── halpe_coco_wholebody_26.py │ ├── halpe_coco_wholebody_26_det.py │ ├── mpii.py │ ├── mscoco.py │ ├── single_hand.py │ └── single_hand_det.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── criterion.py │ ├── fastpose.py │ ├── fastpose_duc.py │ ├── fastpose_duc_dense.py │ ├── hardnet.py │ ├── hrnet.py │ ├── layers │ │ ├── DUC.py │ │ ├── PixelUnshuffle.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── ShuffleResnet.py │ │ ├── dcn │ │ │ ├── DCN.py │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ ├── deform_pool.py │ │ │ └── src │ │ │ │ ├── deform_conv_cuda.cpp │ │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ │ ├── deform_pool_cuda.cpp │ │ │ │ └── deform_pool_cuda_kernel.cu │ │ └── smpl │ │ │ ├── SMPL.py │ │ │ └── lbs.py │ ├── simple3dposeSMPLWithCam.py │ └── simplepose.py ├── opt.py ├── utils │ ├── __init__.py │ ├── bbox.py │ ├── config.py │ ├── detector.py │ ├── env.py │ ├── file_detector.py │ ├── logger.py │ ├── metrics.py │ ├── pPose_nms.py │ ├── presets │ │ ├── __init__.py │ │ ├── simple_transform.py │ │ └── simple_transform_3d_smpl.py │ ├── registry.py │ ├── render_pytorch3d.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── transforms.py │ ├── vis.py │ ├── webcam_detector.py │ ├── writer.py │ └── writer_smpl.py └── version.py ├── configs ├── coco │ ├── hardnet │ │ ├── 256x192_hard68_lr1e-3_1x.yaml │ │ └── 256x192_hard85_lr1e-3_1x.yaml │ ├── hrnet │ │ └── 256x192_w32_lr1e-3.yaml │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-concat.yaml │ │ ├── 256x192_res50_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-simple.yaml │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── coco_wholebody │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_2x-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml ├── dense_coco │ └── resnet50 │ │ └── 256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml ├── halpe_136 │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── halpe_26 │ └── resnet │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn-regression.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── halpe_68_noface │ └── resnet │ │ └── 256x192_res50_lr1e-3_2x-dcn-combined.yaml ├── halpe_coco_wholebody_136 │ └── resnet │ │ ├── 256x192_res50_lr1e-3_2x-dcn-combined.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml ├── single_hand │ └── resnet │ │ └── 256x192_res50_lr1e-3_2x-dcn-regression.yaml └── smpl │ └── 256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml ├── detector ├── apis.py ├── effdet_api.py ├── effdet_cfg.py ├── efficientdet │ ├── 
README.md │ ├── effdet │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── bench.py │ │ ├── config │ │ │ └── config.py │ │ ├── efficientdet.py │ │ ├── helpers.py │ │ └── object_detection │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── region_similarity_calculator.py │ │ │ └── target_assigner.py │ ├── utils.py │ └── weights │ │ └── get_models.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ ├── nms_kernel.cu │ │ ├── soft_nms_cpu.cpp │ │ └── soft_nms_cpu.pyx ├── tracker │ ├── README.md │ ├── __init__.py │ ├── cfg │ │ ├── ccmcpe.json │ │ └── yolov3.cfg │ ├── models.py │ ├── preprocess.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetrack.py │ │ ├── matching.py │ │ └── multitracker.py │ └── utils │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── evaluation.py │ │ ├── io.py │ │ ├── kalman_filter.py │ │ ├── log.py │ │ ├── nms.py │ │ ├── parse_config.py │ │ ├── timer.py │ │ ├── utils.py │ │ └── visualization.py ├── tracker_api.py ├── tracker_cfg.py ├── yolo │ ├── README.md │ ├── __init__.py │ ├── bbox.py │ ├── cam_demo.py │ ├── cfg │ │ ├── tiny-yolo-voc.cfg │ │ ├── yolo-voc.cfg │ │ ├── yolo.cfg │ │ ├── yolov3-spp.cfg │ │ └── yolov3.cfg │ ├── darknet.py │ ├── detect.py │ ├── pallete │ ├── preprocess.py │ ├── util.py │ ├── video_demo.py │ └── video_demo_half.py ├── yolo_api.py ├── yolo_cfg.py ├── yolox │ ├── README.md │ ├── exps │ │ ├── default │ │ │ ├── __init__.py │ │ │ ├── yolov3.py │ │ │ ├── yolox_l.py │ │ │ ├── yolox_m.py │ │ │ ├── yolox_nano.py │ │ │ ├── yolox_s.py │ │ │ ├── yolox_tiny.py │ │ │ └── yolox_x.py │ │ └── example │ │ │ ├── custom │ │ │ ├── nano.py │ │ │ └── yolox_s.py │ │ │ └── yolox_voc │ │ │ └── yolox_voc_s.py │ ├── tools │ │ ├── __init__.py │ │ └── demo.py │ ├── utils │ │ ├── __init__.py │ │ └── preprocess.py │ └── yolox │ │ ├── __init__.py │ │ ├── exp │ │ ├── __init__.py │ │ ├── base_exp.py │ │ ├── build.py │ │ ├── default │ │ │ └── __init__.py │ │ └── yolox_base.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── cocoeval │ │ │ ├── cocoeval.cpp │ │ │ └── cocoeval.h │ │ ├── fast_coco_eval_api.py │ │ └── jit_ops.py │ │ ├── models │ │ ├── __init__.py │ │ ├── build.py │ │ ├── darknet.py │ │ ├── losses.py │ │ ├── network_blocks.py │ │ ├── yolo_fpn.py │ │ ├── yolo_head.py │ │ ├── yolo_pafpn.py │ │ └── yolox.py │ │ ├── tools │ │ └── __init__.py │ │ └── utils │ │ ├── __init__.py │ │ ├── boxes.py │ │ └── compat.py ├── yolox_api.py └── yolox_cfg.py ├── docs ├── CrowdPose.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── alphapose_136.gif ├── alphapose_17.gif ├── alphapose_26.gif ├── alphapose_hybrik_smpl.gif ├── contributors.md ├── crowdpose.gif ├── faq.md ├── logo.jpg ├── logs │ ├── fast_421_res152_256x192.log │ ├── fast_421_res50-shuffle_256x192.log │ ├── fast_dcn_res50_256x192.log │ ├── fast_res50_256x192.log │ ├── hrnet_w32_256x192.log │ └── simple_res50_256x192.log ├── output.md ├── pose.gif ├── posetrack.gif ├── posetrack2.gif ├── run.md ├── speed_up.md ├── step1.jpg ├── step2.jpg ├── step3.jpg ├── step4.jpg └── win_install.md ├── examples ├── demo │ ├── 1.jpg │ ├── 2.jpg │ └── 3.jpg ├── list-coco-demo.txt ├── list-coco-minival500.txt └── list-coco-val5000.txt ├── model_files ├── J_regressor_h36m.npy ├── h36m_mean_beta.npy └── smpl_faces.npy ├── pretrained_models └── get_models.sh ├── scripts ├── demo_3d_inference.py ├── demo_api.py ├── demo_inference.py ├── inference.sh ├── inference_3d.sh ├── 
train.py ├── train.sh ├── validate.py └── validate.sh ├── setup.cfg ├── setup.py └── trackers ├── PoseFlow ├── README.md ├── alpha-pose-results-sample.json ├── matching.py ├── parallel_process.py ├── poseflow_infer.py ├── posetrack1.gif ├── posetrack2.gif ├── posetrack_data ├── poseval ├── requirements.txt ├── tracker-baseline.py ├── tracker-general.py └── utils.py ├── README.md ├── ReidModels ├── ResBnLin.py ├── ResNet.py ├── __init__.py ├── backbone │ ├── __init__.py │ ├── googlenet.py │ ├── lrn.py │ └── sqeezenet.py ├── bn_linear.py ├── classification │ ├── __init__.py │ ├── classifier.py │ └── rfcn_cls.py ├── net_utils.py ├── osnet.py ├── osnet_ain.py ├── psroi_pooling │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── psroi_pooling │ │ │ └── __init__.py │ ├── build.py │ ├── functions │ │ ├── __init__.py │ │ └── psroi_pooling.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── psroi_pool.py │ └── src │ │ ├── cuda │ │ ├── psroi_pooling_kernel.cu │ │ └── psroi_pooling_kernel.h │ │ ├── psroi_pooling_cuda.c │ │ └── psroi_pooling_cuda.h ├── reid │ ├── __init__.py │ └── image_part_aligned.py └── resnet_fc.py ├── __init__.py ├── tracker_api.py ├── tracker_cfg.py ├── tracking ├── README.md ├── __init__.py ├── basetrack.py ├── matching.py └── utils │ ├── __init__.py │ ├── io.py │ ├── kalman_filter.py │ ├── nms.py │ ├── parse_config.py │ ├── timer.py │ └── utils.py └── utils ├── basetransforms.py ├── bbox.py ├── io.py ├── kalman_filter.py ├── log.py ├── parse_config.py ├── timer.py ├── transform.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .vscode 105 | .tensorboard 106 | exp/coco* 107 | *.pth 108 | *.weights 109 | exp/json/test_kpt.json 110 | exp/json/test_gt_kpt.json 111 | exp/json/validate_rcnn_kpt.json 112 | exp/json/validate_gt_kpt.json 113 | data/ 114 | tmp/ 115 | exp/json 116 | tmp_*/ 117 | examples/res*/ 118 | data/ 119 | exp/ 120 | data 121 | -------------------------------------------------------------------------------- /alphapose/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /alphapose/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_det import Mscoco_det 2 | from .concat_dataset import ConcatDataset 3 | from .custom import CustomDataset 4 | from .mscoco import Mscoco 5 | from .mpii import Mpii 6 | from .coco_wholebody import coco_wholebody 7 | from .coco_wholebody_det import coco_wholebody_det 8 | from .halpe_26 import Halpe_26 9 | from .halpe_136 import Halpe_136 10 | from .halpe_136_det import Halpe_136_det 11 | from .halpe_26_det import Halpe_26_det 12 | from .halpe_coco_wholebody_26 import Halpe_coco_wholebody_26 13 | from .halpe_coco_wholebody_26_det import Halpe_coco_wholebody_26_det 14 | from .halpe_coco_wholebody_136 import Halpe_coco_wholebody_136 15 | from .halpe_coco_wholebody_136_det import Halpe_coco_wholebody_136_det 16 | from .halpe_68_noface import Halpe_68_noface 17 | from .halpe_68_noface_det import Halpe_68_noface_det 18 | from .single_hand import SingleHand 19 | from .single_hand_det import SingleHand_det 20 | 21 | __all__ = ['CustomDataset', 'ConcatDataset', 'Mpii', 'Mscoco', 'Mscoco_det', \ 22 | 'Halpe_26', 'Halpe_26_det', 'Halpe_136', 'Halpe_136_det', \ 23 | 'Halpe_coco_wholebody_26', 'Halpe_coco_wholebody_26_det', \ 24 | 'Halpe_coco_wholebody_136', 'Halpe_coco_wholebody_136_det', \ 25 | 'Halpe_68_noface', 'Halpe_68_noface_det', 'SingleHand', 'SingleHand_det', \ 26 | 'coco_wholebody', 'coco_wholebody_det'] 27 | -------------------------------------------------------------------------------- /alphapose/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright 
(c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import bisect 7 | 8 | import torch 9 | import torch.utils.data as data 10 | 11 | from alphapose.models.builder import DATASET, build_dataset 12 | 13 | 14 | @DATASET.register_module 15 | class ConcatDataset(data.Dataset): 16 | """Custom Concat dataset. 17 | Annotation file must be in `coco` format. 18 | 19 | Parameters 20 | ---------- 21 | train: bool, default is True 22 | If true, will set as training mode. 23 | dpg: bool, default is False 24 | If true, will activate `dpg` for data augmentation. 25 | skip_empty: bool, default is False 26 | Whether skip entire image if no valid label is found. 27 | cfg: dict, dataset configuration. 28 | """ 29 | 30 | def __init__(self, 31 | train=True, 32 | dpg=False, 33 | skip_empty=True, 34 | **cfg): 35 | 36 | self._cfg = cfg 37 | self._subset_cfg_list = cfg['SET_LIST'] 38 | self._preset_cfg = cfg['PRESET'] 39 | self._mask_id = [item['MASK_ID'] for item in self._subset_cfg_list] 40 | 41 | self.num_joints = self._preset_cfg['NUM_JOINTS'] 42 | 43 | self._subsets = [] 44 | self._subset_size = [0] 45 | for _subset_cfg in self._subset_cfg_list: 46 | subset = build_dataset(_subset_cfg, preset_cfg=self._preset_cfg, train=train) 47 | self._subsets.append(subset) 48 | self._subset_size.append(len(subset)) 49 | self.cumulative_sizes = self.cumsum(self._subset_size) 50 | 51 | def __getitem__(self, idx): 52 | assert idx >= 0 53 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 54 | dataset_idx -= 1 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx] 56 | 57 | sample = self._subsets[dataset_idx][sample_idx] 58 | img, label, label_mask, img_id, bbox = sample 59 | 60 | K = label.shape[0] # num_joints from `_subsets[dataset_idx]` 61 | expend_label = torch.zeros((self.num_joints, *label.shape[1:]), dtype=label.dtype) 62 | expend_label_mask = torch.zeros((self.num_joints, *label_mask.shape[1:]), dtype=label_mask.dtype) 63 | expend_label[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label 64 | expend_label_mask[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label_mask 65 | 66 | return img, expend_label, expend_label_mask, img_id, bbox 67 | 68 | def __len__(self): 69 | return self.cumulative_sizes[-1] 70 | 71 | @staticmethod 72 | def cumsum(sequence): 73 | r, s = [], 0 74 | for e in sequence: 75 | r.append(e + s) 76 | s += e 77 | return r 78 | -------------------------------------------------------------------------------- /alphapose/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .fastpose import FastPose 2 | from .fastpose_duc import FastPose_DUC 3 | from .hrnet import PoseHighResolutionNet 4 | from .simplepose import SimplePose 5 | from .fastpose_duc_dense import FastPose_DUC_Dense 6 | from .hardnet import HarDNetPose 7 | from .simple3dposeSMPLWithCam import Simple3DPoseBaseSMPLCam 8 | from .criterion import L1JointRegression 9 | 10 | __all__ = ['FastPose', 'SimplePose', 'PoseHighResolutionNet', 11 | 'FastPose_DUC', 'FastPose_DUC_Dense', 'HarDNetPose', 12 | 'Simple3DPoseBaseSMPLCam', 13 | 'L1JointRegression'] 14 | -------------------------------------------------------------------------------- /alphapose/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from alphapose.utils import Registry, build_from_cfg, 
retrieve_from_cfg 4 | 5 | 6 | SPPE = Registry('sppe') 7 | LOSS = Registry('loss') 8 | DATASET = Registry('dataset') 9 | 10 | 11 | def build(cfg, registry, default_args=None): 12 | if isinstance(cfg, list): 13 | modules = [ 14 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 15 | ] 16 | return nn.Sequential(*modules) 17 | else: 18 | return build_from_cfg(cfg, registry, default_args) 19 | 20 | 21 | def build_sppe(cfg, preset_cfg, **kwargs): 22 | default_args = { 23 | 'PRESET': preset_cfg, 24 | } 25 | for key, value in kwargs.items(): 26 | default_args[key] = value 27 | return build(cfg, SPPE, default_args=default_args) 28 | 29 | 30 | def build_loss(cfg): 31 | return build(cfg, LOSS) 32 | 33 | 34 | def build_dataset(cfg, preset_cfg, **kwargs): 35 | exec(f'from ..datasets import {cfg.TYPE}') 36 | default_args = { 37 | 'PRESET': preset_cfg, 38 | } 39 | for key, value in kwargs.items(): 40 | default_args[key] = value 41 | return build(cfg, DATASET, default_args=default_args) 42 | 43 | 44 | def retrieve_dataset(cfg): 45 | exec(f'from ..datasets import {cfg.TYPE}') 46 | return retrieve_from_cfg(cfg, DATASET) 47 | -------------------------------------------------------------------------------- /alphapose/models/fastpose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.DUC import DUC 10 | from .layers.SE_Resnet import SEResnet 11 | 12 | 13 | @SPPE.register_module 14 | class FastPose(nn.Module): 15 | 16 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 17 | super(FastPose, self).__init__() 18 | self._preset_cfg = cfg['PRESET'] 19 | if 'CONV_DIM' in cfg.keys(): 20 | self.conv_dim = cfg['CONV_DIM'] 21 | else: 22 | self.conv_dim = 128 23 | if 'DCN' in cfg.keys(): 24 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 25 | dcn = cfg['DCN'] 26 | self.preact = SEResnet( 27 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 28 | else: 29 | self.preact = SEResnet(f"resnet{cfg['NUM_LAYERS']}") 30 | 31 | # Imagenet pretrain model 32 | import torchvision.models as tm # noqa: F401,F403 33 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 34 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 35 | 36 | model_state = self.preact.state_dict() 37 | state = {k: v for k, v in x.state_dict().items() 38 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 39 | model_state.update(state) 40 | self.preact.load_state_dict(model_state) 41 | 42 | self.suffle1 = nn.PixelShuffle(2) 43 | self.duc1 = DUC(512, 1024, upscale_factor=2, norm_layer=norm_layer) 44 | if self.conv_dim == 256: 45 | self.duc2 = DUC(256, 1024, upscale_factor=2, norm_layer=norm_layer) 46 | else: 47 | self.duc2 = DUC(256, 512, upscale_factor=2, norm_layer=norm_layer) 48 | self.conv_out = nn.Conv2d( 49 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 50 | 51 | def forward(self, x): 52 | out = self.preact(x) 53 | out = self.suffle1(out) 54 | out = self.duc1(out) 55 | out = self.duc2(out) 56 | 57 | out = self.conv_out(out) 58 | return out 59 | 60 | def _initialize(self): 61 | for m in self.conv_out.modules(): 62 | if isinstance(m, nn.Conv2d): 63 | # nn.init.kaiming_normal_(m.weight, 
mode='fan_out', nonlinearity='relu') 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | nn.init.constant_(m.bias, 0) 68 | -------------------------------------------------------------------------------- /alphapose/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class DUC(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, inplanes, planes, 16 | upscale_factor=2, norm_layer=nn.BatchNorm2d): 17 | super(DUC, self).__init__() 18 | self.conv = nn.Conv2d( 19 | inplanes, planes, kernel_size=3, padding=1, bias=False) 20 | self.bn = norm_layer(planes, momentum=0.1) 21 | self.relu = nn.ReLU(inplace=True) 22 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = self.bn(x) 27 | x = self.relu(x) 28 | x = self.pixel_shuffle(x) 29 | return x 30 | -------------------------------------------------------------------------------- /alphapose/models/layers/PixelUnshuffle.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class PixelUnshuffle(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, downscale_factor=2): 16 | super(PixelUnshuffle, self).__init__() 17 | self._r = downscale_factor 18 | 19 | def forward(self, x): 20 | b, c, h, w = x.shape 21 | out_c = c * (self._r * self._r) 22 | out_h = h // self._r 23 | out_w = w // self._r 24 | 25 | x_view = x.contiguous().view(b, c, out_h, self._r, out_w, self._r) 26 | x_prime = x_view.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_c, out_h, out_w) 27 | 28 | return x_prime 29 | -------------------------------------------------------------------------------- /alphapose/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | from torch import nn 7 | 8 | 9 | class SELayer(nn.Module): 10 | def __init__(self, channel, reduction=1): 11 | super(SELayer, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Linear(channel, channel // reduction), 15 | nn.ReLU(inplace=True), 16 | nn.Linear(channel // reduction, channel), 17 | nn.Sigmoid() 18 | ) 19 | 20 | def forward(self, x): 21 | b, c, _, _ = x.size() 22 | y = self.avg_pool(x).view(b, c) 23 | y = self.fc(y).view(b, c, 1, 1) 24 | return x * y 25 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/DCN.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from . import DeformConv, ModulatedDeformConv 9 | 10 | 11 | class DCN(nn.Module): 12 | ''' 13 | Initialize: inplanes, planes, upscale_factor 14 | OUTPUT: (planes // upscale_factor^2) * ht * wd 15 | ''' 16 | 17 | def __init__(self, inplanes, planes, dcn, 18 | kernel_size, stride=1, 19 | padding=0, bias=False): 20 | super(DCN, self).__init__() 21 | fallback_on_stride = dcn.get('FALLBACK_ON_STRIDE', False) 22 | self.with_modulated_dcn = dcn.get('MODULATED', False) 23 | if fallback_on_stride: 24 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, stride=stride, 25 | padding=padding, bias=bias) 26 | else: 27 | self.deformable_groups = dcn.get('DEFORM_GROUP', 1) 28 | if not self.with_modulated_dcn: 29 | conv_op = DeformConv 30 | offset_channels = 18 31 | else: 32 | conv_op = ModulatedDeformConv 33 | offset_channels = 27 34 | 35 | self.conv_offset = nn.Conv2d( 36 | inplanes, 37 | self.deformable_groups * offset_channels, 38 | kernel_size=kernel_size, 39 | stride=stride, 40 | padding=padding) 41 | self.conv = conv_op( 42 | inplanes, 43 | planes, 44 | kernel_size=kernel_size, 45 | stride=stride, 46 | padding=padding, 47 | deformable_groups=self.deformable_groups, 48 | bias=bias) 49 | 50 | def forward(self, x): 51 | if self.with_modulated_dcn: 52 | offset_mask = self.conv_offset(x) 53 | offset = offset_mask[:, :18 * self.deformable_groups, :, :] 54 | mask = offset_mask[:, -9 * self.deformable_groups:, :, :] 55 | mask = mask.sigmoid() 56 | out = self.conv(x, offset, mask) 57 | else: 58 | offset = self.conv_offset(x) 59 | out = self.conv(x, offset) 60 | 61 | return out 62 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | from .DCN import DCN 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling', 'DCN' 13 | ] 14 | 
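Editor's sketch (not a repository file), assuming the deform_conv CUDA extension under layers/dcn/src has been built: it constructs the DCN wrapper defined in DCN.py above with a config dict whose keys (MODULATED, DEFORM_GROUP, FALLBACK_ON_STRIDE) mirror the DCN block of the ResNet YAML configs later in this listing.

import torch
from alphapose.models.layers.dcn import DCN

# Keys follow the DCN section of the coco/resnet *-dcn.yaml configs shown below.
dcn_cfg = {'MODULATED': False, 'DEFORM_GROUP': 1, 'FALLBACK_ON_STRIDE': False}

# 3x3 deformable conv: conv_offset predicts the sampling offsets internally.
layer = DCN(inplanes=256, planes=256, dcn=dcn_cfg, kernel_size=3, padding=1)

# The forward pass needs the compiled deform_conv_cuda extension and a GPU.
if torch.cuda.is_available():
    out = layer.cuda()(torch.randn(1, 256, 64, 48, device='cuda'))  # -> (1, 256, 64, 48)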
-------------------------------------------------------------------------------- /alphapose/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg, retrieve_from_cfg 2 | 3 | __all__ = [ 4 | 'Registry', 'build_from_cfg', 'retrieve_from_cfg' 5 | ] 6 | -------------------------------------------------------------------------------- /alphapose/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | 4 | 5 | def update_config(config_file): 6 | with open(config_file) as f: 7 | config = edict(yaml.load(f, Loader=yaml.FullLoader)) 8 | return config 9 | -------------------------------------------------------------------------------- /alphapose/utils/env.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import os 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def init_dist(opt): 12 | """Initialize distributed computing environment.""" 13 | opt.ngpus_per_node = torch.cuda.device_count() 14 | 15 | torch.cuda.set_device(opt.gpu) 16 | 17 | if opt.launcher == 'pytorch': 18 | _init_dist_pytorch(opt) 19 | elif opt.launcher == 'mpi': 20 | _init_dist_mpi(opt) 21 | elif opt.launcher == 'slurm': 22 | _init_dist_slurm(opt) 23 | else: 24 | raise ValueError('Invalid launcher type: {}'.format(opt.launcher)) 25 | 26 | 27 | def _init_dist_pytorch(opt, **kwargs): 28 | """Set up environment.""" 29 | # TODO: use local_rank instead of rank % num_gpus 30 | opt.rank = opt.rank * opt.ngpus_per_node + opt.gpu 31 | opt.world_size = opt.world_size 32 | dist.init_process_group(backend=opt.dist_backend, init_method=opt.dist_url, 33 | world_size=opt.world_size, rank=opt.rank) 34 | print(f"{opt.dist_url}, ws:{opt.world_size}, rank:{opt.rank}") 35 | 36 | if opt.rank % opt.ngpus_per_node == 0: 37 | opt.log = True 38 | else: 39 | opt.log = False 40 | 41 | 42 | def _init_dist_slurm(opt, port=23348, **kwargs): 43 | """Set up slurm environment.""" 44 | proc_id = int(os.environ['SLURM_PROCID']) 45 | ntasks = int(os.environ['SLURM_NTASKS']) 46 | node_list = os.environ['SLURM_NODELIST'] 47 | num_gpus = torch.cuda.device_count() 48 | torch.cuda.set_device(proc_id % num_gpus) 49 | if '[' in node_list: 50 | beg = node_list.find('[') 51 | pos1 = node_list.find('-', beg) 52 | if pos1 < 0: 53 | pos1 = 1000 54 | pos2 = node_list.find(',', beg) 55 | if pos2 < 0: 56 | pos2 = 1000 57 | node_list = node_list[:min(pos1, pos2)].replace('[', '') 58 | addr = node_list[8:].replace('-', '.') 59 | os.environ['MASTER_PORT'] = str(port) 60 | os.environ['MASTER_ADDR'] = addr 61 | os.environ['WORLD_SIZE'] = str(ntasks) 62 | os.environ['RANK'] = str(proc_id) 63 | 64 | opt.ngpus_per_node = num_gpus 65 | opt.rank = int(proc_id) 66 | opt.rank = proc_id * num_gpus + opt.gpu 67 | opt.world_size = int(ntasks) * num_gpus 68 | 69 | print(f"tcp://{node_list}:{port}, ws:{opt.world_size}, rank:{opt.rank}, proc_id:{proc_id}") 70 | dist.init_process_group(backend=opt.dist_backend, 71 | init_method=f'tcp://{node_list}:{port}', 72 | world_size=opt.world_size, 73 | rank=opt.rank) 74 | if opt.rank == 0: 75 | opt.log = True 76 | else: 77 | opt.log = False 78 | 79 | 80 | def 
_init_dist_mpi(backend, **kwargs): 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /alphapose/utils/logger.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def board_writing(writer, loss, acc, iterations, dataset='Train'): 11 | writer.add_scalar( 12 | '{}/Loss'.format(dataset), loss, iterations) 13 | writer.add_scalar( 14 | '{}/acc'.format(dataset), acc, iterations) 15 | 16 | 17 | def debug_writing(writer, outputs, labels, inputs, iterations): 18 | tmp_tar = torch.unsqueeze(labels.cpu().data[0], dim=1) 19 | # tmp_out = torch.unsqueeze(outputs.cpu().data[0], dim=1) 20 | 21 | tmp_inp = inputs.cpu().data[0] 22 | tmp_inp[0] += 0.406 23 | tmp_inp[1] += 0.457 24 | tmp_inp[2] += 0.480 25 | 26 | tmp_inp[0] += torch.sum(F.interpolate(tmp_tar, scale_factor=4, mode='bilinear'), dim=0)[0] 27 | tmp_inp.clamp_(0, 1) 28 | 29 | writer.add_image('Data/input', tmp_inp, iterations) 30 | -------------------------------------------------------------------------------- /alphapose/utils/presets/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_transform import SimpleTransform 2 | from .simple_transform_3d_smpl import SimpleTransform3DSMPL 3 | 4 | __all__ = ['SimpleTransform', 'SimpleTransform3DSMPL'] 5 | -------------------------------------------------------------------------------- /alphapose/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | def __repr__(self): 11 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 12 | self._name, list(self._module_dict.keys())) 13 | return format_str 14 | 15 | @property 16 | def name(self): 17 | return self._name 18 | 19 | @property 20 | def module_dict(self): 21 | return self._module_dict 22 | 23 | def get(self, key): 24 | return self._module_dict.get(key, None) 25 | 26 | def _register_module(self, module_class): 27 | """Register a module. 28 | 29 | Args: 30 | module (:obj:`nn.Module`): Module to be registered. 31 | """ 32 | if not inspect.isclass(module_class): 33 | raise TypeError('module must be a class, but got {}'.format( 34 | type(module_class))) 35 | module_name = module_class.__name__ 36 | if module_name in self._module_dict: 37 | raise KeyError('{} is already registered in {}'.format( 38 | module_name, self.name)) 39 | self._module_dict[module_name] = module_class 40 | 41 | def register_module(self, cls): 42 | self._register_module(cls) 43 | return cls 44 | 45 | 46 | def build_from_cfg(cfg, registry, default_args=None): 47 | """Build a module from config dict. 48 | 49 | Args: 50 | cfg (dict): Config dict. It should at least contain the key "type". 51 | registry (:obj:`Registry`): The registry to search the type from. 52 | default_args (dict, optional): Default initialization arguments. 53 | 54 | Returns: 55 | obj: The constructed object. 
56 | """ 57 | assert isinstance(cfg, dict) and 'TYPE' in cfg 58 | assert isinstance(default_args, dict) or default_args is None 59 | args = cfg.copy() 60 | obj_type = args.pop('TYPE') 61 | 62 | if isinstance(obj_type, str): 63 | obj_cls = registry.get(obj_type) 64 | if obj_cls is None: 65 | raise KeyError('{} is not in the {} registry'.format( 66 | obj_type, registry.name)) 67 | elif inspect.isclass(obj_type): 68 | obj_cls = obj_type 69 | else: 70 | raise TypeError('type must be a str or valid type, but got {}'.format( 71 | type(obj_type))) 72 | if default_args is not None: 73 | for name, value in default_args.items(): 74 | args.setdefault(name, value) 75 | return obj_cls(**args) 76 | 77 | 78 | def retrieve_from_cfg(cfg, registry): 79 | """Retrieve a module class from config dict. 80 | 81 | Args: 82 | cfg (dict): Config dict. It should at least contain the key "type". 83 | registry (:obj:`Registry`): The registry to search the type from. 84 | 85 | Returns: 86 | class: The class. 87 | """ 88 | assert isinstance(cfg, dict) and 'TYPE' in cfg 89 | args = cfg.copy() 90 | obj_type = args.pop('TYPE') 91 | 92 | if isinstance(obj_type, str): 93 | obj_cls = registry.get(obj_type) 94 | if obj_cls is None: 95 | raise KeyError('{} is not in the {} registry'.format( 96 | obj_type, registry.name)) 97 | elif inspect.isclass(obj_type): 98 | obj_cls = obj_type 99 | else: 100 | raise TypeError('type must be a str or valid type, but got {}'.format( 101 | type(obj_type))) 102 | 103 | return obj_cls 104 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import roi_align, RoIAlign 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . 
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale=1, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = out_size 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, _pair(self.out_size), 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), 
#x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /alphapose/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Thu Jul 21 17:10:51 2022 3 | 4 | __version__ = '0.6.0+29ace8c' 5 | short_version = '0.6.0' 6 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 
'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard85_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 85 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hrnet/256x192_w32_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'PoseHighResolutionNet' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_LAYERS: 50 39 | FINAL_CONV_KERNEL: 1 40 | PRETRAINED_LAYERS: ['*'] 41 | STAGE2: 42 | NUM_MODULES: 1 43 | NUM_BRANCHES: 2 44 | NUM_BLOCKS: [4, 4] 45 | NUM_CHANNELS: [32, 64] 46 | BLOCK: 'BASIC' 47 | FUSE_METHOD: 'SUM' 48 | STAGE3: 49 | NUM_MODULES: 4 50 | NUM_BRANCHES: 3 51 | NUM_BLOCKS: [4, 4, 4] 52 | NUM_CHANNELS: [32, 64, 128] 53 | BLOCK: 'BASIC' 54 | FUSE_METHOD: 'SUM' 55 | STAGE4: 56 | NUM_MODULES: 3 57 | NUM_BRANCHES: 4 58 | NUM_BLOCKS: [4, 4, 4, 4] 59 | NUM_CHANNELS: [32, 64, 128, 256] 60 | BLOCK: 'BASIC' 61 | 
FUSE_METHOD: 'SUM' 62 | LOSS: 63 | TYPE: 'MSELoss' 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 270 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 170 80 | - 200 81 | DPG_MILESTONE: 210 82 | DPG_STEP: 83 | - 230 84 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 152 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 74 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-concat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'ConcatDataset' 4 | SET_LIST: 5 | - TYPE: 'Mscoco' 6 | MASK_ID: 0 7 | ROOT: './data/coco/' 8 | IMG_PREFIX: 'train2017' 9 | ANN: 'annotations/person_keypoints_train2017.json' 10 | AUG: 11 | FLIP: true 12 | ROT_FACTOR: 40 13 | SCALE_FACTOR: 0.3 14 | NUM_JOINTS_HALF_BODY: 8 15 | PROB_HALF_BODY: -1 16 | - TYPE: 'Mpii' 17 | MASK_ID: 17 18 | ROOT: './data/mpii/' 19 | IMG_PREFIX: 'images' 20 | ANN: 'annot_mpii.json' 21 | AUG: 22 | FLIP: true 23 | ROT_FACTOR: 40 24 | SCALE_FACTOR: 0.3 25 | NUM_JOINTS_HALF_BODY: 8 26 | PROB_HALF_BODY: -1 27 | VAL: 28 | TYPE: 'Mscoco' 29 | ROOT: './data/coco/' 30 | IMG_PREFIX: 'val2017' 31 | ANN: 'annotations/person_keypoints_val2017.json' 32 | TEST: 33 | TYPE: 'Mscoco_det' 34 | ROOT: './data/coco/' 35 | IMG_PREFIX: 'val2017' 36 | DET_FILE: './exp/json/test_det_yolo.json' 37 | ANN: 'annotations/person_keypoints_val2017.json' 38 | DATA_PRESET: 39 | TYPE: 'simple' 40 | SIGMA: 2 41 | NUM_JOINTS: 33 42 | IMAGE_SIZE: 43 | - 256 44 | - 192 45 | HEATMAP_SIZE: 46 | - 64 47 | - 48 48 | MODEL: 49 | TYPE: 'FastPose' 50 | PRETRAINED: '' 51 | TRY_LOAD: '' 52 | 
NUM_DECONV_FILTERS: 53 | - 256 54 | - 256 55 | - 256 56 | NUM_LAYERS: 50 57 | LOSS: 58 | TYPE: 'MSELoss' 59 | DETECTOR: 60 | NAME: 'yolo' 61 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 62 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 63 | NMS_THRES: 0.6 64 | CONFIDENCE: 0.05 65 | TRAIN: 66 | WORLD_SIZE: 4 67 | BATCH_SIZE: 32 68 | BEGIN_EPOCH: 0 69 | END_EPOCH: 200 70 | OPTIMIZER: 'adam' 71 | LR: 0.001 72 | LR_FACTOR: 0.1 73 | LR_STEP: 74 | - 90 75 | - 120 76 | DPG_MILESTONE: 140 77 | DPG_STEP: 78 | - 160 79 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'shuffle' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-simple.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'SimplePose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | 
CONFIDENCE: 0.1 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | DCN: 44 | MODULATED: false 45 | DEFORM_GROUP: 1 46 | FALLBACK_ON_STRIDE: false 47 | STAGE_WITH_DCN: 48 | - false 49 | - true 50 | - true 51 | - true 52 | LOSS: 53 | TYPE: 'MSELoss' 54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 32 63 | BEGIN_EPOCH: 0 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 
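Editor's sketch (not a repository file): how a config such as the 256x192_res50_lr1e-3_2x-dcn.yaml above is loaded with update_config and turned into a pose model through the registry-based build_sppe shown earlier. Building this DCN variant assumes the deformable-conv extension is compiled, and FastPose will fetch ImageNet ResNet weights via torchvision on first use.

from alphapose.utils.config import update_config
from alphapose.models import builder

# Parse the YAML into an EasyDict (see alphapose/utils/config.py above).
cfg = update_config('configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml')

# cfg.MODEL.TYPE ('FastPose') is looked up in the SPPE registry; the class is
# instantiated with the remaining MODEL keys plus PRESET=cfg.DATA_PRESET.
model = builder.build_sppe(cfg.MODEL, preset_cfg=cfg.DATA_PRESET)
model.eval()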
-------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 17 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 32 57 | BEGIN_EPOCH: 0 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | 
IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | BACKBONE: 'se-resnet' 38 | PRETRAINED: '' 39 | TRY_LOAD: '' 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 152 45 | CONV_DIM: 256 46 | FINAL_CONV_KERNEL: 1 47 | STAGE1: 48 | NUM_CONV: 4 49 | STAGE2: 50 | NUM_CONV: 2 51 | STAGE3: 52 | NUM_CONV: 1 53 | LOSS: 54 | TYPE: 'L1JointRegression' 55 | NORM_TYPE: 'sigmoid' 56 | OUTPUT_3D: False 57 | DETECTOR: 58 | NAME: 'yolo' 59 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 60 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 61 | NMS_THRES: 0.6 62 | CONFIDENCE: 0.05 63 | TRAIN: 64 | WORLD_SIZE: 4 65 | BATCH_SIZE: 32 66 | BEGIN_EPOCH: 0 67 | END_EPOCH: 270 68 | OPTIMIZER: 'adam' 69 | LR: 0.001 70 | LR_FACTOR: 0.1 71 | LR_STEP: 72 | - 170 73 | - 200 74 | DPG_MILESTONE: 210 75 | DPG_STEP: 76 | - 230 77 | - 250 78 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'Combined' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | LOSS_1: 50 | TYPE: 'MSELoss' 51 | LOSS_2: 52 | TYPE: 'L1JointRegression' 53 | NORM_TYPE: 'sigmoid' 54 | OUTPUT_3D: False 55 | DETECTOR: 56 | NAME: 'yolo' 57 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 58 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 59 | NMS_THRES: 0.6 60 | CONFIDENCE: 0.05 61 | TRAIN: 62 | WORLD_SIZE: 4 63 | BATCH_SIZE: 32 64 | BEGIN_EPOCH: 0 65 | END_EPOCH: 270 66 | OPTIMIZER: 'adam' 67 | LR: 0.001 68 | LR_FACTOR: 0.1 69 | LR_STEP: 70 | - 170 71 | - 200 72 | DPG_MILESTONE: 210 73 | DPG_STEP: 74 | - 230 75 | - 250 76 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: 
-------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 320 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 190 80 | - 220 81 | DPG_MILESTONE: 230 82 | DPG_STEP: 83 | - 260 84 | - 280 85 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | 
END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/coco_wholebody/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'coco_wholebody' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/coco_wholebody_train_v1.0.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'coco_wholebody' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 18 | TEST: 19 | TYPE: 'coco_wholebody_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/coco_wholebody_val_v1.0.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 133 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 48 57 | BEGIN_EPOCH: 5 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 69 | -------------------------------------------------------------------------------- /configs/dense_coco/resnet50/256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'densecoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017_dense.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 16 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'densecoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017_dense.json' 18 | TEST: 19 | TYPE: 'densecoco' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'test2017' 22 | ANN: 'annotations/person_keypoints_val2017_dense.json' 23 | DATA_PRESET: 24 | TYPE: 'simple' 25 | SIGMA: 2 26 | NUM_JOINTS: 17 27 | NUM_JOINTS_DENSE: 49 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC_Dense' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'MSELoss' 62 | DETECTOR: 63 | NAME: 'yolo' 64 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 65 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 66 | NMS_THRES: 0.6 67 | CONFIDENCE: 0.05 68 | TRAIN: 69 | 
WORLD_SIZE: 4 70 | BATCH_SIZE: 32 71 | BEGIN_EPOCH: 0 72 | END_EPOCH: 200 73 | OPTIMIZER: 'adam' 74 | LR: 0.001 75 | LR_FACTOR: 0.1 76 | LR_STEP: 77 | - 90 78 | - 120 79 | DPG_MILESTONE: 140 80 | DPG_STEP: 81 | - 160 82 | - 190 -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | BACKBONE: 'se-resnet' 38 | PRETRAINED: '' 39 | TRY_LOAD: '' 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 152 45 | CONV_DIM: 256 46 | FINAL_CONV_KERNEL: 1 47 | STAGE1: 48 | NUM_CONV: 4 49 | STAGE2: 50 | NUM_CONV: 2 51 | STAGE3: 52 | NUM_CONV: 1 53 | LOSS: 54 | TYPE: 'L1JointRegression' 55 | NORM_TYPE: 'sigmoid' 56 | OUTPUT_3D: False 57 | DETECTOR: 58 | NAME: 'yolo' 59 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 60 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 61 | NMS_THRES: 0.6 62 | CONFIDENCE: 0.05 63 | TRAIN: 64 | WORLD_SIZE: 4 65 | BATCH_SIZE: 32 66 | BEGIN_EPOCH: 0 67 | END_EPOCH: 270 68 | OPTIMIZER: 'adam' 69 | LR: 0.001 70 | LR_FACTOR: 0.1 71 | LR_STEP: 72 | - 170 73 | - 200 74 | DPG_MILESTONE: 210 75 | DPG_STEP: 76 | - 230 77 | - 250 78 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | 
NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 48 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 320 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 190 80 | - 220 81 | DPG_MILESTONE: 230 82 | DPG_STEP: 83 | - 260 84 | - 280 85 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | 
NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | CONV_DIM: 256 44 | LOSS: 45 | TYPE: 'MSELoss' 46 | DETECTOR: 47 | NAME: 'yolo' 48 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 49 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 50 | NMS_THRES: 0.6 51 | CONFIDENCE: 0.05 52 | TRAIN: 53 | WORLD_SIZE: 4 54 | BATCH_SIZE: 32 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 270 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 170 62 | - 200 63 | DPG_MILESTONE: 210 64 | DPG_STEP: 65 | - 230 66 | - 250 67 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 11 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 48 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 50 
61 | - 70 62 | DPG_MILESTONE: 90 63 | DPG_STEP: 64 | - 110 65 | - 130 66 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 26 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'L1JointRegression' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | DETECTOR: 59 | NAME: 'yolo' 60 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 61 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 62 | NMS_THRES: 0.6 63 | CONFIDENCE: 0.05 64 | TRAIN: 65 | WORLD_SIZE: 4 66 | BATCH_SIZE: 48 67 | BEGIN_EPOCH: 0 68 | END_EPOCH: 270 69 | OPTIMIZER: 'adam' 70 | LR: 0.001 71 | LR_FACTOR: 0.1 72 | LR_STEP: 73 | - 170 74 | - 200 75 | DPG_MILESTONE: 210 76 | DPG_STEP: 77 | - 230 78 | - 250 79 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 26 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 
66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_26_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/halpe_68_noface/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_68_noface' 4 | ROOT: './data/halpe/' 5 | IMG_PREFIX: 'images/train2015' 6 | ANN: 'annotations/halpe_train_v1.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_68_noface' 15 | ROOT: './data/halpe/' 16 | IMG_PREFIX: 'images/val2017' 17 | ANN: 'annotations/halpe_val_v1.json' 18 | TEST: 19 | TYPE: 'Halpe_68_noface_det' 20 | ROOT: './data/halpe/' 21 | IMG_PREFIX: 'images/val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'Combined' 27 | SIGMA: 2 28 | NUM_JOINTS: 68 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | CONV_DIM: 256 44 | NUM_LAYERS: 50 45 | DCN: 46 | MODULATED: false 47 | DEFORM_GROUP: 1 48 | FALLBACK_ON_STRIDE: false 49 | STAGE_WITH_DCN: 50 | - false 51 | - true 52 | - true 53 | - true 54 | LOSS: 55 | TYPE: 'Combined' 56 | NORM_TYPE: 'sigmoid' 57 | OUTPUT_3D: False 58 | LOSS_1: 59 | TYPE: 'MSELoss' 60 | LOSS_2: 61 | TYPE: 'L1JointRegression' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 5 72 | BATCH_SIZE: 52 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 120 75 | OPTIMIZER: 'adam' 76 | LR: 0.0001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 10 80 | - 30 81 | 
DPG_MILESTONE: 40 82 | DPG_STEP: 83 | - 60 84 | - 90 -------------------------------------------------------------------------------- /configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-dcn-combined.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_coco_wholebody_136' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 8 18 | PROB_HALF_BODY: 0.3 19 | VAL: 20 | TYPE: 'Halpe_136' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'Halpe_136_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'Combined' 33 | SIGMA: 2 34 | NUM_JOINTS: 136 35 | IMAGE_SIZE: 36 | - 256 37 | - 192 38 | HEATMAP_SIZE: 39 | - 64 40 | - 48 41 | MODEL: 42 | TYPE: 'FastPose' 43 | PRETRAINED: '' 44 | TRY_LOAD: '' 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_LAYERS: 50 50 | CONV_DIM: 256 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'Combined' 62 | NORM_TYPE: 'sigmoid' 63 | OUTPUT_3D: False 64 | LOSS_1: 65 | TYPE: 'MSELoss' 66 | LOSS_2: 67 | TYPE: 'L1JointRegression' 68 | NORM_TYPE: 'sigmoid' 69 | OUTPUT_3D: False 70 | DETECTOR: 71 | NAME: 'yolo' 72 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 73 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 74 | NMS_THRES: 0.6 75 | CONFIDENCE: 0.05 76 | TRAIN: 77 | WORLD_SIZE: 4 78 | BATCH_SIZE: 48 79 | BEGIN_EPOCH: 0 80 | END_EPOCH: 270 81 | OPTIMIZER: 'adam' 82 | LR: 0.001 83 | LR_FACTOR: 0.1 84 | LR_STEP: 85 | - 170 86 | - 200 87 | DPG_MILESTONE: 210 88 | DPG_STEP: 89 | - 230 90 | - 250 91 | -------------------------------------------------------------------------------- /configs/halpe_coco_wholebody_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_coco_wholebody_136' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 8 18 | PROB_HALF_BODY: 0.3 19 | VAL: 20 | TYPE: 'Halpe_136' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'Halpe_136_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'L1JointRegression' 33 | SIGMA: 2 34 | NUM_JOINTS: 136 35 | IMAGE_SIZE: 36 | - 256 37 | - 192 38 | HEATMAP_SIZE: 39 | - 64 40 | - 48 41 | MODEL: 42 | TYPE: 'FastPose' 43 | PRETRAINED: '' 44 | TRY_LOAD: '' 45 | NUM_DECONV_FILTERS: 46 | - 256 47 | - 256 48 | - 256 49 | NUM_LAYERS: 50 50 | LOSS: 51 | TYPE: 'L1JointRegression' 52 | NORM_TYPE: 'sigmoid' 53 | OUTPUT_3D: False 
54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 48 63 | BEGIN_EPOCH: 5 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 75 | -------------------------------------------------------------------------------- /configs/single_hand/resnet/256x192_res50_lr1e-3_2x-dcn-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'SingleHand' 4 | ROOT: 5 | - './data/halpe/' 6 | - './data/coco/' 7 | IMG_PREFIX: 8 | - 'images/train2015' 9 | - 'train2017' 10 | ANN: 11 | - 'annotations/halpe_train_v1.json' 12 | - 'annotations/coco_wholebody_train_v1.0.json' 13 | AUG: 14 | FLIP: true 15 | ROT_FACTOR: 45 16 | SCALE_FACTOR: 0.35 17 | NUM_JOINTS_HALF_BODY: 0 18 | PROB_HALF_BODY: 0.0 19 | VAL: 20 | TYPE: 'SingleHand' 21 | ROOT: './data/halpe/' 22 | IMG_PREFIX: 'images/val2017' 23 | ANN: 'annotations/halpe_val_v1.json' 24 | TEST: 25 | TYPE: 'SingleHand_det' 26 | ROOT: './data/halpe/' 27 | IMG_PREFIX: 'images/val2017' 28 | DET_FILE: './exp/json/test_det_yolo.json' 29 | ANN: 'annotations/halpe_val_v1.json' 30 | DATA_PRESET: 31 | TYPE: 'simple' 32 | LOSS_TYPE: 'L1JointRegression' 33 | NORM_TYPE: 'sigmoid' 34 | SIGMA: 2 35 | NUM_JOINTS: 21 36 | IMAGE_SIZE: 37 | - 256 38 | - 192 39 | HEATMAP_SIZE: 40 | - 64 41 | - 48 42 | MODEL: 43 | TYPE: 'FastPose' 44 | PRETRAINED: '' 45 | TRY_LOAD: '' 46 | NUM_DECONV_FILTERS: 47 | - 256 48 | - 256 49 | - 256 50 | CONV_DIM: 256 51 | NUM_LAYERS: 50 52 | DCN: 53 | MODULATED: false 54 | DEFORM_GROUP: 1 55 | FALLBACK_ON_STRIDE: false 56 | STAGE_WITH_DCN: 57 | - false 58 | - true 59 | - true 60 | - true 61 | LOSS: 62 | TYPE: 'L1JointRegression' 63 | NORM_TYPE: 'sigmoid' 64 | OUTPUT_3D: False 65 | DETECTOR: 66 | NAME: 'yolo' 67 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 68 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 69 | NMS_THRES: 0.6 70 | CONFIDENCE: 0.05 71 | TRAIN: 72 | WORLD_SIZE: 6 73 | BATCH_SIZE: 48 74 | BEGIN_EPOCH: 0 75 | END_EPOCH: 100 76 | OPTIMIZER: 'adam' 77 | LR: 0.001 78 | LR_FACTOR: 0.1 79 | LR_STEP: 80 | - 15 81 | - 35 82 | DPG_MILESTONE: 50 83 | DPG_STEP: 84 | - 65 85 | - 80 -------------------------------------------------------------------------------- /configs/smpl/256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATASET: 'mix_smpl' 3 | SET_LIST: 4 | - ROOT: './data/h36m/' 5 | TEST_SET: 'Sample_20_test_Human36M_smpl' 6 | TRAIN_SET: 'Sample_5_train_Human36M_smpl_leaf_twist' 7 | - ROOT: './data/coco/' 8 | TRAIN_SET: 'train2017' 9 | - ROOT: './data/3dhp/' 10 | TRAIN_SET: 'train_v2' 11 | PROTOCOL: 2 12 | FLIP: True 13 | ROT_FACTOR: 30 14 | SCALE_FACTOR: 0.3 15 | NUM_JOINTS_HALF_BODY: 8 16 | PROB_HALF_BODY: -1 17 | COLOR_FACTOR: 0.2 18 | OCCLUSION: True 19 | DATA_PRESET: 20 | TYPE: 'simple_smpl' 21 | SIGMA: 2 22 | IMAGE_SIZE: 23 | - 256 24 | - 256 25 | HEATMAP_SIZE: 26 | - 64 27 | - 64 28 | MODEL: 29 | TYPE: 'Simple3DPoseBaseSMPLCam' 30 | PRETRAINED: '' 31 | TRY_LOAD: '' 32 | FOCAL_LENGTH: 1000 33 | IMAGE_SIZE: 34 | - 256 35 | - 256 36 | HEATMAP_SIZE: 37 | - 64 38 | - 64 39 | NUM_JOINTS: 29 40 | NUM_DECONV_FILTERS: 41 | - 256 42 | - 256 43 | - 256 44 | NUM_LAYERS: 34 45 | EXTRA: 46 | 
SIGMA: 2 47 | BACKBONE: 'resnet' 48 | CROP: 'padding' 49 | AUGMENT: 'none' 50 | PRESET: 'simple_smpl_3d' 51 | DEPTH_DIM: 64 52 | POST: 53 | NORM_TYPE: 'softmax' 54 | LOSS: 55 | TYPE: 'L1LossDimSMPL' 56 | ELEMENTS: 57 | BETA_WEIGHT: 1 58 | BETA_REG_WEIGHT: 0 59 | PHI_REG_WEIGHT: 0.0001 60 | LEAF_REG_WEIGHT: 0 61 | TWIST_WEIGHT: 0.01 62 | THETA_WEIGHT: 0.01 63 | UVD24_WEIGHT: 1 64 | XYZ24_WEIGHT: 0 65 | XYZ_SMPL24_WEIGHT: 0 66 | XYZ_SMPL17_WEIGHT: 0 67 | VERTICE_WEIGHT: 0 68 | TEST: 69 | HEATMAP2COORD: 'coord' 70 | TRAIN: 71 | WORLD_SIZE: 8 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 200 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 90 80 | - 120 81 | DPG_MILESTONE: 140 82 | DPG_STEP: 83 | - 160 84 | - 190 85 | -------------------------------------------------------------------------------- /detector/apis.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Chao Xu (xuchao.19962007@sjtu.edu.cn) 4 | # ----------------------------------------------------- 5 | 6 | """API of detector""" 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | def get_detector(opt=None): 11 | if opt.detector == 'yolo': 12 | from detector.yolo_api import YOLODetector 13 | from detector.yolo_cfg import cfg 14 | return YOLODetector(cfg, opt) 15 | elif 'yolox' in opt.detector: 16 | from detector.yolox_api import YOLOXDetector 17 | from detector.yolox_cfg import cfg 18 | if opt.detector.lower() == 'yolox': 19 | opt.detector = 'yolox-x' 20 | cfg.MODEL_NAME = opt.detector.lower() 21 | cfg.MODEL_WEIGHTS = f'detector/yolox/data/{opt.detector.lower().replace("-", "_")}.pth' 22 | return YOLOXDetector(cfg, opt) 23 | elif opt.detector == 'tracker': 24 | from detector.tracker_api import Tracker 25 | from detector.tracker_cfg import cfg 26 | return Tracker(cfg, opt) 27 | elif opt.detector.startswith('efficientdet_d'): 28 | from detector.effdet_api import EffDetDetector 29 | from detector.effdet_cfg import cfg 30 | return EffDetDetector(cfg, opt) 31 | else: 32 | raise NotImplementedError 33 | 34 | 35 | class BaseDetector(ABC): 36 | def __init__(self): 37 | pass 38 | 39 | @abstractmethod 40 | def image_preprocess(self, img_name): 41 | pass 42 | 43 | @abstractmethod 44 | def images_detection(self, imgs, orig_dim_list): 45 | pass 46 | 47 | @abstractmethod 48 | def detect_one_img(self, img_name): 49 | pass 50 | -------------------------------------------------------------------------------- /detector/effdet_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | 5 | cfg.NMS_THRES = 0.6 # 0.6(0.713) 0.5(0.707) 6 | cfg.CONFIDENCE = 0.2 # 0.15 0.1 7 | cfg.NUM_CLASSES = 80 8 | cfg.MAX_DETECTIONS = 200 # 100 9 | -------------------------------------------------------------------------------- /detector/efficientdet/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a EfficientDet Object Detector 2 | 3 | Forked and modified from https://github.com/rwightman/efficientdet-pytorch, many thanks! 
4 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | from .bench import DetBenchEval, DetBenchTrain 3 | from .config.config import get_efficientdet_config 4 | from .helpers import load_checkpoint, load_pretrained -------------------------------------------------------------------------------- /detector/efficientdet/effdet/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from collections import OrderedDict 4 | try: 5 | from torch.hub import load_state_dict_from_url 6 | except ImportError: 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | 10 | def load_checkpoint(model, checkpoint_path): 11 | if checkpoint_path and os.path.isfile(checkpoint_path): 12 | print("=> Loading checkpoint '{}'".format(checkpoint_path)) 13 | checkpoint = torch.load(checkpoint_path) 14 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 15 | new_state_dict = OrderedDict() 16 | for k, v in checkpoint['state_dict'].items(): 17 | if k.startswith('module'): 18 | name = k[7:] # remove `module.` 19 | else: 20 | name = k 21 | new_state_dict[name] = v 22 | model.load_state_dict(new_state_dict) 23 | else: 24 | model.load_state_dict(checkpoint) 25 | print("=> Loaded checkpoint '{}'".format(checkpoint_path)) 26 | else: 27 | print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) 28 | raise FileNotFoundError() 29 | 30 | 31 | def load_pretrained(model, url, filter_fn=None, strict=True): 32 | if not url: 33 | print("=> Warning: Pretrained model URL is empty, using random initialization.") 34 | return 35 | state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') 36 | if filter_fn is not None: 37 | state_dict = filter_fn(state_dict) 38 | model.load_state_dict(state_dict, strict=strict) 39 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection 2 | 3 | All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | # Object detection data loaders and libraries are mostly based on RetinaNet: 16 | # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet 17 | -------------------------------------------------------------------------------- /detector/efficientdet/weights/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/efficientdet/weights/get_models.sh -------------------------------------------------------------------------------- /detector/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /detector/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets.to('cpu') 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 27 | auto order = order_t.data_ptr<int64_t>(); 28 | auto x1 = x1_t.data_ptr<scalar_t>(); 29 | auto y1 = y1_t.data_ptr<scalar_t>(); 30 | auto x2 = x2_t.data_ptr<scalar_t>(); 31 | auto y2 = y2_t.data_ptr<scalar_t>(); 32 | auto areas = areas_t.data_ptr<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } 72 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } 18 | -------------------------------------------------------------------------------- /detector/tracker/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | MOT Tracker adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work!
3 | 4 | #### Getting started 5 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 6 | 7 | Enable tracking by setting the detector as tracker: `--detector tracker` -------------------------------------------------------------------------------- /detector/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/cfg/ccmcpe.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/wangzd/datasets/MOT", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.train", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "caltech":"./data/caltech.10k.val", 15 | "cuhksysu":"./data/cuhksysu.val", 16 | "prw":"./data/prw.val" 17 | }, 18 | "test": 19 | { 20 | "mot19":"./data/mot19.train", 21 | "caltech":"./data/caltech.val", 22 | "citypersons":"./data/citypersons.val" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /detector/tracker/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, img_size=(1088, 608), color=(127.5, 127.5, 127.5)): 20 | # resize a rectangular image to a padded rectangular 21 | height=img_size[1] 22 | width=img_size[0] 23 | shape = img.shape[:2] # shape = [height, width] 24 | ratio = min(float(height)/shape[0], float(width)/shape[1]) 25 | new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height] 26 | dw = (width - new_shape[0]) / 2 # width padding 27 | dh = (height - new_shape[1]) / 2 # height padding 28 | top, bottom = round(dh - 0.1), round(dh + 0.1) 29 | left, right = round(dw - 0.1), round(dw + 0.1) 30 | img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border 31 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular 32 | return img 33 | 34 | 35 | def prep_image(img, img_size=(1088, 608)): 36 | """ 37 | Prepare image for inputting to the neural network. 38 | 39 | Returns a Variable 40 | """ 41 | 42 | orig_im = cv2.imread(img) 43 | dim = orig_im.shape[1], orig_im.shape[0] 44 | img = (letterbox_image(orig_im, img_size)) 45 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 46 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 47 | return img_, orig_im, dim 48 | 49 | 50 | def prep_frame(img, img_size=(1088, 608)): 51 | """ 52 | Prepare image for inputting to the neural network. 
53 | 54 | Returns a Variable 55 | """ 56 | 57 | orig_im = img 58 | dim = orig_im.shape[1], orig_im.shape[0] 59 | img = (letterbox_image(orig_im, img_size)) 60 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 61 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 62 | return img_, orig_im, dim 63 | 64 | -------------------------------------------------------------------------------- /detector/tracker/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /detector/tracker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/tracker/utils/__init__.py -------------------------------------------------------------------------------- /detector/tracker/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /detector/tracker/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /detector/tracker/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /detector/tracker/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /detector/tracker/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 35 | text_thickness = 1 if text_scale > 1.1 else 1 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 
71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /detector/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/tracker/cfg/yolov3.cfg' 5 | cfg.WEIGHTS = 'detector/tracker/data/jde.1088x608.uncertainty.pt' 6 | cfg.IMG_SIZE = (1088, 608) 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.4 9 | cfg.BUFFER_SIZE = 30 # frame buffer -------------------------------------------------------------------------------- /detector/yolo/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a YOLO v3 Object Detector 2 | 3 | Forked from https://github.com/ayooshkathuria/pytorch-yolo-v3 4 | -------------------------------------------------------------------------------- /detector/yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/yolo/__init__.py -------------------------------------------------------------------------------- /detector/yolo/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 
101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/yolo/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import cv2 8 | from util import * 9 | import argparse 10 | import os 11 | import os.path as osp 12 | from darknet import Darknet 13 | from preprocess import prep_image, inp_to_image 14 | import pandas as pd 15 | import random 16 | import pickle as pkl 17 | import itertools 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | scales = "1,2,3" 23 | images = "imgs/messi.jpg" 24 | batch_size = 1 25 | confidence = 0.5 26 | nms_thesh = 0.4 27 | 28 | CUDA = torch.cuda.is_available() 29 | 30 | num_classes = 80 31 | classes = load_classes('data/coco.names') 32 | 33 | #Set up the neural network 34 | print("Loading network.....") 35 | model = Darknet("cfg/yolov3-spp.cfg") 36 | model.load_weights("yolov3-spp.weights") 37 | print("Network successfully loaded") 38 | 39 | model.net_info["height"] = "608" 40 | inp_dim = int(model.net_info["height"]) 41 | assert inp_dim % 32 == 0 42 | assert inp_dim > 32 43 | 44 | #If there's a GPU availible, put the model on GPU 45 | if CUDA: 46 | model.cuda() 47 | 48 | #Set the model in evaluation mode 49 | model.eval() 50 | 51 | #Detection phase 52 | try: 53 | imlist = [] 54 | imlist.append(osp.join(osp.realpath('.'), images)) 55 | except FileNotFoundError: 56 | print ("No file or directory with the name {}".format(images)) 57 | exit() 58 | 59 | batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))])) 60 | im_batches = [x[0] for x in batches] 61 | orig_ims = [x[1] for x in batches] 62 | im_dim_list = [x[2] for x in batches] 63 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) 64 | 65 | if CUDA: 66 | im_dim_list = im_dim_list.cuda() 67 | 68 | 69 | for batch in im_batches: 70 | #load the image 71 | if CUDA: 72 | batch = batch.cuda() 73 | with torch.no_grad(): 74 | prediction = model(Variable(batch), CUDA) 75 | 76 | prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh) 77 | output = prediction 78 | 79 | if CUDA: 80 | torch.cuda.synchronize() 81 | 82 | try: 83 | output 84 | except NameError: 85 | print("No detections were made") 86 | exit() 87 | print(im_dim_list.shape) 88 | im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long()) 89 | 90 | scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1) 91 | 92 | 93 | output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2 94 | output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2 95 | 96 | output[:,1:5] /= scaling_factor 97 | 98 | for i in range(output.shape[0]): 99 | output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0]) 100 | output[i, [2,4]] = 
torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1]) 101 | 102 | print(output) 103 | print(output.shape) 104 | -------------------------------------------------------------------------------- /detector/yolo/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/detector/yolo/pallete -------------------------------------------------------------------------------- /detector/yolo/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, inp_dim): 20 | '''resize image with unchanged aspect ratio using padding''' 21 | img_w, img_h = img.shape[1], img.shape[0] 22 | w, h = inp_dim 23 | new_w = int(img_w * min(w / img_w, h / img_h)) 24 | new_h = int(img_h * min(w / img_w, h / img_h)) 25 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 26 | 27 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 28 | 29 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 30 | 31 | return canvas 32 | 33 | 34 | def prep_image(img, inp_dim): 35 | """ 36 | Prepare image for inputting to the neural network. 37 | 38 | Returns a Variable 39 | """ 40 | 41 | orig_im = cv2.imread(img) 42 | dim = orig_im.shape[1], orig_im.shape[0] 43 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 44 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 45 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 46 | return img_, orig_im, dim 47 | 48 | 49 | def prep_frame(img, inp_dim): 50 | """ 51 | Prepare image for inputting to the neural network. 
52 | 53 | Returns a Variable 54 | """ 55 | 56 | orig_im = img 57 | dim = orig_im.shape[1], orig_im.shape[0] 58 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 59 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 60 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 61 | return img_, orig_im, dim 62 | 63 | 64 | def prep_image_pil(img, network_dim): 65 | orig_im = Image.open(img) 66 | img = orig_im.convert('RGB') 67 | dim = img.size 68 | img = img.resize(network_dim) 69 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 70 | img = img.view(*network_dim, 3).transpose(0, 1).transpose(0, 2).contiguous() 71 | img = img.view(1, 3, *network_dim) 72 | img = img.float().div(255.0) 73 | return (img, orig_im, dim) 74 | 75 | 76 | def inp_to_image(inp): 77 | inp = inp.cpu().squeeze() 78 | inp = inp * 255 79 | try: 80 | inp = inp.data.numpy() 81 | except RuntimeError: 82 | inp = inp.numpy() 83 | inp = inp.transpose(1, 2, 0) 84 | 85 | inp = inp[:, :, ::-1] 86 | return inp 87 | -------------------------------------------------------------------------------- /detector/yolo_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' 5 | cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' 6 | cfg.INP_DIM = 608 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.1 9 | cfg.NUM_CLASSES = 80 10 | -------------------------------------------------------------------------------- /detector/yolox/README.md: -------------------------------------------------------------------------------- 1 | # An implementation of PyTorch version YOLOX 2 | 3 | Forked and modified from https://github.com/Megvii-BaseDetection/YOLOX 4 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from detector.yolox.yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | 26 | if "model" not in self.__dict__: 27 | from detector.yolox.yolox.models import YOLOX, YOLOFPN, YOLOXHead 28 | 29 | backbone = YOLOFPN() 30 | head = YOLOXHead( 31 | self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu" 32 | ) 33 | self.model = YOLOX(backbone, head) 34 | self.model.apply(init_yolo) 35 | self.model.head.initialize_biases(1e-2) 36 | 37 | return self.model 38 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from detector.yolox.yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 0.33 16 | self.width = 0.25 17 | self.input_size = (416, 416) 18 | self.random_size = (10, 20) 19 | self.mosaic_scale = (0.5, 1.5) 20 | self.test_size = (416, 416) 21 | self.mosaic_prob = 0.5 22 | self.enable_mixup = False 23 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 24 | 25 | def get_model(self, sublinear=False): 26 | def init_yolo(M): 27 | for m in M.modules(): 28 | if isinstance(m, nn.BatchNorm2d): 29 | m.eps = 1e-3 30 | m.momentum = 0.03 31 | 32 | if "model" not in self.__dict__: 33 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 34 | 35 | in_channels = [256, 512, 1024] 36 | # NANO model use depthwise = True, which is main difference. 
37 | backbone = YOLOPAFPN( 38 | self.depth, 39 | self.width, 40 | in_channels=in_channels, 41 | act=self.act, 42 | depthwise=True, 43 | ) 44 | head = YOLOXHead( 45 | self.num_classes, 46 | self.width, 47 | in_channels=in_channels, 48 | act=self.act, 49 | depthwise=True, 50 | ) 51 | self.model = YOLOX(backbone, head) 52 | 53 | self.model.apply(init_yolo) 54 | self.model.head.initialize_biases(1e-2) 55 | return self.model 56 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.input_size = (416, 416) 16 | self.mosaic_scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | -------------------------------------------------------------------------------- /detector/yolox/exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from detector.yolox.yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /detector/yolox/exps/example/custom/nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 0.33 16 | self.width = 0.25 17 | self.input_size = (416, 416) 18 | self.mosaic_scale = (0.5, 1.5) 19 | self.random_size = (10, 20) 20 | self.test_size = (416, 416) 21 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 22 | self.enable_mixup = False 23 | 24 | # Define yourself dataset path 25 | self.data_dir = "datasets/coco128" 26 | self.train_ann = "instances_train2017.json" 27 | self.val_ann = "instances_val2017.json" 28 | 29 | self.num_classes = 71 30 | 31 | def get_model(self, sublinear=False): 32 | def init_yolo(M): 33 | for m in M.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | 38 | if "model" not in self.__dict__: 39 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 40 | 41 | in_channels = [256, 512, 1024] 42 | # NANO model use depthwise = True, which is main difference. 43 | backbone = YOLOPAFPN( 44 | self.depth, self.width, in_channels=in_channels, depthwise=True 45 | ) 46 | head = YOLOXHead( 47 | self.num_classes, self.width, in_channels=in_channels, depthwise=True 48 | ) 49 | self.model = YOLOX(backbone, head) 50 | 51 | self.model.apply(init_yolo) 52 | self.model.head.initialize_biases(1e-2) 53 | return self.model 54 | -------------------------------------------------------------------------------- /detector/yolox/exps/example/custom/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | import os 5 | 6 | from yolox.exp import Exp as MyExp 7 | 8 | 9 | class Exp(MyExp): 10 | def __init__(self): 11 | super(Exp, self).__init__() 12 | self.depth = 0.33 13 | self.width = 0.50 14 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 15 | 16 | # Define yourself dataset path 17 | self.data_dir = "datasets/coco128" 18 | self.train_ann = "instances_train2017.json" 19 | self.val_ann = "instances_val2017.json" 20 | 21 | self.num_classes = 71 22 | 23 | self.max_epoch = 300 24 | self.data_num_workers = 4 25 | self.eval_interval = 1 26 | -------------------------------------------------------------------------------- /detector/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | -------------------------------------------------------------------------------- /detector/yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .preprocess import prep_image, prep_frame 2 | -------------------------------------------------------------------------------- /detector/yolox/utils/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | def preproc(img, input_size, swap=(2, 0, 1)): 9 | if len(img.shape) == 3: 10 | padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 11 | else: 12 | padded_img = np.ones(input_size, dtype=np.uint8) * 114 13 | 14 | r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) 15 | resized_img = cv2.resize( 16 | img, 17 | (int(img.shape[1] * r), int(img.shape[0] * r)), 18 | interpolation=cv2.INTER_LINEAR, 19 | ).astype(np.uint8) 20 | padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img 21 | padded_img = padded_img.transpose(swap) 22 | padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) 23 | return padded_img, r 24 | 25 | 26 | def prep_image(img, img_size): 27 | """ 28 | Prepare image for inputting to the neural network. 29 | 30 | Returns a Variable 31 | """ 32 | 33 | orig_im = cv2.imread(img) 34 | dim = orig_im.shape[1], orig_im.shape[0] 35 | img_, _ = preproc(orig_im, img_size) 36 | img_ = torch.from_numpy(img_).unsqueeze(0).float() 37 | 38 | return img_, orig_im, dim 39 | 40 | 41 | def prep_frame(img, img_size): 42 | """ 43 | Prepare image for inputting to the neural network. 44 | 45 | Returns a Variable 46 | """ 47 | 48 | orig_im = img 49 | dim = orig_im.shape[1], orig_im.shape[0] 50 | img_, _ = preproc(orig_im, img_size) 51 | img_ = torch.from_numpy(img_).unsqueeze(0).float() 52 | 53 | return img_, orig_im, dim 54 | -------------------------------------------------------------------------------- /detector/yolox/yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | __version__ = "0.3.0" 5 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/base_exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
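Before the experiment classes below, a short sketch of how the `preproc` letterbox from `detector/yolox/utils/preprocess.py` above pairs with its scale ratio when mapping detections back to the original frame (illustrative values; not part of the library):

```python
import numpy as np
from detector.yolox.utils.preprocess import preproc  # assumes the repo root is on sys.path

frame = np.zeros((720, 1280, 3), dtype=np.uint8)    # H x W x C original frame
inp, r = preproc(frame, (640, 640))                  # CHW float32 padded to 640x640, r = min(640/720, 640/1280) = 0.5

# A box predicted on the padded input maps back to original pixels by dividing by r.
box_on_input = np.array([100.0, 80.0, 300.0, 240.0])  # hypothetical x1, y1, x2, y2
box_on_frame = box_on_input / r
```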
4 | 5 | import ast 6 | import pprint 7 | from abc import ABCMeta, abstractmethod 8 | from typing import Dict 9 | 10 | # from tabulate import tabulate 11 | 12 | import torch 13 | from torch.nn import Module 14 | 15 | # from detector.yolox.yolox.utils import LRScheduler 16 | 17 | 18 | class BaseExp(metaclass=ABCMeta): 19 | """Basic class for any experiment.""" 20 | 21 | def __init__(self): 22 | self.seed = None 23 | self.output_dir = "./YOLOX_outputs" 24 | self.print_interval = 100 25 | self.eval_interval = 10 26 | 27 | @abstractmethod 28 | def get_model(self) -> Module: 29 | pass 30 | 31 | @abstractmethod 32 | def get_data_loader( 33 | self, batch_size: int, is_distributed: bool 34 | ) -> Dict[str, torch.utils.data.DataLoader]: 35 | pass 36 | 37 | @abstractmethod 38 | def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: 39 | pass 40 | 41 | @abstractmethod 42 | def get_lr_scheduler(self, lr: float, iters_per_epoch: int, **kwargs): 43 | pass 44 | 45 | @abstractmethod 46 | def get_evaluator(self): 47 | pass 48 | 49 | @abstractmethod 50 | def eval(self, model, evaluator, weights): 51 | pass 52 | 53 | def __repr__(self): 54 | from tabulate import tabulate 55 | 56 | table_header = ["keys", "values"] 57 | exp_table = [ 58 | (str(k), pprint.pformat(v)) 59 | for k, v in vars(self).items() 60 | if not k.startswith("_") 61 | ] 62 | return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") 63 | 64 | def merge(self, cfg_list): 65 | assert len(cfg_list) % 2 == 0 66 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 67 | # only update value with same key 68 | if hasattr(self, k): 69 | src_value = getattr(self, k) 70 | src_type = type(src_value) 71 | if src_value is not None and src_type != type(v): 72 | try: 73 | v = src_type(v) 74 | except Exception: 75 | v = ast.literal_eval(v) 76 | setattr(self, k, v) 77 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import importlib 6 | import os 7 | import sys 8 | 9 | 10 | def get_exp_by_file(exp_file): 11 | try: 12 | sys.path.append(os.path.dirname(exp_file)) 13 | current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) 14 | exp = current_exp.Exp() 15 | except Exception: 16 | raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) 17 | return exp 18 | 19 | 20 | def get_exp_by_name(exp_name): 21 | exp = exp_name.replace("-", "_") # convert string like "yolox-s" to "yolox_s" 22 | module_name = ".".join(["detector", "yolox", "yolox", "exp", "default", exp]) 23 | exp_object = importlib.import_module(module_name).Exp() 24 | return exp_object 25 | 26 | 27 | def get_exp(exp_file=None, exp_name=None): 28 | """ 29 | get Exp object by file or name. If exp_file and exp_name 30 | are both provided, get Exp by exp_file. 31 | 32 | Args: 33 | exp_file (str): file path of experiment. 34 | exp_name (str): name of experiment. "yolo-s", 35 | """ 36 | assert ( 37 | exp_file is not None or exp_name is not None 38 | ), "plz provide exp file or exp name." 
39 | if exp_file is not None: 40 | return get_exp_by_file(exp_file) 41 | else: 42 | return get_exp_by_name(exp_name) 43 | -------------------------------------------------------------------------------- /detector/yolox/yolox/exp/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # This file is used for package installation and find default exp file 6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default" 12 | 13 | if _EXP_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _ExpFinder(importlib.abc.MetaPathFinder): 18 | def find_spec(self, name, path, target=None): 19 | if not name.startswith("detector.yolox.yolox.exp.default"): 20 | return 21 | project_name = name.split(".")[-1] + ".py" 22 | target_file = _EXP_PATH / project_name 23 | if not target_file.is_file(): 24 | return 25 | return importlib.util.spec_from_file_location(name, target_file) 26 | 27 | sys.meta_path.append(_ExpFinder()) 28 | -------------------------------------------------------------------------------- /detector/yolox/yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # import torch first to make jit op work without `ImportError of libc10.so` 6 | import torch # noqa 7 | 8 | from .jit_ops import FastCOCOEvalOp, JitOp 9 | 10 | try: 11 | from .fast_coco_eval_api import COCOeval_opt 12 | except ImportError: # exception will be raised when users build yolox from source 13 | pass 14 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
4 | 5 | from .build import * 6 | from .darknet import CSPDarknet, Darknet 7 | from .losses import IOUloss 8 | from .yolo_fpn import YOLOFPN 9 | from .yolo_head import YOLOXHead 10 | from .yolo_pafpn import YOLOPAFPN 11 | from .yolox import YOLOX 12 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | from torch import nn 6 | from torch.hub import load_state_dict_from_url 7 | 8 | __all__ = [ 9 | "create_yolox_model", 10 | "yolox_nano", 11 | "yolox_tiny", 12 | "yolox_s", 13 | "yolox_m", 14 | "yolox_l", 15 | "yolox_x", 16 | "yolov3", 17 | ] 18 | 19 | _CKPT_ROOT_URL = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download" 20 | _CKPT_FULL_PATH = { 21 | "yolox-nano": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_nano.pth", 22 | "yolox-tiny": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_tiny.pth", 23 | "yolox-s": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_s.pth", 24 | "yolox-m": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_m.pth", 25 | "yolox-l": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_l.pth", 26 | "yolox-x": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_x.pth", 27 | "yolov3": f"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_darknet.pth", 28 | } 29 | 30 | 31 | def create_yolox_model( 32 | name: str, pretrained: bool = True, num_classes: int = 80, device=None 33 | ) -> nn.Module: 34 | """creates and loads a YOLOX model 35 | 36 | Args: 37 | name (str): name of model. for example, "yolox-s", "yolox-tiny". 38 | pretrained (bool): load pretrained weights into the model. Default to True. 39 | num_classes (int): number of model classes. Defalut to 80. 40 | device (str): default device to for model. Defalut to None. 
41 | 42 | Returns: 43 | YOLOX model (nn.Module) 44 | """ 45 | from yolox.exp import get_exp, Exp 46 | 47 | if device is None: 48 | device = "cuda:0" if torch.cuda.is_available() else "cpu" 49 | device = torch.device(device) 50 | 51 | assert ( 52 | name in _CKPT_FULL_PATH 53 | ), f"user should use one of value in {_CKPT_FULL_PATH.keys()}" 54 | exp: Exp = get_exp(exp_name=name) 55 | exp.num_classes = num_classes 56 | yolox_model = exp.get_model() 57 | if pretrained and num_classes == 80: 58 | weights_url = _CKPT_FULL_PATH[name] 59 | ckpt = load_state_dict_from_url(weights_url, map_location="cpu") 60 | if "model" in ckpt: 61 | ckpt = ckpt["model"] 62 | yolox_model.load_state_dict(ckpt) 63 | 64 | yolox_model.to(device) 65 | return yolox_model 66 | 67 | 68 | def yolox_nano(pretrained=True, num_classes=80, device=None): 69 | return create_yolox_model("yolox-nano", pretrained, num_classes, device) 70 | 71 | 72 | def yolox_tiny(pretrained=True, num_classes=80, device=None): 73 | return create_yolox_model("yolox-tiny", pretrained, num_classes, device) 74 | 75 | 76 | def yolox_s(pretrained=True, num_classes=80, device=None): 77 | return create_yolox_model("yolox-s", pretrained, num_classes, device) 78 | 79 | 80 | def yolox_m(pretrained=True, num_classes=80, device=None): 81 | return create_yolox_model("yolox-m", pretrained, num_classes, device) 82 | 83 | 84 | def yolox_l(pretrained=True, num_classes=80, device=None): 85 | return create_yolox_model("yolox-l", pretrained, num_classes, device) 86 | 87 | 88 | def yolox_x(pretrained=True, num_classes=80, device=None): 89 | return create_yolox_model("yolox-x", pretrained, num_classes, device) 90 | 91 | 92 | def yolov3(pretrained=True, num_classes=80, device=None): 93 | return create_yolox_model("yolov3", pretrained, num_classes, device) 94 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved.
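A quick numeric sanity check of the `IOUloss` module defined in `losses.py` just below (illustrative only; assumes the repo root is importable):

```python
import torch
from detector.yolox.yolox.models.losses import IOUloss

# Boxes are (cx, cy, w, h). pred covers [-1, 1] x [-1, 1]; target covers [0, 2] x [-1, 1].
pred = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
target = torch.tensor([[1.0, 0.0, 2.0, 2.0]])

# Intersection = 1 * 2 = 2, union = 4 + 4 - 2 = 6, IoU = 1/3, loss = 1 - (1/3)^2 ~= 0.889
loss = IOUloss(reduction="none", loss_type="iou")(pred, target)
print(loss)  # tensor([0.8889])
```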
4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class IOUloss(nn.Module): 10 | def __init__(self, reduction="none", loss_type="iou"): 11 | super(IOUloss, self).__init__() 12 | self.reduction = reduction 13 | self.loss_type = loss_type 14 | 15 | def forward(self, pred, target): 16 | assert pred.shape[0] == target.shape[0] 17 | 18 | pred = pred.view(-1, 4) 19 | target = target.view(-1, 4) 20 | tl = torch.max( 21 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 22 | ) 23 | br = torch.min( 24 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 25 | ) 26 | 27 | area_p = torch.prod(pred[:, 2:], 1) 28 | area_g = torch.prod(target[:, 2:], 1) 29 | 30 | en = (tl < br).type(tl.type()).prod(dim=1) 31 | area_i = torch.prod(br - tl, 1) * en 32 | area_u = area_p + area_g - area_i 33 | iou = (area_i) / (area_u + 1e-16) 34 | 35 | if self.loss_type == "iou": 36 | loss = 1 - iou**2 37 | elif self.loss_type == "giou": 38 | c_tl = torch.min( 39 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 40 | ) 41 | c_br = torch.max( 42 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 43 | ) 44 | area_c = torch.prod(c_br - c_tl, 1) 45 | giou = iou - (area_c - area_u) / area_c.clamp(1e-16) 46 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 47 | 48 | if self.reduction == "mean": 49 | loss = loss.mean() 50 | elif self.reduction == "sum": 51 | loss = loss.sum() 52 | 53 | return loss 54 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv 10 | 11 | 12 | class YOLOFPN(nn.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=53, 20 | in_features=["dark3", "dark4", "dark5"], 21 | ): 22 | super().__init__() 23 | 24 | self.backbone = Darknet(depth) 25 | self.in_features = in_features 26 | 27 | # out 1 28 | self.out1_cbl = self._make_cbl(512, 256, 1) 29 | self.out1 = self._make_embedding([256, 512], 512 + 256) 30 | 31 | # out 2 32 | self.out2_cbl = self._make_cbl(256, 128, 1) 33 | self.out2 = self._make_embedding([128, 256], 256 + 128) 34 | 35 | # upsample 36 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 37 | 38 | def _make_cbl(self, _in, _out, ks): 39 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 40 | 41 | def _make_embedding(self, filters_list, in_filters): 42 | m = nn.Sequential( 43 | *[ 44 | self._make_cbl(in_filters, filters_list[0], 1), 45 | self._make_cbl(filters_list[0], filters_list[1], 3), 46 | self._make_cbl(filters_list[1], filters_list[0], 1), 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): 54 | with open(filename, "rb") as f: 55 | state_dict = torch.load(f, map_location="cpu") 56 | print("loading pretrained weights...") 57 | self.backbone.load_state_dict(state_dict) 58 | 59 | def forward(self, inputs): 60 | """ 61 | Args: 62 | inputs (Tensor): input image. 63 | 64 | Returns: 65 | Tuple[Tensor]: FPN output features.. 
66 | """ 67 | # backbone 68 | out_features = self.backbone(inputs) 69 | x2, x1, x0 = [out_features[f] for f in self.in_features] 70 | 71 | # yolo branch 1 72 | x1_in = self.out1_cbl(x0) 73 | x1_in = self.upsample(x1_in) 74 | x1_in = torch.cat([x1_in, x1], 1) 75 | out_dark4 = self.out1(x1_in) 76 | 77 | # yolo branch 2 78 | x2_in = self.out2_cbl(out_dark4) 79 | x2_in = self.upsample(x2_in) 80 | x2_in = torch.cat([x2_in, x2], 1) 81 | out_dark3 = self.out2(x2_in) 82 | 83 | outputs = (out_dark3, out_dark4, x0) 84 | return outputs 85 | -------------------------------------------------------------------------------- /detector/yolox/yolox/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | import torch.nn as nn 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(nn.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x, targets=None): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | 32 | if self.training: 33 | assert targets is not None 34 | loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( 35 | fpn_outs, targets, x 36 | ) 37 | outputs = { 38 | "total_loss": loss, 39 | "iou_loss": iou_loss, 40 | "l1_loss": l1_loss, 41 | "conf_loss": conf_loss, 42 | "cls_loss": cls_loss, 43 | "num_fg": num_fg, 44 | } 45 | else: 46 | outputs = self.head(fpn_outs) 47 | 48 | return outputs 49 | -------------------------------------------------------------------------------- /detector/yolox/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | # This file is used for package installation. Script of train/eval/export will be available. 6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _TOOLS_PATH = Path(__file__).resolve().parent.parent.parent / "tools" 12 | 13 | if _TOOLS_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _PathFinder(importlib.abc.MetaPathFinder): 18 | def find_spec(self, name, path, target=None): 19 | if not name.startswith("yolox.tools."): 20 | return 21 | project_name = name.split(".")[-1] + ".py" 22 | target_file = _TOOLS_PATH / project_name 23 | if not target_file.is_file(): 24 | return 25 | return importlib.util.spec_from_file_location(name, target_file) 26 | 27 | sys.meta_path.append(_PathFinder()) 28 | -------------------------------------------------------------------------------- /detector/yolox/yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
4 | 5 | from .boxes import * 6 | from .compat import meshgrid 7 | -------------------------------------------------------------------------------- /detector/yolox/yolox/utils/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | 6 | _TORCH_VER = [int(x) for x in torch.__version__.split(".")[:2]] 7 | 8 | __all__ = ["meshgrid"] 9 | 10 | 11 | def meshgrid(*tensors): 12 | if _TORCH_VER >= [1, 10]: 13 | return torch.meshgrid(*tensors, indexing="ij") 14 | else: 15 | return torch.meshgrid(*tensors) 16 | -------------------------------------------------------------------------------- /detector/yolox_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.MODEL_NAME = "yolox-x" 5 | cfg.MODEL_WEIGHTS = "detector/yolox/data/yolox_x.pth" 6 | cfg.INP_DIM = 640 7 | cfg.CONF_THRES = 0.1 8 | cfg.NMS_THRES = 0.6 9 | -------------------------------------------------------------------------------- /docs/CrowdPose.md: -------------------------------------------------------------------------------- 1 | ## [CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark](https://arxiv.org/abs/1812.00324) *(accepted to CVPR2019)* 2 | 3 |

4 | 5 |

6 | 7 | ## Introduction 8 | Our proposed method surpasses the state-of-the-art methods on [CrowdPose](https://arxiv.org/abs/1812.00324) dataset by **5** mAP and results on MSCOCO dataset demonstrate the generalization ability of our method (comparatively **0.8** mAP higher). Images in our proposed CrowdPose dataset have a uniform distribution of *Crowd Index* among \[0, 1\]. 9 | 10 | ## Code 11 | We provide [evaluation tools](https://github.com/Jeff-sjtu/CrowdPose) for CrowdPose dataset. Our evaluation tools is developed based on [@cocodataset/cocoapi](https://github.com/cocodataset/cocoapi). The source code of our model is integrated into [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose/tree/pytorch). 12 | 13 | ## Quick Start 14 | Run with `matching` option to use the matching algorithm in CrowdPose. 15 | 16 | - **Input dir**: Run AlphaPose for all images in a folder with: 17 | ``` 18 | # pytorch branch 19 | python3 demo.py --indir ${img_directory} --outdir examples/res --matching 20 | ``` 21 | 22 | ## Dataset 23 | [Train + Validation + Test Images](https://drive.google.com/file/d/1VprytECcLtU4tKP32SYi_7oDRbw7yUTL/view?usp=sharing) (Google Drive) 24 | 25 | [Annotations](https://drive.google.com/open?id=196vsma1uuLLCcUt1NrXp1K8PBU6tVH8w) (Google Drive) 26 | 27 | ## Results 28 | 29 | **Results on CrowdPose Validation:** 30 | 31 | *Compare with state-of-the-art methods* 32 |
33 | 34 | | Method | AP @0.5:0.95 | AP @0.5 | AP @0.75 | AR @0.5:0.95 | AR @0.5 | AR @0.75 | 35 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:| 36 | | Detectron (Mask R-CNN) | 57.2 | 83.5 | 60.3 | 65.9 | 89.3 | 69.4 | 37 | | Simple Pose (Xiao *et al.*) | 60.8 | 81.4 | 65.7 | 67.3 | 86.3 | 71.8 | 38 | | **Ours** | **66.0** | **84.2** | **71.5** | **72.7** | **89.5** | **77.5** | 39 | 40 |
41 | 42 | *Compare with open-source systems* 43 |
44 | 45 | | Method | AP @*Easy* | AP @*Medium* | AP @*Hard* | FPS | 46 | |:-------|:-----:|:-------:|:-------:|:-------:| 47 | | OpenPose (CMU-Pose) | 62.7 | 48.7 | 32.3 | 5.3 | 48 | | Detectron (Mask R-CNN) | 69.4 | 57.9 | 45.8 | 2.9 | 49 | | **Ours** | **75.5** | **66.3** | **57.4** | **10.1** | 50 | 51 |
52 | 53 | **Results on MSCOCO Validation:** 54 |
55 | 56 | | Method | AP @0.5:0.95 | AR @0.5:0.95 | 57 | |:-------|:-----:|:-------:| 58 | | Detectron (Mask R-CNN) | 64.8 | 71.1 | 59 | | Simple Pose (Xiao *et al.*) | 69.8 | 74.1 | 60 | | **AlphaPose** | **70.9** | **76.4** | 61 | 62 |
63 | 64 | ## Contributors 65 | CrowdPose is authored by [Jiefeng Li](http://jeff-leaf.site/), [Can Wang](https://github.com/Canwang-sjtu), [Hao Zhu](https://github.com/BernieZhu), [Yihuan Mao](), [Hao-Shu Fang](https://fang-haoshu.github.io/), and [Cewu Lu](http://www.mvig.org/). 66 | -------------------------------------------------------------------------------- /docs/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | ## Flags 4 | Checkout the [run.md](./run.md) for all flags. 5 | 6 | ## Example Inference 7 | - **Input dir**: Run AlphaPose for all images in a folder with: 8 | ``` bash 9 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} 10 | ``` 11 | - **Choose a different detector**: Default detector is yolov3-spp, it works pretty well, if you want to use yolox series, remember to download their weight according to our installation readme. Options include [yolox-x|yolox-l|yolox-m|yolox-s|yolox-darknet]: 12 | ``` bash 13 | python scripts/demo_inference.py --detector yolox-x --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} 14 | ``` 15 | - **Video**: Run AlphaPose for a video and save the rendered video with: 16 | ``` bash 17 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --save_video 18 | ``` 19 | - **Webcam**: Run AlphaPose using default webcam and visualize the results with: 20 | ``` bash 21 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --outdir examples/res --vis --webcam 0 22 | ``` 23 | - **Input list**: Run AlphaPose for images in a list and save the rendered images with: 24 | ``` bash 25 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --save_img 26 | ``` 27 | - **Only-cpu/Multi-gpus**: Run AlphaPose for images in a list by cpu only or multi gpus: 28 | ``` bash 29 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --list examples/list-coco-demo.txt --indir ${img_directory} --outdir examples/res --gpus ${-1(cpu only)/0,1,2,3(multi-gpus)} 30 | ``` 31 | - **Re-ID Track(Experimental)**: Run AlphaPose for tracking persons in a video by human re-id algorithm: 32 | ``` bash 33 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --pose_track --save_video 34 | ``` 35 | - **Simple Track(Experimental)**: Run AlphaPose for tracking persons in a video by MOT tracking algorithm: 36 | ``` bash 37 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --detector tracker --save_video 38 | ``` 39 | - **Pose Flow(not ready)**: Run AlphaPose for tracking persons in a video by embedded PoseFlow algorithm: 40 | ``` bash 41 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --video ${path to video} --outdir examples/res --pose_flow --save_video 42 | ``` 43 | 44 | 45 | ## Options 46 | - **Note**: If you meet OOM(out of memory) problem, decreasing the pose estimation batch until the program can run on your computer: 47 | ``` bash 48 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir 
examples/res --detbatch 1 --posebatch 30 49 | ``` 50 | - **Getting more accurate**: You can use larger input for pose network to improve performance e.g.: 51 | ``` 52 | python scripts/demo_inference.py --cfg ${cfg_file} --checkpoint ${trained_model} --indir ${img_directory} --outdir ${output_directory} --flip 53 | ``` 54 | - **Speeding up**: Checkout the [speed_up.md](./speed_up.md) for more details. 55 | 56 | ## Output format 57 | Checkout the [output.md](./output.md) for more details. 58 | -------------------------------------------------------------------------------- /docs/alphapose_136.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_136.gif -------------------------------------------------------------------------------- /docs/alphapose_17.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_17.gif -------------------------------------------------------------------------------- /docs/alphapose_26.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_26.gif -------------------------------------------------------------------------------- /docs/alphapose_hybrik_smpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/alphapose_hybrik_smpl.gif -------------------------------------------------------------------------------- /docs/contributors.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Authors and Contributors 2 | ==================================== 3 | 4 | 5 | 6 | ### Authors 7 | AlphaPose is authored by [Hao-Shu Fang\*](https://fang-haoshu.github.io/), [Jiefeng Li\*](https://jeff-leaf.site/), Hongyang Tang, Chao Xu, Haoyi Zhu, [Yuliang Xiu](http://xiuyuliang.cn/), Yong-Lu Li, and [Cewu Lu](www.mvig.org). Cewu Lu is corresponding author. 8 | 9 | 10 | 11 | ### Contributors 12 | We would also like to thank the following people who have highly contributed to AlphaPose: 13 | 14 | [Hongyang Tang](): AlphaPose Tracking module developer 15 | 16 | [Chao Xu](): AlphaPose pytorch 0.3.0 version contributor 17 | 18 | [Haoyi Zhu](): AlphaPose whole-body keypoints developer 19 | 20 | [Chenxi Wang](): AlphaPose mxnet version main developer 21 | 22 | [Chongwei Liu](): AlphaPose c++ version developer 23 | 24 | [Ruiheng Chang](): AlphaPose master version(old) detection module 25 | -------------------------------------------------------------------------------- /docs/crowdpose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/crowdpose.gif -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Frequently Asked Question (FAQ) 2 | ============================================ 3 | 4 | ## FAQ 5 | 1. 
[Can't open webcam or video file](#Can't-open-webcam-or-video-file) 6 | 7 | ## FAQ 8 | ### Can't open webcam or video file 9 | **Q:** - I can't open the webcam or video file. 10 | 11 | **A**: Try re-installing `opencv-python` with version >= 3.3.1.11 by 12 | ``` 13 | pip3 uninstall opencv_python 14 | pip3 install opencv_python --user 15 | ``` 16 | Many people meet this problem at https://github.com/opencv/opencv/issues/8471. The solution I use is 17 | ``` 18 | sudo cp /build/lib/python3/cv2.cpython-35m-x86_64-linux-gnu.so /usr/local/lib/python3.5/dist-packages/cv2/cv2.cpython-35m-x86_64-linux-gnu.so 19 | ``` 20 | The idea is to replace the cv2.so library provided by pypi with the one compiled from sources. You can check for more info at https://github.com/opencv/opencv/issues/8471. 21 | 22 | ### Can't open webcam 23 | **Q:** - I can't open the webcam with the latest `opencv-python`. 24 | 25 | **A**: Check if your device is valid by 26 | ``` 27 | ls /dev/video* 28 | ``` 29 | Usually you can find `video0`, but if you have a device with another index like `video3`, you can run the program by 30 | ``` 31 | # main branch 32 | python scripts/demo_inference.py --cfg ${CONFIG} --checkpoint ${CKPT} --webcam 3 --outdir ${OUTDIR} --detector yolo --vis 33 | 34 | # pytorch branch 35 | python3 webcam_demo.py --webcam 3 --outdir examples/res --vis 36 | ``` 37 | 38 | ### Program crash 39 | **Q1:** - I get `Killed` when processing heavy tasks, like large videos or images with crowded persons. 40 | 41 | **A**: Your system runs out of CPU memory and kills the program automatically. Please reduce the length of the result buffer by setting the `--qsize` flag. With the default buffer length, more than 70 GB of free CPU memory is recommended for heavy tasks. 42 | 43 | **Q2:** - I get a segmentation fault when processing heavy tasks, like large videos or images with crowded persons. 44 | 45 | **A**: The parallelization module `torch.multiprocessing` is prone to shared memory leaks. Its garbage collection mechanism `torch_shm_manager` may cause a segmentation fault under long-running heavy load. We found this issue when processing large videos with hundreds of persons. To avoid it, you can set the `--sp` flag to use multi-threading instead, which sacrifices a little efficiency for more stability.
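For reference, a combined invocation that applies the memory-related flags mentioned above could look like the following (illustrative; config, checkpoint and video paths are placeholders, and `--qsize 64` is just an example of a reduced buffer length):
```
python scripts/demo_inference.py --cfg ${CONFIG} --checkpoint ${CKPT} --video ${VIDEO} \
    --outdir examples/res --sp --qsize 64 --posebatch 30 --save_video
```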
46 | -------------------------------------------------------------------------------- /docs/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/logo.jpg -------------------------------------------------------------------------------- /docs/pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/pose.gif -------------------------------------------------------------------------------- /docs/posetrack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/posetrack.gif -------------------------------------------------------------------------------- /docs/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/posetrack2.gif -------------------------------------------------------------------------------- /docs/run.md: -------------------------------------------------------------------------------- 1 | AlphaPose Usage & Examples 2 | ==================================== 3 | 4 | Here, we first list the flags and other parameters you can tune. Default parameters work well and you don't need to tune them unless you know what you are doing. 5 | 6 | ## Flags 7 | - `--cfg`: Experiment configure file name 8 | - `--checkpoint`: Experiment checkpoint file name 9 | - `--sp`: Run the program using a single process. Windows users need to turn this flag on. 10 | - `--detector`: Detector you can use, yolo/tracker. For [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX), you should specify the model, i.e. yolox-l or yolox-x, etc. 11 | - `--indir`: Directory of the input images. All the images in the directory will be processed. 12 | - `--list`: A text file list for the input images 13 | - `--image`: Read single image and process. 14 | - `--video`: Read video and process the video frame by frame. 15 | - `--outdir`: Output directory to store the pose estimation results. 16 | - `--vis`: If turned-on, it will render the results and visualize them. 17 | - `--save_img`: If turned-on, it will render the results and save them as images in $outdir/vis. 18 | - `--save_video`: If turned-on, it will render the results and save them as a video. 19 | - `--vis_fast`: If turned on, it will use faster rendering method. Default is false. 20 | - `--format`: The format of the saved results. By default, it will save the output in COCO-like format. Alternative options are 'cmu' and 'open', which saves the results in the format of CMU-Pose or OpenPose. For more details, see [output.md](output.md) 21 | 22 | - `--detbatch`: Batch size for the detection network. 23 | - `--posebatch`: Maximum batch size for the pose estimation network. If you met OOM problem, decrease this value until it fit in the memory. 24 | - `--flip`: Enable flip testing. Can increase the accuracy. 25 | - `--min_box_area`: Min box area to filter out, you can set it like 100 to filter out small people. 26 | - `--gpus`: Choose which cuda device to use by index and input comma to use multi gpus, e.g. 0,1,2,3. 
(input -1 for CPU only) 27 | 28 | - `--pose_track`: Enable the tracking pipeline with human re-ID features; it is currently the best-performing pose tracker 29 | - `--pose_flow`: This flag will be deprecated. It enables the old tracking version, PoseFlow. 30 | 31 | All the flags are available here: [link](../scripts/demo_inference.py#L22) 32 | 33 | 34 | ## Parameters 35 | 1. The yolo detector config is [here](../detector/yolo_cfg.py) 36 | - `CONFIDENCE`: Confidence threshold for human detection. Lowering the value can improve the final accuracy but decreases the speed. Default is 0.05. 37 | - `NMS_THRES`: NMS threshold for human detection. Increasing the value can improve the final accuracy but decreases the speed. Default is 0.6. 38 | - `INP_DIM`: The input size of the detection network. It should be a multiple of 32. Default is 608. Increasing it may improve the accuracy. 39 | -------------------------------------------------------------------------------- /docs/speed_up.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Speeding Up 2 | ============================================ 3 | 4 | 5 | 1. When running AlphaPose on a video, you can speed it up by increasing the confidence threshold, lowering the NMS threshold, and lowering the input resolution of the detector in `detector/yolo_cfg.py`: 6 | ``` 7 | cfg.NMS_THRES = 0.45 8 | cfg.CONFIDENCE = 0.5 9 | cfg.INP_DIM = 420 10 | ``` 11 | It may miss some people though. 12 | 13 | 2. Increase the detection and pose batch sizes by setting the `--detbatch` and `--posebatch` flags if you have large GPU memory. 14 | -------------------------------------------------------------------------------- /docs/step1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step1.jpg -------------------------------------------------------------------------------- /docs/step2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step2.jpg -------------------------------------------------------------------------------- /docs/step3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step3.jpg -------------------------------------------------------------------------------- /docs/step4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/docs/step4.jpg -------------------------------------------------------------------------------- /docs/win_install.md: -------------------------------------------------------------------------------- 1 | AlphaPose - Windows Installation 2 | ============================================ 3 | 4 | Tested on Win10 with a GTX 1060 5 | 6 | 1. Download and install Git for Windows: https://git-scm.com/download/win 7 | 2. Download and install Python 3 (3.6 or 3.7): https://www.python.org/downloads/ 8 | 3. Download and install the CUDA toolkit: https://developer.nvidia.com/cuda-downloads 9 | 4. Download and install PyTorch: https://pytorch.org/ 10 | Right-click and choose "Git Bash Here" 11 |
12 | (screenshot: step1.jpg) 13 |
14 | Input the command here and press Enter 15 |
16 | (screenshot: step2.jpg) 17 |
18 | 5. Download win-64/intel-openmp-2018.0.0-8.tar.bz2: https://anaconda.org/anaconda/intel-openmp/files 19 | Put the .dll files in Library\bin into a directory, then append that directory's path to the PATH environment variable. 20 | I suggest putting them in C:\Users\<username>\bin, since this directory is already in PATH. 21 | 6. Right-click, choose "Git Bash Here", and then follow the instructions in the README to install AlphaPose (a hedged sketch of the typical commands is shown below) 22 |
23 | (screenshot: step3.jpg) 24 |
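The authoritative install commands are in the repository README (and docs/INSTALL.md); the following is only a hedged sketch of the usual flow inside Git Bash, assuming a standard Python/pip setup:
```
# hedged sketch -- follow the README for the authoritative steps
git clone https://github.com/MVIG-SJTU/AlphaPose.git
cd AlphaPose
pip3 install cython
python setup.py build develop --user
```
If a command differs on your machine (e.g. `python` vs `python3`), follow the README rather than this sketch.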
25 | 7. Have fun! Now you can run AlphaPose from the command line. Try the webcam demo with 26 | ``` 27 | # pytorch branch 28 | python3 webcam_demo.py --vis 29 | ``` 30 | **Note:** For `demo.py` and `video_demo.py`, you need to turn on the `--sp` flag, e.g. 31 | 32 | ``` 33 | # pytorch branch 34 | python3 demo.py --indir ${img_directory} --outdir examples/res --sp 35 | ``` 36 | 37 |
38 | (screenshot: step4.jpg) 39 |
40 | -------------------------------------------------------------------------------- /examples/demo/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/1.jpg -------------------------------------------------------------------------------- /examples/demo/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/2.jpg -------------------------------------------------------------------------------- /examples/demo/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/examples/demo/3.jpg -------------------------------------------------------------------------------- /examples/list-coco-demo.txt: -------------------------------------------------------------------------------- 1 | 000000375530.jpg 2 | 000000244462.jpg 3 | 000000113397.jpg 4 | 000000113408.jpg 5 | 000000375554.jpg 6 | 000000171819.jpg 7 | 000000375566.jpg 8 | 000000244496.jpg 9 | 000000139077.jpg 10 | 000000506656.jpg 11 | 000000375606.jpg 12 | 000000244539.jpg 13 | 000000565045.jpg 14 | 000000113473.jpg 15 | 000000375621.jpg 16 | 000000244550.jpg 17 | 000000492605.jpg 18 | 000000506707.jpg 19 | 000000113493.jpg 20 | 000000215524.jpg 21 | -------------------------------------------------------------------------------- /model_files/J_regressor_h36m.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/J_regressor_h36m.npy -------------------------------------------------------------------------------- /model_files/h36m_mean_beta.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/h36m_mean_beta.npy -------------------------------------------------------------------------------- /model_files/smpl_faces.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/model_files/smpl_faces.npy -------------------------------------------------------------------------------- /pretrained_models/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/pretrained_models/get_models.sh -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 7 | 8 | python scripts/demo_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video 14 | -------------------------------------------------------------------------------- /scripts/inference_3d.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 
7 | 8 | python scripts/demo_3d_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video --pose_track 14 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | EXPID=${2:-"alphapose"} 5 | 6 | python ./scripts/train.py \ 7 | --exp-id ${EXPID} \ 8 | --cfg ${CONFIG} 9 | -------------------------------------------------------------------------------- /scripts/validate.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | BATCH=${3:-"64"} 6 | GPUS=${4:-"0,1,2,3"} 7 | 8 | python ./scripts/validate.py \ 9 | --cfg ${CONFIG} \ 10 | --batch ${BATCH} \ 11 | --gpus $GPUS\ 12 | --flip-test \ 13 | --checkpoint ${CKPT} 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | index_url=https://pypi.tuna.tsinghua.edu.cn/simple 3 | -------------------------------------------------------------------------------- /trackers/PoseFlow/parallel_process.py: -------------------------------------------------------------------------------- 1 | # adapted from http://danshiebler.com/2016-09-14-parallel-progress-bar/ 2 | from tqdm import tqdm 3 | from concurrent.futures import ProcessPoolExecutor, as_completed 4 | 5 | def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=3): 6 | """ 7 | A parallel version of the map function with a progress bar. 8 | 9 | Args: 10 | array (array-like): An array to iterate over. 11 | function (function): A python function to apply to the elements of array 12 | n_jobs (int, default=16): The number of cores to use 13 | use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of 14 | keyword arguments to function 15 | front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job. 16 | Useful for catching bugs 17 | Returns: 18 | [function(array[0]), function(array[1]), ...] 19 | """ 20 | #We run the first few iterations serially to catch bugs 21 | if front_num > 0: 22 | front = [function(**a) if use_kwargs else function(*a) for a in array[:front_num]] 23 | #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging. 24 | if n_jobs==1: 25 | return front + [function(**a) if use_kwargs else function(*a) for a in tqdm(array[front_num:])] 26 | #Assemble the workers 27 | with ProcessPoolExecutor(max_workers=n_jobs) as pool: 28 | #Pass the elements of array into function 29 | if use_kwargs: 30 | futures = [pool.submit(function, **a) for a in array[front_num:]] 31 | else: 32 | futures = [pool.submit(function, *a) for a in array[front_num:]] 33 | kwargs = { 34 | 'total': len(futures), 35 | 'unit': 'it', 36 | 'unit_scale': True, 37 | 'leave': True 38 | } 39 | #Print out the progress as tasks complete 40 | for f in tqdm(as_completed(futures), **kwargs): 41 | pass 42 | out = [] 43 | #Get the results from the futures. 
44 | for i, future in enumerate(futures): 45 | try: 46 | out.append(future.result()) 47 | except Exception as e: 48 | out.append(e) 49 | return front + out -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/PoseFlow/posetrack1.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/PoseFlow/posetrack2.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack_data: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/posetrack_data -------------------------------------------------------------------------------- /trackers/PoseFlow/poseval: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/poseval -------------------------------------------------------------------------------- /trackers/PoseFlow/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.5 2 | scipy==1.1.0 3 | opencv_python==3.4.2.16 4 | opencv_contrib_python==3.4.2.16 5 | matplotlib==2.2.2 6 | tqdm==4.23.4 7 | Image==1.5.25 8 | Pillow==5.3.0 9 | munkres==1.0.12 10 | -------------------------------------------------------------------------------- /trackers/README.md: -------------------------------------------------------------------------------- 1 | # Pose Tracking Module for AlphaPose 2 | 3 | AlphaPose currently provides three different tracking methods; you can try each method to see which one works best for you. 4 | 5 | ## 1. Human-ReID based tracking (Recommended) 6 | Currently the best-performing tracking model. Paper coming soon. 7 | 8 | #### Getting started 9 | Download the human re-ID model ([Google Drive](https://drive.google.com/file/d/1myNKfr2cXqiHZVXaaG8ZAq_U2UpeOLfG/view?usp=share_link) or [Baidu Pan, code: 6a8b](https://pan.baidu.com/s/1IoAHehdjJ0ucQl8p_4hfRw)) and place it into `AlphaPose/trackers/weights/`. 10 | 11 | Then simply run AlphaPose with the additional flag `--pose_track` 12 | 13 | You can try a different person re-ID model by modifying `cfg.arch` and `cfg.loadmodel` in `./trackers/tracker_cfg.py`. 14 | 15 | If you want to train your own re-ID model, please refer to this [project](https://github.com/KaiyangZhou/deep-person-reid) 16 | 17 | #### Demo 18 | ``` bash 19 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR} --pose_track 20 | ``` 21 | #### Todo 22 | - [ ] Evaluation Tools for PoseTrack 23 | - [ ] More Models 24 | - [ ] Training code for [PoseTrack Dataset](https://posetrack.net/) 25 | 26 | ## 2. Detector based human tracking 27 | Use a human detector with a tracking module (JDE).
Please refer to [detector/tracker/](../detector/tracker/) 28 | 29 | #### Getting started 30 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 31 | 32 | Enable tracking by setting the detector as tracker: `--detector tracker` 33 | #### Demo 34 | ``` bash 35 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --detector tracker 36 | ``` 37 | 38 | ## 3. PoseFlow human tracking 39 | This tracker is based on our BMVC 2018 paper PoseFlow, for more info please refer to [PoseFlow/README.md](PoseFlow/) 40 | 41 | #### Getting started 42 | 43 | Simply run alphapose with additional flag `--pose_flow` 44 | -------------------------------------------------------------------------------- /trackers/ReidModels/ResBnLin.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | from ReidModels.ResNet import build_resnet_backbone 10 | from ReidModels.bn_linear import BNneckLinear 11 | class SpatialAttn(nn.Module): 12 | """Spatial Attention Layer""" 13 | def __init__(self): 14 | super(SpatialAttn, self).__init__() 15 | 16 | def forward(self, x): 17 | # global cross-channel averaging # e.g. 32,2048,24,8 18 | x = x.mean(1, keepdim=True) # e.g. 32,1,24,8 19 | h = x.size(2) 20 | w = x.size(3) 21 | x = x.view(x.size(0),-1) # e.g. 32,192 22 | z = x 23 | for b in range(x.size(0)): 24 | z[b] /= torch.sum(z[b]) 25 | z = z.view(x.size(0),1,h,w) 26 | return z 27 | class ResModel(nn.Module): 28 | 29 | def __init__(self, n_ID): 30 | super().__init__() 31 | self.backbone = build_resnet_backbone() 32 | self.head = BNneckLinear(n_ID) 33 | self.atten = SpatialAttn() 34 | self.conv1 = nn.Conv2d(17, 17, 1,stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros') 35 | self.pool = nn.AvgPool2d(2, stride=2, padding=0,) 36 | def forward(self, input,posemap,map_weight): 37 | """ 38 | See :class:`ReIDHeads.forward`. 
39 | """ 40 | feat = self.backbone(input) 41 | b,c,h,w = feat.shape 42 | att = self.conv1(torch.mul(posemap,map_weight)) 43 | #print('att-1-size={}'.format(att.shape)) 44 | att = F.relu(att) 45 | att = self.pool(att) 46 | att = self.conv1(att) 47 | #print('att-2-size={}'.format(att.shape)) 48 | att = F.softmax(att) 49 | #print('att-3-size={}'.format(att.shape)) 50 | att = self.atten(att) 51 | #print('att-4-size={}'.format(att.shape)) 52 | att = att.expand(b,c,h,w) 53 | _feat = torch.mul(feat,att) 54 | feat = _feat + feat 55 | return self.head(feat) -------------------------------------------------------------------------------- /trackers/ReidModels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/backbone/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .lrn import SpatialCrossMapLRN 7 | 8 | 9 | class Inception(nn.Module): 10 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 11 | super(Inception, self).__init__() 12 | # 1x1 conv branch 13 | self.b1 = nn.Sequential( 14 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 15 | nn.ReLU(True), 16 | ) 17 | 18 | # 1x1 conv -> 3x3 conv branch 19 | self.b2 = nn.Sequential( 20 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | ) 25 | 26 | # 1x1 conv -> 5x5 conv branch 27 | self.b3 = nn.Sequential( 28 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 29 | nn.ReLU(True), 30 | 31 | nn.Conv2d(n5x5red, n5x5, kernel_size=5, padding=2), 32 | nn.ReLU(True), 33 | ) 34 | 35 | # 3x3 pool -> 1x1 conv branch 36 | self.b4 = nn.Sequential( 37 | nn.MaxPool2d(3, stride=1, padding=1), 38 | 39 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 40 | nn.ReLU(True), 41 | ) 42 | 43 | def forward(self, x): 44 | y1 = self.b1(x) 45 | y2 = self.b2(x) 46 | y3 = self.b3(x) 47 | y4 = self.b4(x) 48 | return torch.cat([y1,y2,y3,y4], 1) 49 | 50 | 51 | class GoogLeNet(nn.Module): 52 | 53 | output_channels = 832 54 | 55 | def __init__(self): 56 | super(GoogLeNet, self).__init__() 57 | self.pre_layers = nn.Sequential( 58 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 59 | nn.ReLU(True), 60 | 61 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 62 | SpatialCrossMapLRN(5), 63 | 64 | nn.Conv2d(64, 64, 1), 65 | nn.ReLU(True), 66 | 67 | nn.Conv2d(64, 192, 3, padding=1), 68 | nn.ReLU(True), 69 | 70 | SpatialCrossMapLRN(5), 71 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 72 | ) 73 | 74 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 75 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 76 | 77 | self.maxpool = nn.MaxPool2d(3, stride=2, ceil_mode=True) 78 | 79 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) 80 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 81 | self.c4 = 
Inception(512, 128, 128, 256, 24, 64, 64) 82 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 83 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 84 | 85 | def forward(self, x): 86 | out = self.pre_layers(x) 87 | out = self.a3(out) 88 | out = self.b3(out) 89 | out = self.maxpool(out) 90 | out = self.a4(out) 91 | out = self.b4(out) 92 | out = self.c4(out) 93 | out = self.d4(out) 94 | out = self.e4(out) 95 | 96 | return out 97 | -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/lrn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.legacy.nn import SpatialCrossMapLRN as SpatialCrossMapLRNOld 3 | from torch.autograd import Function, Variable 4 | from torch.nn import Module 5 | 6 | 7 | # function interface, internal, do not use this one!!! 8 | class SpatialCrossMapLRNFunc(Function): 9 | 10 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 11 | self.size = size 12 | self.alpha = alpha 13 | self.beta = beta 14 | self.k = k 15 | 16 | def forward(self, input): 17 | self.save_for_backward(input) 18 | self.lrn = SpatialCrossMapLRNOld(self.size, self.alpha, self.beta, self.k) 19 | self.lrn.type(input.type()) 20 | return self.lrn.forward(input) 21 | 22 | def backward(self, grad_output): 23 | input, = self.saved_tensors 24 | return self.lrn.backward(input, grad_output) 25 | 26 | 27 | # use this one instead 28 | class SpatialCrossMapLRN(Module): 29 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 30 | super(SpatialCrossMapLRN, self).__init__() 31 | self.size = size 32 | self.alpha = alpha 33 | self.beta = beta 34 | self.k = k 35 | 36 | def forward(self, input): 37 | return SpatialCrossMapLRNFunc(self.size, self.alpha, self.beta, self.k)(input) -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/sqeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision import models 5 | 6 | 7 | class DilationLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, padding='same_padding', dilation=1, bn=False): 9 | super(DilationLayer, self).__init__() 10 | if padding == 'same_padding': 11 | padding = int((kernel_size - 1) / 2 * dilation) 12 | self.Dconv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 13 | padding=padding, dilation=dilation) 14 | self.Drelu = nn.ReLU(inplace=True) 15 | self.Dbn = nn.BatchNorm2d(out_channels) if bn else None 16 | 17 | def forward(self, x): 18 | x = self.Dconv(x) 19 | if self.Dbn is not None: 20 | x = self.Dbn(x) 21 | x = self.Drelu(x) 22 | return x 23 | 24 | 25 | class FeatExtractorSqueezeNetx16(nn.Module): 26 | n_feats = [64, 128, 256, 512] 27 | 28 | def __init__(self, pretrained=True): 29 | 30 | super(FeatExtractorSqueezeNetx16, self).__init__() 31 | print("loading layers from squeezenet1_1...") 32 | sq = models.squeezenet1_1(pretrained=pretrained) 33 | 34 | self.conv1 = nn.Sequential( 35 | sq.features[0], 36 | sq.features[1], 37 | ) 38 | self.conv2 = nn.Sequential( 39 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 40 | sq.features[3], 41 | sq.features[4], 42 | ) 43 | self.conv3 = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 45 | sq.features[6], 46 | sq.features[7], 47 | ) 48 | self.conv4 = nn.Sequential( 49 | nn.MaxPool2d(kernel_size=3, stride=2, 
padding=1), 50 | sq.features[9], 51 | sq.features[10], 52 | sq.features[11], 53 | sq.features[12], 54 | ) 55 | 56 | self.conv1[0].padding = (1, 1) 57 | 58 | def forward(self, x): 59 | x2 = self.conv1(x) 60 | x4 = self.conv2(x2) 61 | x8 = self.conv3(x4) 62 | x16 = self.conv4(x8) 63 | 64 | return x2, x4, x8, x16 65 | -------------------------------------------------------------------------------- /trackers/ReidModels/bn_linear.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | def bn_no_bias(in_features): 11 | bn_layer = nn.BatchNorm1d(in_features) 12 | bn_layer.bias.requires_grad_(False) 13 | return bn_layer 14 | 15 | def weights_init_kaiming(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Linear') != -1: 18 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') 19 | if m.bias is not None: 20 | nn.init.constant_(m.bias, 0.0) 21 | elif classname.find('Conv') != -1: 22 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') 23 | if m.bias is not None: 24 | nn.init.constant_(m.bias, 0.0) 25 | elif classname.find('BatchNorm') != -1: 26 | if m.affine: 27 | nn.init.constant_(m.weight, 1.0) 28 | nn.init.constant_(m.bias, 0.0) 29 | 30 | 31 | def weights_init_classifier(m): 32 | classname = m.__class__.__name__ 33 | if classname.find('Linear') != -1: 34 | nn.init.normal_(m.weight, std=0.001) 35 | if m.bias is not None: 36 | nn.init.constant_(m.bias, 0.0) 37 | 38 | class BNneckLinear(nn.Module): 39 | 40 | def __init__(self, nID): 41 | super().__init__() 42 | self._num_classes = nID 43 | 44 | self.gap = nn.AdaptiveAvgPool2d(1) 45 | self.bnneck = bn_no_bias(2048) 46 | self.bnneck.apply(weights_init_kaiming) 47 | 48 | self.classifier = nn.Linear(2048, self._num_classes, bias=False) 49 | self.classifier.apply(weights_init_classifier) 50 | 51 | def forward(self, features): 52 | """ 53 | See :class:`ReIDHeads.forward`. 
54 | """ 55 | global_features = self.gap(features) 56 | global_features = global_features.view(global_features.shape[0], -1) 57 | bn_features = self.bnneck(global_features) 58 | 59 | if not self.training: 60 | return F.normalize(bn_features) 61 | 62 | pred_class_logits = self.classifier(bn_features) 63 | return global_features, pred_class_logits -------------------------------------------------------------------------------- /trackers/ReidModels/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/classification/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._psroi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | sources = [] 6 | headers = [] 7 | defines = [] 8 | with_cuda = False 9 | 10 | if torch.cuda.is_available(): 11 | print('Including CUDA code.') 12 | sources += ['src/psroi_pooling_cuda.c'] 13 | headers += ['src/psroi_pooling_cuda.h'] 14 | defines += [('WITH_CUDA', None)] 15 | with_cuda = True 16 | 17 | this_file = os.path.dirname(os.path.realpath(__file__)) 18 | print(this_file) 19 | extra_objects = ['src/cuda/psroi_pooling.cu.o'] 20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 21 | 22 | ffi = create_extension( 23 | '_ext.psroi_pooling', 24 | headers=headers, 25 | sources=sources, 26 | define_macros=defines, 27 | relative_to=__file__, 28 | with_cuda=with_cuda, 29 | extra_objects=extra_objects 30 | ) 31 | 32 | if __name__ == '__main__': 33 | ffi.build() 34 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/functions/__init__.py 
-------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/psroi_pooling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import psroi_pooling 4 | 5 | 6 | class PSRoIPoolingFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.group_size = int(group_size) 12 | self.output_dim = int(output_dim) 13 | 14 | self.output = None 15 | self.mappingchannel = None 16 | self.rois = None 17 | self.feature_size = None 18 | 19 | def forward(self, features, rois): 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new().resize_(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_() 24 | mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_().cuda(features.get_device()) 25 | 26 | rtn = psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 27 | self.group_size, self.output_dim, 28 | features, rois, output, mappingchannel) 29 | assert rtn > 0 30 | self.output = output 31 | self.mappingchannel = mappingchannel 32 | self.rois = rois 33 | self.feature_size = features.size() 34 | # print features.max(), features.min() 35 | # print rois.max(), rois.min() 36 | # print output.max(), output.min() 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert (self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | 46 | psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 47 | self.output_dim, 48 | grad_output, self.rois, grad_input, self.mappingchannel) 49 | return grad_input, None 50 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | cd src/cuda 6 | echo "Compiling psroi pooling kernels by nvcc..." 
7 | ${CUDA_PATH}/bin/nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../../ 10 | python build.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/ReidModels/psroi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/psroi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | import sys 3 | from ..functions.psroi_pooling import PSRoIPoolingFunction 4 | 5 | 6 | class PSRoIPool(Module): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | super(PSRoIPool, self).__init__() 9 | 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.group_size = int(group_size) 14 | self.output_dim = int(output_dim) 15 | 16 | def forward(self, features, rois): 17 | return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, 18 | self.output_dim)(features, rois) 19 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/cuda/psroi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef PS_ROI_POOLING_KERNEL 2 | #define PS_ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int PSROIPoolForwardLauncher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, const int pooled_width, 11 | const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream); 12 | 13 | 14 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/psroi_pooling_kernel.h" 4 | 5 | 6 | 7 | extern THCState* state; 8 | 9 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){ 10 | float* data_in = THCudaTensor_data(state, features); 11 | float* rois_in = THCudaTensor_data(state, rois); 12 | float* output_out = THCudaTensor_data(state, output); 13 | int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel); 14 | //Get # of Rois 15 | int num_rois = THCudaTensor_size(state, rois, 0); 16 | int size_rois = THCudaTensor_size(state, rois, 1); 17 | if (size_rois!=5) 18 
| { 19 | return -1; 20 | } 21 | 22 | //Get # of batch_size 23 | int batch_size = THCudaTensor_size(state, features, 0); 24 | 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | int data_width = THCudaTensor_size(state, features, 3); 27 | int num_channels = THCudaTensor_size(state, features, 1); 28 | 29 | cudaStream_t stream = THCState_getCurrentStream(state); 30 | 31 | // call the gpu kernel for psroi_pooling 32 | PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size, 33 | output_dim, output_out, mappingchannel_out,stream); 34 | return 1; 35 | } 36 | 37 | 38 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 39 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel) 40 | { 41 | float *top_grad_flat = THCudaTensor_data(state, top_grad); 42 | float *rois_flat = THCudaTensor_data(state, rois); 43 | 44 | float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 45 | int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel); 46 | 47 | // Number of ROIs 48 | int num_rois = THCudaTensor_size(state, rois, 0); 49 | int size_rois = THCudaTensor_size(state, rois, 1); 50 | if (size_rois != 5) 51 | { 52 | return -1; 53 | } 54 | // batch size 55 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 56 | 57 | // data height 58 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 59 | // data width 60 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 61 | // Number of channels 62 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 63 | 64 | cudaStream_t stream = THCState_getCurrentStream(state); 65 | 66 | PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width, pooled_height, output_dim, bottom_grad_flat, rois_flat, stream); 67 | return 1; 68 | } 69 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel); 3 | 4 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel); 6 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from utils.log import logger 9 | from ReidModels import net_utils 10 | from ReidModels.reid.image_part_aligned import Model 11 | 12 | 13 | def load_reid_model(): 14 | model = Model(n_parts=8) 15 | model.inp_size = (80, 160) 16 | ckpt = 'data/googlenet_part8_all_xavier_ckpt_56.h5' 17 | 18 | net_utils.load_net(ckpt, model) 19 | logger.info('Load ReID model from {}'.format(ckpt)) 20 | 21 | model = model.cuda() 22 | model.eval() 23 | return 
model 24 | 25 | 26 | def im_preprocess(image): 27 | image = np.asarray(image, np.float32) 28 | image -= np.array([104, 117, 123], dtype=np.float32).reshape(1, 1, -1) 29 | image = image.transpose((2, 0, 1)) 30 | return image 31 | 32 | 33 | def extract_image_patches(image, bboxes): 34 | bboxes = np.round(bboxes).astype(np.int) 35 | bboxes = bbox_utils.clip_boxes(bboxes, image.shape) 36 | patches = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes] 37 | return patches 38 | 39 | 40 | def extract_reid_features(reid_model, image, tlbrs): 41 | if len(tlbrs) == 0: 42 | return torch.FloatTensor() 43 | 44 | patches = extract_image_patches(image, tlbrs) 45 | patches = np.asarray([im_preprocess(cv2.resize(p, reid_model.inp_size)) for p in patches], dtype=np.float32) 46 | 47 | gpu = net_utils.get_device(reid_model) 48 | with torch.no_grad(): 49 | _img = torch.from_numpy(patches) 50 | if gpu: 51 | _img = _img.cuda() 52 | features,id = reid_model(_img).detach() 53 | return features 54 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/image_part_aligned.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.backbone.googlenet import GoogLeNet 6 | 7 | 8 | class Model(nn.Module): 9 | def __init__(self, n_parts=8,n_ID=300): 10 | super(Model, self).__init__() 11 | self.n_parts = n_parts 12 | self.nID = n_ID 13 | 14 | self.feat_conv = GoogLeNet() 15 | self.conv_input_feat = nn.Conv2d(self.feat_conv.output_channels, 512, 1) 16 | # part net 17 | self.conv_att = nn.Conv2d(512, self.n_parts, 1) 18 | 19 | for i in range(self.n_parts): 20 | setattr(self, 'linear_feature{}'.format(i+1), nn.Linear(512, 64)) 21 | self.id_classifer = nn.Linear(512,self.nID) 22 | def forward(self, x): 23 | feature = self.feat_conv(x) 24 | feature = self.conv_input_feat(feature) 25 | 26 | att_weights = torch.sigmoid(self.conv_att(feature)) 27 | 28 | linear_feautres = [] 29 | for i in range(self.n_parts): 30 | masked_feature = feature * torch.unsqueeze(att_weights[:, i], 1) 31 | pooled_feature = F.avg_pool2d(masked_feature, masked_feature.size()[2:4]) 32 | linear_feautres.append( 33 | getattr(self, 'linear_feature{}'.format(i+1))(pooled_feature.view(pooled_feature.size(0), -1)) 34 | ) 35 | 36 | concat_features = torch.cat(linear_feautres, 1) 37 | normed_feature = concat_features / torch.clamp(torch.norm(concat_features, 2, 1, keepdim=True), min=1e-6) 38 | out = self.id_classifer(normed_feature) 39 | return normed_feature,out 40 | -------------------------------------------------------------------------------- /trackers/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def track(tracker,args,orig_img,inps,boxes,hm,cropped_boxes,im_name,scores): 4 | hm = hm.cpu().data.numpy() 5 | online_targets = tracker.update(orig_img,inps,boxes,hm,cropped_boxes,im_name,scores,_debug=False) 6 | new_boxes,new_scores,new_ids,new_hm,new_crop = [],[],[],[],[] 7 | for t in online_targets: 8 | tlbr = t.tlbr 9 | tid = t.track_id 10 | thm = t.pose 11 | tcrop = t.crop_box 12 | tscore = t.detscore 13 | new_boxes.append(tlbr) 14 | new_crop.append(tcrop) 15 | new_hm.append(thm) 16 | new_ids.append(tid) 17 | new_scores.append(tscore) 18 | 19 | new_hm = torch.Tensor(new_hm).to(args.device) 20 | return new_boxes,new_scores,new_ids,new_hm,new_crop 21 | 
-------------------------------------------------------------------------------- /trackers/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | cfg = edict() 3 | cfg.nid = 1000 4 | cfg.arch = "osnet_ain" # "osnet" or "res50-fc512" 5 | cfg.loadmodel = "trackers/weights/osnet_ain_x1_0_msmt17_256x128_amsgrad_ep50_lr0.0015_coslr_b64_fb10_softmax_labsmth_flip_jitter.pth" 6 | cfg.frame_rate = 30 7 | cfg.track_buffer = 240 8 | cfg.conf_thres = 0.5 9 | cfg.nms_thres = 0.4 10 | cfg.iou_thres = 0.5 11 | -------------------------------------------------------------------------------- /trackers/tracking/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | Track Association part adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | -------------------------------------------------------------------------------- /trackers/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/tracking/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /trackers/tracking/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MVIG-SJTU/AlphaPose/c60106d19afb443e964df6f06ed1842962f5f1f7/trackers/tracking/utils/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /trackers/tracking/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/tracking/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /trackers/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /trackers/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 
44 | self.duration = 0. 45 | 46 | --------------------------------------------------------------------------------