├── .gitignore ├── README.md ├── alphapose ├── __init__.py ├── datasets │ ├── __init__.py │ ├── coco_det.py │ ├── coco_wholebody.py │ ├── coco_wholebody_det.py │ ├── concat_dataset.py │ ├── custom.py │ ├── halpe_136.py │ ├── halpe_136_det.py │ ├── halpe_26.py │ ├── halpe_26_det.py │ ├── mpii.py │ └── mscoco.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── criterion.py │ ├── fastpose.py │ ├── fastpose_duc.py │ ├── fastpose_duc_dense.py │ ├── hardnet.py │ ├── hrnet.py │ ├── layers │ │ ├── DUC.py │ │ ├── PixelUnshuffle.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── ShuffleResnet.py │ │ └── dcn │ │ │ ├── DCN.py │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ ├── deform_pool.py │ │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ └── simplepose.py ├── opt.py ├── utils │ ├── __init__.py │ ├── bbox.py │ ├── config.py │ ├── detector.py │ ├── env.py │ ├── file_detector.py │ ├── logger.py │ ├── metrics.py │ ├── pPose_nms.py │ ├── presets │ │ ├── __init__.py │ │ └── simple_transform.py │ ├── registry.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── transforms.py │ ├── vis.py │ ├── webcam_detector.py │ └── writer.py └── version.py ├── configs ├── coco │ ├── hardnet │ │ ├── 256x192_hard68_lr1e-3_1x.yaml │ │ └── 256x192_hard85_lr1e-3_1x.yaml │ ├── hrnet │ │ └── 256x192_w32_lr1e-3.yaml │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-concat.yaml │ │ ├── 256x192_res50_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-simple.yaml │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── dense_coco │ └── resnet50 │ │ └── 256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml ├── halpe_136 │ ├── hardnet │ │ └── 256x192_hard68_lr1e-3_1x.yaml │ └── resnet │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml └── halpe_26 │ └── resnet │ └── 256x192_res50_lr1e-3_1x.yaml ├── detector ├── apis.py ├── effdet_api.py ├── effdet_cfg.py ├── efficientdet │ ├── README.md │ ├── effdet │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── bench.py │ │ ├── config │ │ │ └── config.py │ │ ├── efficientdet.py │ │ ├── helpers.py │ │ └── object_detection │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── region_similarity_calculator.py │ │ │ └── target_assigner.py │ ├── utils.py │ └── weights │ │ └── get_models.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ ├── nms_kernel.cu │ │ ├── soft_nms_cpu.cpp │ │ └── soft_nms_cpu.pyx ├── tracker │ ├── README.md │ ├── __init__.py │ ├── cfg │ │ ├── ccmcpe.json │ │ └── yolov3.cfg │ ├── models.py │ ├── preprocess.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetrack.py │ │ ├── matching.py │ │ └── multitracker.py │ └── utils │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── evaluation.py │ │ ├── io.py │ │ ├── kalman_filter.py │ │ ├── log.py │ │ ├── nms.py │ │ ├── parse_config.py │ │ ├── timer.py │ │ ├── utils.py │ │ └── visualization.py ├── tracker_api.py ├── tracker_cfg.py ├── yolo │ ├── README.md │ ├── __init__.py │ ├── bbox.py │ ├── cam_demo.py │ ├── cfg │ │ ├── tiny-yolo-voc.cfg │ │ ├── yolo-voc.cfg │ │ ├── yolo.cfg │ │ ├── 
yolov3-spp.cfg │ │ └── yolov3.cfg │ ├── darknet.py │ ├── detect.py │ ├── pallete │ ├── preprocess.py │ ├── util.py │ ├── video_demo.py │ └── video_demo_half.py ├── yolo_api.py └── yolo_cfg.py ├── examples ├── demo │ ├── Copy of climbing_106.jpg │ ├── Copy of climbing_269.jpg │ ├── Copy of climbing_62.jpg │ ├── Copy of standing_147.jpg │ ├── Copy of standing_153.jpg │ └── Copy of standing_29.jpg ├── list-coco-demo.txt ├── list-coco-minival500.txt ├── list-coco-val5000.txt └── res │ ├── alphapose-results.json │ ├── final_xgboost_home_security_scaler_model.pickle │ └── final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle ├── json_data_preprocessing_colab └── json_data_processing_for_Home_Security.ipynb ├── pretrained_models └── get_models.sh ├── scripts ├── demo_api.py ├── demo_inference.py ├── inference.sh ├── train.py ├── train.sh ├── validate.py └── validate.sh ├── setup.cfg ├── setup.py └── trackers ├── PoseFlow ├── README.md ├── alpha-pose-results-sample.json ├── matching.py ├── parallel_process.py ├── poseflow_infer.py ├── posetrack1.gif ├── posetrack2.gif ├── posetrack_data ├── poseval ├── requirements.txt ├── tracker-baseline.py ├── tracker-general.py └── utils.py ├── README.md ├── ReidModels ├── ResBnLin.py ├── ResNet.py ├── __init__.py ├── backbone │ ├── __init__.py │ ├── googlenet.py │ ├── lrn.py │ └── sqeezenet.py ├── bn_linear.py ├── classification │ ├── __init__.py │ ├── classifier.py │ └── rfcn_cls.py ├── net_utils.py ├── osnet.py ├── osnet_ain.py ├── psroi_pooling │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── psroi_pooling │ │ │ └── __init__.py │ ├── build.py │ ├── functions │ │ ├── __init__.py │ │ └── psroi_pooling.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── psroi_pool.py │ └── src │ │ ├── cuda │ │ ├── psroi_pooling_kernel.cu │ │ └── psroi_pooling_kernel.h │ │ ├── psroi_pooling_cuda.c │ │ └── psroi_pooling_cuda.h ├── reid │ ├── __init__.py │ └── image_part_aligned.py └── resnet_fc.py ├── __init__.py ├── tracker_api.py ├── tracker_cfg.py ├── tracking ├── README.md ├── __init__.py ├── basetrack.py ├── matching.py └── utils │ ├── __init__.py │ ├── io.py │ ├── kalman_filter.py │ ├── nms.py │ ├── parse_config.py │ ├── timer.py │ └── utils.py └── utils ├── basetransforms.py ├── bbox.py ├── io.py ├── kalman_filter.py ├── log.py ├── parse_config.py ├── timer.py ├── transform.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .vscode 105 | .tensorboard 106 | exp/coco* 107 | *.pth 108 | *.weights 109 | exp/json/test_kpt.json 110 | exp/json/test_gt_kpt.json 111 | exp/json/validate_rcnn_kpt.json 112 | exp/json/validate_gt_kpt.json 113 | data/ 114 | tmp/ 115 | exp/json 116 | tmp_*/ 117 | example/res/ -------------------------------------------------------------------------------- /alphapose/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /alphapose/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_det import Mscoco_det 2 | from .concat_dataset import ConcatDataset 3 | from .custom import CustomDataset 4 | from .mscoco import Mscoco 5 | from .mpii import Mpii 6 | from .halpe_26 import Halpe_26 7 | from .halpe_136 import Halpe_136 8 | from .halpe_136_det import Halpe_136_det 9 | from .halpe_26_det import Halpe_26_det 10 | __all__ = ['CustomDataset', 'Halpe_136', 'Halpe_26_det', 'Halpe_136_det', 'Halpe_26', 'Mscoco', 'Mscoco_det', 'Mpii', 'ConcatDataset', 'coco_wholebody', 'coco_wholebody_det'] 11 | -------------------------------------------------------------------------------- /alphapose/datasets/coco_det.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | """MS COCO Human Detection Box dataset.""" 7 | import json 8 | import os 9 | 10 | import cv2 11 | import torch 12 | import torch.utils.data as data 13 | from tqdm import tqdm 14 | 15 | from alphapose.utils.presets import SimpleTransform 16 | from detector.apis import get_detector 17 | from alphapose.models.builder import DATASET 18 | 19 | 20 | @DATASET.register_module 21 | class Mscoco_det(data.Dataset): 22 | """ COCO human detection box dataset. 
23 | 24 | """ 25 | EVAL_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 26 | 27 | def __init__(self, 28 | det_file=None, 29 | opt=None, 30 | **cfg): 31 | 32 | self._cfg = cfg 33 | self._opt = opt 34 | self._preset_cfg = cfg['PRESET'] 35 | self._root = cfg['ROOT'] 36 | self._img_prefix = cfg['IMG_PREFIX'] 37 | if not det_file: 38 | det_file = cfg['DET_FILE'] 39 | self._ann_file = os.path.join(self._root, cfg['ANN']) 40 | 41 | if os.path.exists(det_file): 42 | print("Detection results exist, will use it") 43 | else: 44 | print("Will create detection results to {}".format(det_file)) 45 | self.write_coco_json(det_file) 46 | 47 | assert os.path.exists(det_file), "Error: no detection results found" 48 | with open(det_file, 'r') as fid: 49 | self._det_json = json.load(fid) 50 | 51 | self._input_size = self._preset_cfg['IMAGE_SIZE'] 52 | self._output_size = self._preset_cfg['HEATMAP_SIZE'] 53 | 54 | self._sigma = self._preset_cfg['SIGMA'] 55 | 56 | if self._preset_cfg['TYPE'] == 'simple': 57 | self.transformation = SimpleTransform( 58 | self, scale_factor=0, 59 | input_size=self._input_size, 60 | output_size=self._output_size, 61 | rot=0, sigma=self._sigma, 62 | train=False, add_dpg=False) 63 | 64 | def __getitem__(self, index): 65 | det_res = self._det_json[index] 66 | if not isinstance(det_res['image_id'], int): 67 | img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) 68 | img_id = int(img_id) 69 | else: 70 | img_id = det_res['image_id'] 71 | img_path = './data/coco/val2017/%012d.jpg' % img_id 72 | 73 | # Load image 74 | image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') 75 | 76 | imght, imgwidth = image.shape[1], image.shape[2] 77 | x1, y1, w, h = det_res['bbox'] 78 | bbox = [x1, y1, x1 + w, y1 + h] 79 | inp, bbox = self.transformation.test_transform(image, bbox) 80 | return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) 81 | 82 | def __len__(self): 83 | return len(self._det_json) 84 | 85 | def write_coco_json(self, det_file): 86 | from pycocotools.coco import COCO 87 | import pathlib 88 | 89 | _coco = COCO(self._ann_file) 90 | image_ids = sorted(_coco.getImgIds()) 91 | det_model = get_detector(self._opt) 92 | dets = [] 93 | for entry in tqdm(_coco.loadImgs(image_ids)): 94 | abs_path = os.path.join( 95 | self._root, self._img_prefix, entry['file_name']) 96 | det = det_model.detect_one_img(abs_path) 97 | if det: 98 | dets += det 99 | pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) 100 | json.dump(dets, open(det_file, 'w')) 101 | 102 | @property 103 | def joint_pairs(self): 104 | """Joint pairs which defines the pairs of joint to be swapped 105 | when the image is flipped horizontally.""" 106 | return [[1, 2], [3, 4], [5, 6], [7, 8], 107 | [9, 10], [11, 12], [13, 14], [15, 16]] 108 | -------------------------------------------------------------------------------- /alphapose/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import bisect 7 | 8 | import torch 9 | import torch.utils.data as data 10 | 11 | from alphapose.models.builder import DATASET, build_dataset 12 | 13 | 14 | @DATASET.register_module 15 | class ConcatDataset(data.Dataset): 16 | """Custom Concat dataset. 17 | Annotation file must be in `coco` format. 18 | 19 | Parameters 20 | ---------- 21 | train: bool, default is True 22 | If true, will set as training mode. 23 | dpg: bool, default is False 24 | If true, will activate `dpg` for data augmentation. 25 | skip_empty: bool, default is False 26 | Whether skip entire image if no valid label is found. 27 | cfg: dict, dataset configuration. 28 | """ 29 | 30 | def __init__(self, 31 | train=True, 32 | dpg=False, 33 | skip_empty=True, 34 | **cfg): 35 | 36 | self._cfg = cfg 37 | self._subset_cfg_list = cfg['SET_LIST'] 38 | self._preset_cfg = cfg['PRESET'] 39 | self._mask_id = [item['MASK_ID'] for item in self._subset_cfg_list] 40 | 41 | self.num_joints = self._preset_cfg['NUM_JOINTS'] 42 | 43 | self._subsets = [] 44 | self._subset_size = [0] 45 | for _subset_cfg in self._subset_cfg_list: 46 | subset = build_dataset(_subset_cfg, preset_cfg=self._preset_cfg, train=train) 47 | self._subsets.append(subset) 48 | self._subset_size.append(len(subset)) 49 | self.cumulative_sizes = self.cumsum(self._subset_size) 50 | 51 | def __getitem__(self, idx): 52 | assert idx >= 0 53 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 54 | dataset_idx -= 1 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx] 56 | 57 | sample = self._subsets[dataset_idx][sample_idx] 58 | img, label, label_mask, img_id, bbox = sample 59 | 60 | K = label.shape[0] # num_joints from `_subsets[dataset_idx]` 61 | expend_label = torch.zeros((self.num_joints, *label.shape[1:]), dtype=label.dtype) 62 | expend_label_mask = torch.zeros((self.num_joints, *label_mask.shape[1:]), dtype=label_mask.dtype) 63 | expend_label[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label 64 | expend_label_mask[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label_mask 65 | 66 | return img, expend_label, expend_label_mask, img_id, bbox 67 | 68 | def __len__(self): 69 | return self.cumulative_sizes[-1] 70 | 71 | @staticmethod 72 | def cumsum(sequence): 73 | r, s = [], 0 74 | for e in sequence: 75 | r.append(e + s) 76 | s += e 77 | return r 78 | -------------------------------------------------------------------------------- /alphapose/datasets/halpe_26_det.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by HaoyiZhu 4 | # ----------------------------------------------------- 5 | 6 | """Haple_26 Human Detection Box dataset.""" 7 | import json 8 | import os 9 | 10 | import cv2 11 | import torch 12 | import torch.utils.data as data 13 | from tqdm import tqdm 14 | 15 | from alphapose.utils.presets import SimpleTransform 16 | from detector.apis import get_detector 17 | from alphapose.models.builder import DATASET 18 | 19 | 20 | @DATASET.register_module 21 | class Halpe_26_det(data.Dataset): 22 | """ Halpe_26 human detection box dataset. 
23 | 24 | """ 25 | EVAL_JOINTS = list(range(26)) 26 | 27 | def __init__(self, 28 | det_file=None, 29 | opt=None, 30 | **cfg): 31 | 32 | self._cfg = cfg 33 | self._opt = opt 34 | self._preset_cfg = cfg['PRESET'] 35 | self._root = cfg['ROOT'] 36 | self._img_prefix = cfg['IMG_PREFIX'] 37 | if not det_file: 38 | det_file = cfg['DET_FILE'] 39 | self._ann_file = os.path.join(self._root, cfg['ANN']) 40 | 41 | if os.path.exists(det_file): 42 | print("Detection results exist, will use it") 43 | else: 44 | print("Will create detection results to {}".format(det_file)) 45 | self.write_coco_json(det_file) 46 | 47 | assert os.path.exists(det_file), "Error: no detection results found" 48 | with open(det_file, 'r') as fid: 49 | self._det_json = json.load(fid) 50 | 51 | self._input_size = self._preset_cfg['IMAGE_SIZE'] 52 | self._output_size = self._preset_cfg['HEATMAP_SIZE'] 53 | 54 | self._sigma = self._preset_cfg['SIGMA'] 55 | 56 | if self._preset_cfg['TYPE'] == 'simple': 57 | self.transformation = SimpleTransform( 58 | self, scale_factor=0, 59 | input_size=self._input_size, 60 | output_size=self._output_size, 61 | rot=0, sigma=self._sigma, 62 | train=False, add_dpg=False) 63 | 64 | def __getitem__(self, index): 65 | det_res = self._det_json[index] 66 | if not isinstance(det_res['image_id'], int): 67 | img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) 68 | img_id = int(img_id) 69 | else: 70 | img_id = det_res['image_id'] 71 | img_path = '/DATA1/Benchmark/coco/val2017/%012d.jpg' % img_id 72 | 73 | # Load image 74 | image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') 75 | 76 | imght, imgwidth = image.shape[1], image.shape[2] 77 | x1, y1, w, h = det_res['bbox'] 78 | bbox = [x1, y1, x1 + w, y1 + h] 79 | inp, bbox = self.transformation.test_transform(image, bbox) 80 | return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) 81 | 82 | def __len__(self): 83 | return len(self._det_json) 84 | 85 | def write_coco_json(self, det_file): 86 | from pycocotools.coco import COCO 87 | import pathlib 88 | 89 | _coco = COCO(self._ann_file) 90 | image_ids = sorted(_coco.getImgIds()) 91 | det_model = get_detector(self._opt) 92 | dets = [] 93 | for entry in tqdm(_coco.loadImgs(image_ids)): 94 | abs_path = os.path.join( 95 | '/DATA1/Benchmark/coco', self._img_prefix, entry['file_name']) 96 | det = det_model.detect_one_img(abs_path) 97 | if det: 98 | dets += det 99 | pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) 100 | json.dump(dets, open(det_file, 'w')) 101 | 102 | @property 103 | def joint_pairs(self): 104 | """Joint pairs which defines the pairs of joint to be swapped 105 | when the image is flipped horizontally.""" 106 | return[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], 107 | [20, 21], [22, 23], [24, 25]] 108 | -------------------------------------------------------------------------------- /alphapose/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .fastpose import FastPose 2 | from .fastpose_duc import FastPose_DUC 3 | from .hrnet import PoseHighResolutionNet 4 | from .simplepose import SimplePose 5 | from .fastpose_duc_dense import FastPose_DUC_Dense 6 | from .hardnet import HarDNetPose 7 | from .criterion import L1JointRegression 8 | 9 | __all__ = ['FastPose', 'SimplePose', 'PoseHighResolutionNet', 10 | 
'FastPose_DUC', 'FastPose_DUC_Dense', 'HarDNetPose', 'L1JointRegression'] 11 | -------------------------------------------------------------------------------- /alphapose/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from alphapose.utils import Registry, build_from_cfg, retrieve_from_cfg 4 | 5 | 6 | SPPE = Registry('sppe') 7 | LOSS = Registry('loss') 8 | DATASET = Registry('dataset') 9 | 10 | 11 | def build(cfg, registry, default_args=None): 12 | if isinstance(cfg, list): 13 | modules = [ 14 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 15 | ] 16 | return nn.Sequential(*modules) 17 | else: 18 | return build_from_cfg(cfg, registry, default_args) 19 | 20 | 21 | def build_sppe(cfg, preset_cfg, **kwargs): 22 | default_args = { 23 | 'PRESET': preset_cfg, 24 | } 25 | for key, value in kwargs.items(): 26 | default_args[key] = value 27 | return build(cfg, SPPE, default_args=default_args) 28 | 29 | 30 | def build_loss(cfg): 31 | return build(cfg, LOSS) 32 | 33 | 34 | def build_dataset(cfg, preset_cfg, **kwargs): 35 | exec(f'from ..datasets import {cfg.TYPE}') 36 | default_args = { 37 | 'PRESET': preset_cfg, 38 | } 39 | for key, value in kwargs.items(): 40 | default_args[key] = value 41 | return build(cfg, DATASET, default_args=default_args) 42 | 43 | 44 | def retrieve_dataset(cfg): 45 | exec(f'from ..datasets import {cfg.TYPE}') 46 | return retrieve_from_cfg(cfg, DATASET) 47 | -------------------------------------------------------------------------------- /alphapose/models/criterion.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .builder import LOSS 9 | 10 | from alphapose.utils.transforms import _integral_tensor 11 | 12 | 13 | class IngetralCoordinate(torch.autograd.Function): 14 | ''' Symmetry integral regression function. 
15 | ''' 16 | AMPLITUDE = 2 17 | 18 | @staticmethod 19 | def forward(ctx, input): 20 | assert isinstance( 21 | input, torch.Tensor), 'IngetralCoordinate only takes input as torch.Tensor' 22 | input_size = input.size() 23 | weight = torch.arange( 24 | input_size[-1], dtype=input.dtype, layout=input.layout, device=input.device) 25 | ctx.input_size = input_size 26 | output = input.mul(weight) 27 | ctx.save_for_backward(weight, output) 28 | 29 | return output 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | weight, output = ctx.saved_tensors 34 | output_coord = output.sum(dim=2, keepdim=True) 35 | weight = weight[None, None, :].repeat( 36 | output_coord.shape[0], output_coord.shape[1], 1) 37 | weight_mask = torch.ones(weight.shape, dtype=grad_output.dtype, 38 | layout=grad_output.layout, device=grad_output.device) 39 | weight_mask[weight < output_coord] = -1 40 | weight_mask[output_coord.repeat( 41 | 1, 1, weight.shape[-1]) > ctx.input_size[-1]] = 1 42 | weight_mask *= IngetralCoordinate.AMPLITUDE 43 | return grad_output.mul(weight_mask) 44 | 45 | 46 | @LOSS.register_module 47 | class L1JointRegression(nn.Module): 48 | ''' L1 Joint Regression Loss 49 | ''' 50 | def __init__(self, OUTPUT_3D=False, size_average=True, reduce=True, NORM_TYPE='softmax'): 51 | super(L1JointRegression, self).__init__() 52 | self.size_average = size_average 53 | self.reduce = reduce 54 | self.output_3d = OUTPUT_3D 55 | self.norm_type = NORM_TYPE 56 | 57 | self.integral_operation = IngetralCoordinate.apply 58 | 59 | def forward(self, preds, *args): 60 | gt_joints = args[0] 61 | gt_joints_vis = args[1] 62 | 63 | if self.output_3d: 64 | num_joints = int(gt_joints_vis.shape[1] / 3) 65 | else: 66 | num_joints = int(gt_joints_vis.shape[1] / 2) 67 | hm_width = preds.shape[-1] 68 | hm_height = preds.shape[-2] 69 | hm_depth = preds.shape[-3] // num_joints if self.output_3d else 1 70 | 71 | pred_jts, pred_scores = _integral_tensor( 72 | preds, num_joints, self.output_3d, hm_width, hm_height, hm_depth, integral_operation=self.integral_operation, norm_type=self.norm_type) 73 | 74 | _assert_no_grad(gt_joints) 75 | _assert_no_grad(gt_joints_vis) 76 | return weighted_l1_loss(pred_jts, pred_scores, gt_joints, gt_joints_vis, self.size_average) 77 | 78 | 79 | def _assert_no_grad(tensor): 80 | assert not tensor.requires_grad, \ 81 | "nn criterions don't compute the gradient w.r.t. targets - please " \ 82 | "mark these tensors as not requiring gradients" 83 | 84 | 85 | def weighted_l1_loss(input, scores, target, weights, size_average): 86 | out = torch.abs(input - target) 87 | out = out * weights 88 | #out_of_scores = torch.abs(scores - torch.ones_like(scores)) 89 | #out_of_scores = out_of_scores.reshape((out_of_scores.shape[0], -1)) 90 | #out_of_scores = out_of_scores * weights[:, 0::2] 91 | if size_average: 92 | return out.sum() / len(input) 93 | else: 94 | return out.sum() 95 | 96 | 97 | LOSS.register_module(torch.nn.MSELoss) 98 | -------------------------------------------------------------------------------- /alphapose/models/fastpose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.DUC import DUC 10 | from .layers.SE_Resnet import SEResnet 11 | 12 | 13 | @SPPE.register_module 14 | class FastPose(nn.Module): 15 | 16 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 17 | super(FastPose, self).__init__() 18 | self._preset_cfg = cfg['PRESET'] 19 | if 'CONV_DIM' in cfg.keys(): 20 | self.conv_dim = cfg['CONV_DIM'] 21 | else: 22 | self.conv_dim = 128 23 | if 'DCN' in cfg.keys(): 24 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 25 | dcn = cfg['DCN'] 26 | self.preact = SEResnet( 27 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 28 | else: 29 | self.preact = SEResnet(f"resnet{cfg['NUM_LAYERS']}") 30 | 31 | # Imagenet pretrain model 32 | import torchvision.models as tm # noqa: F401,F403 33 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 34 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 35 | 36 | model_state = self.preact.state_dict() 37 | state = {k: v for k, v in x.state_dict().items() 38 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 39 | model_state.update(state) 40 | self.preact.load_state_dict(model_state) 41 | 42 | self.suffle1 = nn.PixelShuffle(2) 43 | self.duc1 = DUC(512, 1024, upscale_factor=2, norm_layer=norm_layer) 44 | if self.conv_dim == 256: 45 | self.duc2 = DUC(256, 1024, upscale_factor=2, norm_layer=norm_layer) 46 | else: 47 | self.duc2 = DUC(256, 512, upscale_factor=2, norm_layer=norm_layer) 48 | self.conv_out = nn.Conv2d( 49 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 50 | 51 | def forward(self, x): 52 | out = self.preact(x) 53 | out = self.suffle1(out) 54 | out = self.duc1(out) 55 | out = self.duc2(out) 56 | 57 | out = self.conv_out(out) 58 | return out 59 | 60 | def _initialize(self): 61 | for m in self.conv_out.modules(): 62 | if isinstance(m, nn.Conv2d): 63 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | nn.init.constant_(m.bias, 0) 68 | -------------------------------------------------------------------------------- /alphapose/models/fastpose_duc.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.Resnet import ResNet 10 | from .layers.SE_Resnet import SEResnet 11 | from .layers.ShuffleResnet import ShuffleResnet 12 | 13 | 14 | @SPPE.register_module 15 | class FastPose_DUC(nn.Module): 16 | conv_dim = 256 17 | 18 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 19 | super(FastPose_DUC, self).__init__() 20 | self._preset_cfg = cfg['PRESET'] 21 | if cfg['BACKBONE'] == 'shuffle': 22 | print('Load shuffle backbone...') 23 | backbone = ShuffleResnet 24 | elif cfg['BACKBONE'] == 'se-resnet': 25 | print('Load SE Resnet...') 26 | backbone = SEResnet 27 | else: 28 | print('Load Resnet...') 29 | backbone = ResNet 30 | 31 | if 'DCN' in cfg.keys(): 32 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 33 | dcn = cfg['DCN'] 34 | self.preact = backbone( 35 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 36 | else: 37 | self.preact = backbone(f"resnet{cfg['NUM_LAYERS']}") 38 | 39 | # Imagenet pretrain model 40 | import torchvision.models as tm # noqa: F401,F403 41 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 42 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 43 | 44 | model_state = self.preact.state_dict() 45 | state = {k: v for k, v in x.state_dict().items() 46 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 47 | model_state.update(state) 48 | self.preact.load_state_dict(model_state) 49 | self.norm_layer = norm_layer 50 | 51 | stage1_cfg = cfg['STAGE1'] 52 | stage2_cfg = cfg['STAGE2'] 53 | stage3_cfg = cfg['STAGE3'] 54 | 55 | self.duc1 = self._make_duc_stage(stage1_cfg, 2048, 1024) 56 | self.duc2 = self._make_duc_stage(stage2_cfg, 1024, 512) 57 | self.duc3 = self._make_duc_stage(stage3_cfg, 512, self.conv_dim) 58 | 59 | self.conv_out = nn.Conv2d( 60 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 61 | 62 | def forward(self, x): 63 | out = self.preact(x) 64 | out = self.duc1(out) 65 | out = self.duc2(out) 66 | out = self.duc3(out) 67 | 68 | out = self.conv_out(out) 69 | return out 70 | 71 | def _make_duc_stage(self, layer_config, inplanes, outplanes): 72 | layers = [] 73 | 74 | shuffle = nn.PixelShuffle(2) 75 | inplanes //= 4 76 | layers.append(shuffle) 77 | for i in range(layer_config.NUM_CONV - 1): 78 | conv = nn.Conv2d(inplanes, inplanes, kernel_size=3, 79 | padding=1, bias=False) 80 | norm_layer = self.norm_layer(inplanes, momentum=0.1) 81 | relu = nn.ReLU(inplace=True) 82 | layers += [conv, norm_layer, relu] 83 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=3, 84 | padding=1, bias=False) 85 | norm_layer = self.norm_layer(outplanes, momentum=0.1) 86 | relu = nn.ReLU(inplace=True) 87 | layers += [conv, norm_layer, relu] 88 | return nn.Sequential(*layers) 89 | 90 | def _initialize(self): 91 | for m in self.conv_out.modules(): 92 | if isinstance(m, nn.Conv2d): 93 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 94 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 95 | # logger.info('=> init {}.bias as 0'.format(name)) 96 | nn.init.normal_(m.weight, std=0.001) 97 | nn.init.constant_(m.bias, 0) 98 | -------------------------------------------------------------------------------- /alphapose/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class DUC(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, inplanes, planes, 16 | upscale_factor=2, norm_layer=nn.BatchNorm2d): 17 | super(DUC, self).__init__() 18 | self.conv = nn.Conv2d( 19 | inplanes, planes, kernel_size=3, padding=1, bias=False) 20 | self.bn = norm_layer(planes, momentum=0.1) 21 | self.relu = nn.ReLU(inplace=True) 22 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = self.bn(x) 27 | x = self.relu(x) 28 | x = self.pixel_shuffle(x) 29 | return x 30 | -------------------------------------------------------------------------------- /alphapose/models/layers/PixelUnshuffle.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class PixelUnshuffle(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, downscale_factor=2): 16 | super(PixelUnshuffle, self).__init__() 17 | self._r = downscale_factor 18 | 19 | def forward(self, x): 20 | b, c, h, w = x.shape 21 | out_c = c * (self._r * self._r) 22 | out_h = h // self._r 23 | out_w = w // self._r 24 | 25 | x_view = x.contiguous().view(b, c, out_h, self._r, out_w, self._r) 26 | x_prime = x_view.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_c, out_h, out_w) 27 | 28 | return x_prime 29 | -------------------------------------------------------------------------------- /alphapose/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | from torch import nn 7 | 8 | 9 | class SELayer(nn.Module): 10 | def __init__(self, channel, reduction=1): 11 | super(SELayer, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Linear(channel, channel // reduction), 15 | nn.ReLU(inplace=True), 16 | nn.Linear(channel // reduction, channel), 17 | nn.Sigmoid() 18 | ) 19 | 20 | def forward(self, x): 21 | b, c, _, _ = x.size() 22 | y = self.avg_pool(x).view(b, c) 23 | y = self.fc(y).view(b, c, 1, 1) 24 | return x * y 25 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/DCN.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from . 
import DeformConv, ModulatedDeformConv 9 | 10 | 11 | class DCN(nn.Module): 12 | ''' 13 | Initialize: inplanes, planes, upscale_factor 14 | OUTPUT: (planes // upscale_factor^2) * ht * wd 15 | ''' 16 | 17 | def __init__(self, inplanes, planes, dcn, 18 | kernel_size, stride=1, 19 | padding=0, bias=False): 20 | super(DCN, self).__init__() 21 | fallback_on_stride = dcn.get('FALLBACK_ON_STRIDE', False) 22 | self.with_modulated_dcn = dcn.get('MODULATED', False) 23 | if fallback_on_stride: 24 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, stride=stride, 25 | padding=padding, bias=bias) 26 | else: 27 | self.deformable_groups = dcn.get('DEFORM_GROUP', 1) 28 | if not self.with_modulated_dcn: 29 | conv_op = DeformConv 30 | offset_channels = 18 31 | else: 32 | conv_op = ModulatedDeformConv 33 | offset_channels = 27 34 | 35 | self.conv_offset = nn.Conv2d( 36 | inplanes, 37 | self.deformable_groups * offset_channels, 38 | kernel_size=kernel_size, 39 | stride=stride, 40 | padding=padding) 41 | self.conv = conv_op( 42 | inplanes, 43 | planes, 44 | kernel_size=kernel_size, 45 | stride=stride, 46 | padding=padding, 47 | deformable_groups=self.deformable_groups, 48 | bias=bias) 49 | 50 | def forward(self, x): 51 | if self.with_modulated_dcn: 52 | offset_mask = self.conv_offset(x) 53 | offset = offset_mask[:, :18 * self.deformable_groups, :, :] 54 | mask = offset_mask[:, -9 * self.deformable_groups:, :, :] 55 | mask = mask.sigmoid() 56 | out = self.conv(x, offset, mask) 57 | else: 58 | offset = self.conv_offset(x) 59 | out = self.conv(x, offset) 60 | 61 | return out 62 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | from .DCN import DCN 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling', 'DCN' 13 | ] 14 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 
24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) { 36 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 37 | at::DeviceGuard guard(input.device()); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const int width = input.size(3); 43 | const int channels_trans = no_trans ? 2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) { 62 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 63 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 64 | at::DeviceGuard guard(input.device()); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 86 | "deform psroi pooling forward(CUDA)"); 87 | m.def("deform_psroi_pooling_cuda_backward", 88 | &deform_psroi_pooling_cuda_backward, 89 | "deform psroi pooling backward(CUDA)"); 90 | } 91 | -------------------------------------------------------------------------------- /alphapose/models/simplepose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.Resnet import ResNet 10 | 11 | 12 | @SPPE.register_module 13 | class SimplePose(nn.Module): 14 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 15 | super(SimplePose, self).__init__() 16 | self._preset_cfg = cfg['PRESET'] 17 | self.deconv_dim = cfg['NUM_DECONV_FILTERS'] 18 | self._norm_layer = norm_layer 19 | 20 | self.preact = ResNet(f"resnet{cfg['NUM_LAYERS']}") 21 | 22 | # Imagenet pretrain model 23 | import torchvision.models as tm # noqa: F401,F403 24 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 25 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 26 | 27 | model_state = self.preact.state_dict() 28 | state = {k: v for k, v in x.state_dict().items() 29 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 30 | model_state.update(state) 31 | self.preact.load_state_dict(model_state) 32 | 33 | self.deconv_layers = self._make_deconv_layer() 34 | self.final_layer = nn.Conv2d( 35 | self.deconv_dim[2], self._preset_cfg['NUM_JOINTS'], kernel_size=1, stride=1, padding=0) 36 | 37 | def _make_deconv_layer(self): 38 | deconv_layers = [] 39 | deconv1 = nn.ConvTranspose2d( 40 | 2048, self.deconv_dim[0], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 41 | bn1 = self._norm_layer(self.deconv_dim[0]) 42 | deconv2 = nn.ConvTranspose2d( 43 | self.deconv_dim[0], self.deconv_dim[1], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 44 | bn2 = self._norm_layer(self.deconv_dim[1]) 45 | deconv3 = nn.ConvTranspose2d( 46 | self.deconv_dim[1], self.deconv_dim[2], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 47 | bn3 = self._norm_layer(self.deconv_dim[2]) 48 | 49 | deconv_layers.append(deconv1) 50 | deconv_layers.append(bn1) 51 | deconv_layers.append(nn.ReLU(inplace=True)) 52 | deconv_layers.append(deconv2) 53 | deconv_layers.append(bn2) 54 | deconv_layers.append(nn.ReLU(inplace=True)) 55 | deconv_layers.append(deconv3) 56 | deconv_layers.append(bn3) 57 | deconv_layers.append(nn.ReLU(inplace=True)) 58 | 59 | return nn.Sequential(*deconv_layers) 60 | 61 | def _initialize(self): 62 | for name, m in self.deconv_layers.named_modules(): 63 | if isinstance(m, nn.ConvTranspose2d): 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | # if self.deconv_with_bias: 68 | # nn.init.constant_(m.bias, 0) 69 | elif isinstance(m, nn.BatchNorm2d): 70 | # logger.info('=> init {}.weight as 1'.format(name)) 71 | # logger.info('=> init {}.bias as 0'.format(name)) 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | for m in self.final_layer.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 77 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 78 | # logger.info('=> init {}.bias as 0'.format(name)) 79 | nn.init.normal_(m.weight, std=0.001) 80 | nn.init.constant_(m.bias, 0) 81 | 82 | def forward(self, x): 83 | out = self.preact(x) 84 | out = self.deconv_layers(out) 85 | out = self.final_layer(out) 86 | return out 87 | -------------------------------------------------------------------------------- /alphapose/opt.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | import argparse 6 | import logging 7 | import os 8 | from types import MethodType 9 | 10 | import torch 11 | 12 | from .utils.config import update_config 13 | 14 | parser = argparse.ArgumentParser(description='AlphaPose Training') 15 | 16 | "----------------------------- Experiment options -----------------------------" 17 | parser.add_argument('--cfg', 18 | help='experiment configure file name', 19 | required=True, 20 | type=str) 21 | parser.add_argument('--exp-id', default='default', type=str, 22 | help='Experiment ID') 23 | 24 | "----------------------------- General options -----------------------------" 25 | parser.add_argument('--nThreads', default=60, type=int, 26 | help='Number of data loading threads') 27 | parser.add_argument('--snapshot', default=2, type=int, 28 | help='How often to take a snapshot of the model (0 = never)') 29 | 30 | parser.add_argument('--rank', default=-1, type=int, 31 | help='node rank for distributed training') 32 | parser.add_argument('--dist-url', default='tcp://192.168.1.214:23345', type=str, 33 | help='url used to set up distributed training') 34 | parser.add_argument('--dist-backend', default='nccl', type=str, 35 | help='distributed backend') 36 | parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], default='none', 37 | help='job launcher') 38 | 39 | "----------------------------- Training options -----------------------------" 40 | parser.add_argument('--sync', default=False, dest='sync', 41 | help='Use Sync Batchnorm', action='store_true') 42 | parser.add_argument('--detector', dest='detector', 43 | help='detector name', default="yolo") 44 | 45 | "----------------------------- Log options -----------------------------" 46 | parser.add_argument('--board', default=True, dest='board', 47 | help='Logging with tensorboard', action='store_true') 48 | parser.add_argument('--debug', default=False, dest='debug', 49 | help='Visualization debug', action='store_true') 50 | parser.add_argument('--map', default=True, dest='map', 51 | help='Evaluate mAP per epoch', action='store_true') 52 | 53 | 54 | opt = parser.parse_args() 55 | cfg_file_name = os.path.basename(opt.cfg) 56 | cfg = update_config(opt.cfg) 57 | 58 | cfg['FILE_NAME'] = cfg_file_name 59 | cfg.TRAIN.DPG_STEP = [i - cfg.TRAIN.DPG_MILESTONE for i in cfg.TRAIN.DPG_STEP] 60 | opt.world_size = cfg.TRAIN.WORLD_SIZE 61 | opt.work_dir = './exp/{}-{}/'.format(opt.exp_id, cfg_file_name) 62 | opt.gpus = [i for i in range(torch.cuda.device_count())] 63 | opt.device = torch.device("cuda:" + str(opt.gpus[0]) if opt.gpus[0] >= 0 else "cpu") 64 | 65 | if not os.path.exists("./exp/{}-{}".format(opt.exp_id, cfg_file_name)): 66 | os.makedirs("./exp/{}-{}".format(opt.exp_id, cfg_file_name)) 67 | 68 | filehandler = logging.FileHandler( 69 | './exp/{}-{}/training.log'.format(opt.exp_id, cfg_file_name)) 70 | streamhandler = logging.StreamHandler() 71 | 72 | logger = logging.getLogger('') 73 | logger.setLevel(logging.INFO) 74 | logger.addHandler(filehandler) 75 | logger.addHandler(streamhandler) 76 | 77 | 78 | def epochInfo(self, set, idx, loss, acc): 79 | self.info('{set}-{idx:d} epoch | loss:{loss:.8f} | acc:{acc:.4f}'.format( 80 | set=set, 81 | idx=idx, 82 | loss=loss, 83 | acc=acc 84 | )) 85 | 86 | 87 | logger.epochInfo = MethodType(epochInfo, logger) 88 | 
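# Usage note (a minimal sketch): opt.py parses its command-line arguments and
# loads the YAML config at import time, so the training/validation scripts only
# need to import the resulting module-level objects. The --cfg path below is
# just one of the YAML files listed under configs/ and is used purely for
# illustration:
#
#   python scripts/train.py --cfg configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml --exp-id my_run
#
#   from alphapose.opt import cfg, logger, opt
#   logger.info('world size: {}, work dir: {}'.format(cfg.TRAIN.WORLD_SIZE, opt.work_dir))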
-------------------------------------------------------------------------------- /alphapose/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg, retrieve_from_cfg 2 | 3 | __all__ = [ 4 | 'Registry', 'build_from_cfg', 'retrieve_from_cfg' 5 | ] 6 | -------------------------------------------------------------------------------- /alphapose/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | 4 | 5 | def update_config(config_file): 6 | with open(config_file) as f: 7 | config = edict(yaml.load(f, Loader=yaml.FullLoader)) 8 | return config 9 | -------------------------------------------------------------------------------- /alphapose/utils/env.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import os 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def init_dist(opt): 12 | """Initialize distributed computing environment.""" 13 | opt.ngpus_per_node = torch.cuda.device_count() 14 | 15 | torch.cuda.set_device(opt.gpu) 16 | 17 | if opt.launcher == 'pytorch': 18 | _init_dist_pytorch(opt) 19 | elif opt.launcher == 'mpi': 20 | _init_dist_mpi(opt) 21 | elif opt.launcher == 'slurm': 22 | _init_dist_slurm(opt) 23 | else: 24 | raise ValueError('Invalid launcher type: {}'.format(opt.launcher)) 25 | 26 | 27 | def _init_dist_pytorch(opt, **kwargs): 28 | """Set up environment.""" 29 | # TODO: use local_rank instead of rank % num_gpus 30 | opt.rank = opt.rank * opt.ngpus_per_node + opt.gpu 31 | opt.world_size = opt.world_size 32 | dist.init_process_group(backend=opt.dist_backend, init_method=opt.dist_url, 33 | world_size=opt.world_size, rank=opt.rank) 34 | print(f"{opt.dist_url}, ws:{opt.world_size}, rank:{opt.rank}") 35 | 36 | if opt.rank % opt.ngpus_per_node == 0: 37 | opt.log = True 38 | else: 39 | opt.log = False 40 | 41 | 42 | def _init_dist_slurm(opt, port=23348, **kwargs): 43 | """Set up slurm environment.""" 44 | proc_id = int(os.environ['SLURM_PROCID']) 45 | ntasks = int(os.environ['SLURM_NTASKS']) 46 | node_list = os.environ['SLURM_NODELIST'] 47 | num_gpus = torch.cuda.device_count() 48 | torch.cuda.set_device(proc_id % num_gpus) 49 | if '[' in node_list: 50 | beg = node_list.find('[') 51 | pos1 = node_list.find('-', beg) 52 | if pos1 < 0: 53 | pos1 = 1000 54 | pos2 = node_list.find(',', beg) 55 | if pos2 < 0: 56 | pos2 = 1000 57 | node_list = node_list[:min(pos1, pos2)].replace('[', '') 58 | addr = node_list[8:].replace('-', '.') 59 | os.environ['MASTER_PORT'] = str(port) 60 | os.environ['MASTER_ADDR'] = addr 61 | os.environ['WORLD_SIZE'] = str(ntasks) 62 | os.environ['RANK'] = str(proc_id) 63 | 64 | opt.ngpus_per_node = num_gpus 65 | opt.rank = int(proc_id) 66 | opt.rank = proc_id * num_gpus + opt.gpu 67 | opt.world_size = int(ntasks) * num_gpus 68 | 69 | print(f"tcp://{node_list}:{port}, ws:{opt.world_size}, rank:{opt.rank}, proc_id:{proc_id}") 70 | dist.init_process_group(backend=opt.dist_backend, 71 | init_method=f'tcp://{node_list}:{port}', 72 | world_size=opt.world_size, 73 | rank=opt.rank) 74 | if opt.rank == 0: 75 | opt.log = True 76 | else: 77 | opt.log = False 78 | 79 | 80 | def 
_init_dist_mpi(backend, **kwargs): 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /alphapose/utils/logger.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def board_writing(writer, loss, acc, iterations, dataset='Train'): 11 | writer.add_scalar( 12 | '{}/Loss'.format(dataset), loss, iterations) 13 | writer.add_scalar( 14 | '{}/acc'.format(dataset), acc, iterations) 15 | 16 | 17 | def debug_writing(writer, outputs, labels, inputs, iterations): 18 | tmp_tar = torch.unsqueeze(labels.cpu().data[0], dim=1) 19 | # tmp_out = torch.unsqueeze(outputs.cpu().data[0], dim=1) 20 | 21 | tmp_inp = inputs.cpu().data[0] 22 | tmp_inp[0] += 0.406 23 | tmp_inp[1] += 0.457 24 | tmp_inp[2] += 0.480 25 | 26 | tmp_inp[0] += torch.sum(F.interpolate(tmp_tar, scale_factor=4, mode='bilinear'), dim=0)[0] 27 | tmp_inp.clamp_(0, 1) 28 | 29 | writer.add_image('Data/input', tmp_inp, iterations) 30 | -------------------------------------------------------------------------------- /alphapose/utils/presets/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_transform import SimpleTransform 2 | 3 | __all__ = ['SimpleTransform'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | def __repr__(self): 11 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 12 | self._name, list(self._module_dict.keys())) 13 | return format_str 14 | 15 | @property 16 | def name(self): 17 | return self._name 18 | 19 | @property 20 | def module_dict(self): 21 | return self._module_dict 22 | 23 | def get(self, key): 24 | return self._module_dict.get(key, None) 25 | 26 | def _register_module(self, module_class): 27 | """Register a module. 28 | 29 | Args: 30 | module (:obj:`nn.Module`): Module to be registered. 31 | """ 32 | if not inspect.isclass(module_class): 33 | raise TypeError('module must be a class, but got {}'.format( 34 | type(module_class))) 35 | module_name = module_class.__name__ 36 | if module_name in self._module_dict: 37 | raise KeyError('{} is already registered in {}'.format( 38 | module_name, self.name)) 39 | self._module_dict[module_name] = module_class 40 | 41 | def register_module(self, cls): 42 | self._register_module(cls) 43 | return cls 44 | 45 | 46 | def build_from_cfg(cfg, registry, default_args=None): 47 | """Build a module from config dict. 48 | 49 | Args: 50 | cfg (dict): Config dict. It should at least contain the key "type". 51 | registry (:obj:`Registry`): The registry to search the type from. 52 | default_args (dict, optional): Default initialization arguments. 53 | 54 | Returns: 55 | obj: The constructed object. 
56 | """ 57 | assert isinstance(cfg, dict) and 'TYPE' in cfg 58 | assert isinstance(default_args, dict) or default_args is None 59 | args = cfg.copy() 60 | obj_type = args.pop('TYPE') 61 | 62 | if isinstance(obj_type, str): 63 | obj_cls = registry.get(obj_type) 64 | if obj_cls is None: 65 | raise KeyError('{} is not in the {} registry'.format( 66 | obj_type, registry.name)) 67 | elif inspect.isclass(obj_type): 68 | obj_cls = obj_type 69 | else: 70 | raise TypeError('type must be a str or valid type, but got {}'.format( 71 | type(obj_type))) 72 | if default_args is not None: 73 | for name, value in default_args.items(): 74 | args.setdefault(name, value) 75 | return obj_cls(**args) 76 | 77 | 78 | def retrieve_from_cfg(cfg, registry): 79 | """Retrieve a module class from config dict. 80 | 81 | Args: 82 | cfg (dict): Config dict. It should at least contain the key "type". 83 | registry (:obj:`Registry`): The registry to search the type from. 84 | 85 | Returns: 86 | class: The class. 87 | """ 88 | assert isinstance(cfg, dict) and 'TYPE' in cfg 89 | args = cfg.copy() 90 | obj_type = args.pop('TYPE') 91 | 92 | if isinstance(obj_type, str): 93 | obj_cls = registry.get(obj_type) 94 | if obj_cls is None: 95 | raise KeyError('{} is not in the {} registry'.format( 96 | obj_type, registry.name)) 97 | elif inspect.isclass(obj_type): 98 | obj_cls = obj_type 99 | else: 100 | raise TypeError('type must be a str or valid type, but got {}'.format( 101 | type(obj_type))) 102 | 103 | return obj_cls 104 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import roi_align, RoIAlign 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . 
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale=1, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = out_size 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, _pair(self.out_size), 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " 
must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } -------------------------------------------------------------------------------- /alphapose/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Thu Jan 21 12:06:57 2021 3 | 4 | __version__ = '0.3.0+4d58914' 5 | short_version = '0.3.0' 6 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 
'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard85_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 85 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hrnet/256x192_w32_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'PoseHighResolutionNet' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_LAYERS: 50 39 | FINAL_CONV_KERNEL: 1 40 | PRETRAINED_LAYERS: ['*'] 41 | STAGE2: 42 | NUM_MODULES: 1 43 | NUM_BRANCHES: 2 44 | NUM_BLOCKS: [4, 4] 45 | NUM_CHANNELS: [32, 64] 46 | BLOCK: 'BASIC' 47 | FUSE_METHOD: 'SUM' 48 | STAGE3: 49 | NUM_MODULES: 4 50 | NUM_BRANCHES: 3 51 | NUM_BLOCKS: [4, 4, 4] 52 | NUM_CHANNELS: [32, 64, 128] 53 | BLOCK: 'BASIC' 54 | FUSE_METHOD: 'SUM' 55 | STAGE4: 56 | NUM_MODULES: 3 57 | NUM_BRANCHES: 4 58 | NUM_BLOCKS: [4, 4, 4, 4] 59 | NUM_CHANNELS: [32, 64, 128, 256] 60 | BLOCK: 'BASIC' 61 | 
FUSE_METHOD: 'SUM' 62 | LOSS: 63 | TYPE: 'MSELoss' 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 270 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 170 80 | - 200 81 | DPG_MILESTONE: 210 82 | DPG_STEP: 83 | - 230 84 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 152 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 74 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-concat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'ConcatDataset' 4 | SET_LIST: 5 | - TYPE: 'Mscoco' 6 | MASK_ID: 0 7 | ROOT: './data/coco/' 8 | IMG_PREFIX: 'train2017' 9 | ANN: 'annotations/person_keypoints_train2017.json' 10 | AUG: 11 | FLIP: true 12 | ROT_FACTOR: 40 13 | SCALE_FACTOR: 0.3 14 | NUM_JOINTS_HALF_BODY: 8 15 | PROB_HALF_BODY: -1 16 | - TYPE: 'Mpii' 17 | MASK_ID: 17 18 | ROOT: './data/mpii/' 19 | IMG_PREFIX: 'images' 20 | ANN: 'annot_mpii.json' 21 | AUG: 22 | FLIP: true 23 | ROT_FACTOR: 40 24 | SCALE_FACTOR: 0.3 25 | NUM_JOINTS_HALF_BODY: 8 26 | PROB_HALF_BODY: -1 27 | VAL: 28 | TYPE: 'Mscoco' 29 | ROOT: './data/coco/' 30 | IMG_PREFIX: 'val2017' 31 | ANN: 'annotations/person_keypoints_val2017.json' 32 | TEST: 33 | TYPE: 'Mscoco_det' 34 | ROOT: './data/coco/' 35 | IMG_PREFIX: 'val2017' 36 | DET_FILE: './exp/json/test_det_yolo.json' 37 | ANN: 'annotations/person_keypoints_val2017.json' 38 | DATA_PRESET: 39 | TYPE: 'simple' 40 | SIGMA: 2 41 | NUM_JOINTS: 33 42 | IMAGE_SIZE: 43 | - 256 44 | - 192 45 | HEATMAP_SIZE: 46 | - 64 47 | - 48 48 | MODEL: 49 | TYPE: 'FastPose' 50 | PRETRAINED: '' 51 | TRY_LOAD: '' 52 | 
NUM_DECONV_FILTERS: 53 | - 256 54 | - 256 55 | - 256 56 | NUM_LAYERS: 50 57 | LOSS: 58 | TYPE: 'MSELoss' 59 | DETECTOR: 60 | NAME: 'yolo' 61 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 62 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 63 | NMS_THRES: 0.6 64 | CONFIDENCE: 0.05 65 | TRAIN: 66 | WORLD_SIZE: 4 67 | BATCH_SIZE: 32 68 | BEGIN_EPOCH: 0 69 | END_EPOCH: 200 70 | OPTIMIZER: 'adam' 71 | LR: 0.001 72 | LR_FACTOR: 0.1 73 | LR_STEP: 74 | - 90 75 | - 120 76 | DPG_MILESTONE: 140 77 | DPG_STEP: 78 | - 160 79 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'shuffle' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-simple.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'SimplePose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | 
CONFIDENCE: 0.1 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | DCN: 44 | MODULATED: false 45 | DEFORM_GROUP: 1 46 | FALLBACK_ON_STRIDE: false 47 | STAGE_WITH_DCN: 48 | - false 49 | - true 50 | - true 51 | - true 52 | LOSS: 53 | TYPE: 'MSELoss' 54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 32 63 | BEGIN_EPOCH: 0 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 
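The configuration files above all follow the same schema: DATASET, DATA_PRESET, MODEL, LOSS, DETECTOR and TRAIN sections, with MODEL.TYPE naming the network class to build. A minimal sketch of how such a YAML file could be parsed and resolved through the registry helpers shown above in alphapose/utils/registry.py; the MODELS registry and the DummyPose class below are stand-ins invented for illustration, not names from the code base:

import yaml

from alphapose.utils.registry import Registry, build_from_cfg

# Stand-in registry and class; the project keeps its real Registry
# instances (for models, losses, datasets) in its builder modules.
MODELS = Registry('model')

@MODELS.register_module
class DummyPose:
    def __init__(self, **cfg):
        self.cfg = cfg  # e.g. NUM_LAYERS, NUM_DECONV_FILTERS, PRESET, ...

with open('configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml') as f:
    cfg = yaml.safe_load(f)

model_cfg = dict(cfg['MODEL'], TYPE='DummyPose')   # redirect TYPE to the stand-in
model = build_from_cfg(model_cfg, MODELS,
                       default_args={'PRESET': cfg['DATA_PRESET']})
print(model.cfg['NUM_LAYERS'])   # -> 50 for the ResNet-50 configs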
-------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 17 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 32 57 | BEGIN_EPOCH: 0 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/dense_coco/resnet50/256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'densecoco' 4 | ROOT: './data/coco/' 5 | 
IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017_dense.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 16 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'densecoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017_dense.json' 18 | TEST: 19 | TYPE: 'densecoco' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'test2017' 22 | ANN: 'annotations/person_keypoints_val2017_dense.json' 23 | DATA_PRESET: 24 | TYPE: 'simple' 25 | SIGMA: 2 26 | NUM_JOINTS: 17 27 | NUM_JOINTS_DENSE: 49 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC_Dense' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'MSELoss' 62 | DETECTOR: 63 | NAME: 'yolo' 64 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 65 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 66 | NMS_THRES: 0.6 67 | CONFIDENCE: 0.05 68 | TRAIN: 69 | WORLD_SIZE: 4 70 | BATCH_SIZE: 32 71 | BEGIN_EPOCH: 0 72 | END_EPOCH: 200 73 | OPTIMIZER: 'adam' 74 | LR: 0.001 75 | LR_FACTOR: 0.1 76 | LR_STEP: 77 | - 90 78 | - 120 79 | DPG_MILESTONE: 140 80 | DPG_STEP: 81 | - 160 82 | - 190 -------------------------------------------------------------------------------- /configs/halpe_136/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 
'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: '' 5 | IMG_PREFIX: 'train2017' 6 | ANN: '/home/group3/hico-coco.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'coco_val_full_finetuned.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'coco_val_full_finetuned.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '/home/group3/newrepo/AlphaPose/exp/hico-coco-_regression-256x192_res50_lr1e-3_2x-regression-frei.yaml/model_3.pth' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: '' 5 | IMG_PREFIX: 'train2017' 6 | ANN: '/home/group3/hico_and_coco_26kpts.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 11 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: '' 16 | 
IMG_PREFIX: 'val2017' 17 | ANN: '/home/group3/coco_val_full_26.json' 18 | TEST: 19 | TYPE: 'Halpe_26' 20 | ROOT: '' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: '/home/group3/coco_val_full_26.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '/home/group3/AlphaPose/exp/pami_hico_and_coco-256x192_res50_lr1e-3_1x.yaml/model_6.pth' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 48 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 50 61 | - 70 62 | DPG_MILESTONE: 90 63 | DPG_STEP: 64 | - 110 65 | - 130 66 | -------------------------------------------------------------------------------- /detector/apis.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Chao Xu (xuchao.19962007@sjtu.edu.cn) 4 | # ----------------------------------------------------- 5 | 6 | """API of detector""" 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | def get_detector(opt=None): 11 | if opt.detector == 'yolo': 12 | from detector.yolo_api import YOLODetector 13 | from detector.yolo_cfg import cfg 14 | return YOLODetector(cfg, opt) 15 | elif opt.detector == 'tracker': 16 | from detector.tracker_api import Tracker 17 | from detector.tracker_cfg import cfg 18 | return Tracker(cfg, opt) 19 | elif opt.detector.startswith('efficientdet_d'): 20 | from detector.effdet_api import EffDetDetector 21 | from detector.effdet_cfg import cfg 22 | return EffDetDetector(cfg, opt) 23 | else: 24 | raise NotImplementedError 25 | 26 | 27 | class BaseDetector(ABC): 28 | def __init__(self): 29 | pass 30 | 31 | @abstractmethod 32 | def image_preprocess(self, img_name): 33 | pass 34 | 35 | @abstractmethod 36 | def images_detection(self, imgs, orig_dim_list): 37 | pass 38 | 39 | @abstractmethod 40 | def detect_one_img(self, img_name): 41 | pass 42 | -------------------------------------------------------------------------------- /detector/effdet_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | 5 | cfg.NMS_THRES = 0.6 # 0.6(0.713) 0.5(0.707) 6 | cfg.CONFIDENCE = 0.2 # 0.15 0.1 7 | cfg.NUM_CLASSES = 80 8 | cfg.MAX_DETECTIONS = 200 # 100 9 | -------------------------------------------------------------------------------- /detector/efficientdet/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a EfficientDet Object Detector 2 | 3 | Forked and modified from https://github.com/rwightman/efficientdet-pytorch, many thanks! 
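As an aside on the plug-in point defined in detector/apis.py above: every backend (YOLO, the JDE tracker, EfficientDet) is selected by name in get_detector() and only needs to implement the three abstract methods of BaseDetector. A schematic subclass written purely for illustration; the class name, return shapes and stub bodies below are assumptions, not code from the repository:

import torch

from detector.apis import BaseDetector

class NullDetector(BaseDetector):
    """Illustrative no-op detector: it reports no boxes for every image."""

    def image_preprocess(self, img_name):
        # A real backend loads and letterboxes the image to its input size.
        return torch.zeros(1, 3, 608, 608)

    def images_detection(self, imgs, orig_dim_list):
        # A real backend runs batched inference here; this stub simply
        # reports that nothing was found (the column count is a placeholder).
        return torch.empty(0, 8)

    def detect_one_img(self, img_name):
        # A real backend returns one result dict per detected person.
        return []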
4 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | from .bench import DetBenchEval, DetBenchTrain 3 | from .config.config import get_efficientdet_config 4 | from .helpers import load_checkpoint, load_pretrained -------------------------------------------------------------------------------- /detector/efficientdet/effdet/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from collections import OrderedDict 4 | try: 5 | from torch.hub import load_state_dict_from_url 6 | except ImportError: 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | 10 | def load_checkpoint(model, checkpoint_path): 11 | if checkpoint_path and os.path.isfile(checkpoint_path): 12 | print("=> Loading checkpoint '{}'".format(checkpoint_path)) 13 | checkpoint = torch.load(checkpoint_path) 14 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 15 | new_state_dict = OrderedDict() 16 | for k, v in checkpoint['state_dict'].items(): 17 | if k.startswith('module'): 18 | name = k[7:] # remove `module.` 19 | else: 20 | name = k 21 | new_state_dict[name] = v 22 | model.load_state_dict(new_state_dict) 23 | else: 24 | model.load_state_dict(checkpoint) 25 | print("=> Loaded checkpoint '{}'".format(checkpoint_path)) 26 | else: 27 | print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) 28 | raise FileNotFoundError() 29 | 30 | 31 | def load_pretrained(model, url, filter_fn=None, strict=True): 32 | if not url: 33 | print("=> Warning: Pretrained model URL is empty, using random initialization.") 34 | return 35 | state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') 36 | if filter_fn is not None: 37 | state_dict = filter_fn(state_dict) 38 | model.load_state_dict(state_dict, strict=strict) 39 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection 2 | 3 | All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | # Object detection data loaders and libraries are mostly based on RetinaNet: 16 | # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet 17 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/faster_rcnn_box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Faster RCNN box coder. 16 | 17 | Faster RCNN box coder follows the coding schema described below: 18 | ty = (y - ya) / ha 19 | tx = (x - xa) / wa 20 | th = log(h / ha) 21 | tw = log(w / wa) 22 | where x, y, w, h denote the box's center coordinates, width and height 23 | respectively. Similarly, xa, ya, wa, ha denote the anchor's center 24 | coordinates, width and height. tx, ty, tw and th denote the anchor-encoded 25 | center, width and height respectively. 26 | 27 | See http://arxiv.org/abs/1506.01497 for details. 28 | """ 29 | 30 | import torch 31 | 32 | from . import box_coder 33 | from . import box_list 34 | 35 | EPS = 1e-8 36 | 37 | 38 | class FasterRcnnBoxCoder(box_coder.BoxCoder): 39 | """Faster RCNN box coder.""" 40 | 41 | def __init__(self, scale_factors=None): 42 | """Constructor for FasterRcnnBoxCoder. 43 | 44 | Args: 45 | scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. 46 | If set to None, does not perform scaling. For Faster RCNN, 47 | the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. 48 | """ 49 | if scale_factors: 50 | assert len(scale_factors) == 4 51 | for scalar in scale_factors: 52 | assert scalar > 0 53 | self._scale_factors = scale_factors 54 | 55 | @property 56 | def code_size(self): 57 | return 4 58 | 59 | def _encode(self, boxes, anchors): 60 | """Encode a box collection with respect to anchor collection. 61 | 62 | Args: 63 | boxes: BoxList holding N boxes to be encoded. 64 | anchors: BoxList of anchors. 65 | 66 | Returns: 67 | a tensor representing N anchor-encoded boxes of the format [ty, tx, th, tw]. 68 | """ 69 | # Convert anchors to the center coordinate representation. 70 | ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() 71 | ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() 72 | # Avoid NaN in division and log below. 73 | ha += EPS 74 | wa += EPS 75 | h += EPS 76 | w += EPS 77 | 78 | tx = (xcenter - xcenter_a) / wa 79 | ty = (ycenter - ycenter_a) / ha 80 | tw = torch.log(w / wa) 81 | th = torch.log(h / ha) 82 | # Scales location targets as used in paper for joint training. 
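        # Illustrative worked example (numbers invented for this note, not
        # from the source): a ground-truth box centred at (11, 11) with
        # h = w = 20 against an anchor centred at (10, 10) with ha = wa = 20
        # gives ty = tx = (11 - 10) / 20 = 0.05 and th = tw = log(20/20) = 0;
        # with the recommended scale_factors [10., 10., 5., 5.] the encoded
        # target becomes [0.5, 0.5, 0., 0.].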
83 | if self._scale_factors: 84 | ty *= self._scale_factors[0] 85 | tx *= self._scale_factors[1] 86 | th *= self._scale_factors[2] 87 | tw *= self._scale_factors[3] 88 | return torch.stack([ty, tx, th, tw]).T 89 | 90 | def _decode(self, rel_codes, anchors): 91 | """Decode relative codes to boxes. 92 | 93 | Args: 94 | rel_codes: a tensor representing N anchor-encoded boxes. 95 | anchors: BoxList of anchors. 96 | 97 | Returns: 98 | boxes: BoxList holding N bounding boxes. 99 | """ 100 | ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() 101 | 102 | ty, tx, th, tw = rel_codes.T.unbind() 103 | if self._scale_factors: 104 | ty /= self._scale_factors[0] 105 | tx /= self._scale_factors[1] 106 | th /= self._scale_factors[2] 107 | tw /= self._scale_factors[3] 108 | w = torch.exp(tw) * wa 109 | h = torch.exp(th) * ha 110 | ycenter = ty * ha + ycenter_a 111 | xcenter = tx * wa + xcenter_a 112 | ymin = ycenter - h / 2. 113 | xmin = xcenter - w / 2. 114 | ymax = ycenter + h / 2. 115 | xmax = xcenter + w / 2. 116 | return box_list.BoxList(torch.stack([ymin, xmin, ymax, xmax]).T) 117 | -------------------------------------------------------------------------------- /detector/efficientdet/weights/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/efficientdet/weights/get_models.sh -------------------------------------------------------------------------------- /detector/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /detector/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
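    Example (illustrative boxes chosen for this note):
        >>> import numpy as np
        >>> dets = np.array([[ 0.,  0., 10., 10., 0.9],
        ...                  [ 1.,  1., 11., 11., 0.8],
        ...                  [50., 50., 60., 60., 0.7]], dtype=np.float32)
        >>> kept, inds = nms(dets, iou_thr=0.5)
        >>> # The second box overlaps the first with IoU ~ 0.7 and is
        >>> # suppressed, so inds == [0, 2] and kept == dets[[0, 2], :].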
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets.to('cpu') 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | 4 | template 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data(); 27 | auto order = order_t.data(); 28 | auto x1 = x1_t.data(); 29 | auto y1 = y1_t.data(); 30 | auto x2 = x2_t.data(); 31 | auto y2 = y2_t.data(); 32 | auto areas = areas_t.data(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /detector/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /detector/nms/src/soft_nms_cpu.pyx: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------- 2 | # Soft-NMS: Improving Object Detection With One Line of Code 3 | # Copyright (c) University of Maryland, College Park 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Navaneeth Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def soft_nms_cpu( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # 
gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /detector/tracker/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | MOT Tracker adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | 4 | #### Getting started 5 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 6 | 7 | Enable tracking by setting the detector as tracker: `--detector tracker` -------------------------------------------------------------------------------- /detector/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/cfg/ccmcpe.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/wangzd/datasets/MOT", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.train", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "caltech":"./data/caltech.10k.val", 15 | "cuhksysu":"./data/cuhksysu.val", 16 | "prw":"./data/prw.val" 17 | }, 18 | "test": 19 | { 20 | "mot19":"./data/mot19.train", 21 | "caltech":"./data/caltech.val", 22 | "citypersons":"./data/citypersons.val" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /detector/tracker/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, img_size=(1088, 608), color=(127.5, 127.5, 127.5)): 20 | # resize a rectangular image to a padded rectangular 21 | height=img_size[1] 22 | width=img_size[0] 23 | shape = img.shape[:2] # shape = [height, width] 24 | ratio = min(float(height)/shape[0], float(width)/shape[1]) 25 | new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height] 26 | dw 
= (width - new_shape[0]) / 2 # width padding 27 | dh = (height - new_shape[1]) / 2 # height padding 28 | top, bottom = round(dh - 0.1), round(dh + 0.1) 29 | left, right = round(dw - 0.1), round(dw + 0.1) 30 | img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border 31 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular 32 | return img 33 | 34 | 35 | def prep_image(img, img_size=(1088, 608)): 36 | """ 37 | Prepare image for inputting to the neural network. 38 | 39 | Returns a Variable 40 | """ 41 | 42 | orig_im = cv2.imread(img) 43 | dim = orig_im.shape[1], orig_im.shape[0] 44 | img = (letterbox_image(orig_im, img_size)) 45 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 46 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 47 | return img_, orig_im, dim 48 | 49 | 50 | def prep_frame(img, img_size=(1088, 608)): 51 | """ 52 | Prepare image for inputting to the neural network. 53 | 54 | Returns a Variable 55 | """ 56 | 57 | orig_im = img 58 | dim = orig_im.shape[1], orig_im.shape[0] 59 | img = (letterbox_image(orig_im, img_size)) 60 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 61 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 62 | return img_, orig_im, dim 63 | 64 | -------------------------------------------------------------------------------- /detector/tracker/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /detector/tracker/tracker/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import scipy 4 | from scipy.spatial.distance import cdist 5 | from scipy.optimize import linear_sum_assignment 6 | 7 | from cython_bbox import bbox_overlaps as bbox_ious 8 | from tracker.utils import kalman_filter 9 | import time 10 | 11 | def merge_matches(m1, m2, shape): 12 | O,P,Q = shape 13 | m1 = np.asarray(m1) 14 | m2 = np.asarray(m2) 15 | 16 | M1 = 
scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 18 | 19 | mask = M1*M2 20 | match = mask.nonzero() 21 | match = list(zip(match[0], match[1])) 22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 24 | 25 | return match, unmatched_O, unmatched_Q 26 | 27 | 28 | def _indices_to_matches(cost_matrix, indices, thresh): 29 | matched_cost = cost_matrix[tuple(zip(*indices))] 30 | matched_mask = (matched_cost <= thresh) 31 | 32 | matches = indices[matched_mask] 33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 35 | 36 | return matches, unmatched_a, unmatched_b 37 | 38 | 39 | def linear_assignment(cost_matrix, thresh): 40 | """ 41 | Simple linear assignment 42 | :type cost_matrix: np.ndarray 43 | :type thresh: float 44 | :return: matches, unmatched_a, unmatched_b 45 | """ 46 | if cost_matrix.size == 0: 47 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 48 | 49 | cost_matrix[cost_matrix > thresh] = thresh + 1e-4 50 | row_ind, col_ind = linear_sum_assignment(cost_matrix) 51 | indices = np.column_stack((row_ind, col_ind)) 52 | 53 | return _indices_to_matches(cost_matrix, indices, thresh) 54 | 55 | 56 | def ious(atlbrs, btlbrs): 57 | """ 58 | Compute cost based on IoU 59 | :type atlbrs: list[tlbr] | np.ndarray 60 | :type atlbrs: list[tlbr] | np.ndarray 61 | 62 | :rtype ious np.ndarray 63 | """ 64 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 65 | if ious.size == 0: 66 | return ious 67 | 68 | ious = bbox_ious( 69 | np.ascontiguousarray(atlbrs, dtype=np.float), 70 | np.ascontiguousarray(btlbrs, dtype=np.float) 71 | ) 72 | 73 | return ious 74 | 75 | 76 | def iou_distance(atracks, btracks): 77 | """ 78 | Compute cost based on IoU 79 | :type atracks: list[STrack] 80 | :type btracks: list[STrack] 81 | 82 | :rtype cost_matrix np.ndarray 83 | """ 84 | 85 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 86 | atlbrs = atracks 87 | btlbrs = btracks 88 | else: 89 | atlbrs = [track.tlbr for track in atracks] 90 | btlbrs = [track.tlbr for track in btracks] 91 | _ious = ious(atlbrs, btlbrs) 92 | cost_matrix = 1 - _ious 93 | 94 | return cost_matrix 95 | 96 | def embedding_distance(tracks, detections, metric='cosine'): 97 | """ 98 | :param tracks: list[STrack] 99 | :param detections: list[BaseTrack] 100 | :param metric: 101 | :return: cost_matrix np.ndarray 102 | """ 103 | 104 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 105 | if cost_matrix.size == 0: 106 | return cost_matrix 107 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 108 | for i, track in enumerate(tracks): 109 | cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 110 | return cost_matrix 111 | 112 | 113 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 114 | if cost_matrix.size == 0: 115 | return cost_matrix 116 | gating_dim = 2 if only_position else 4 117 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 118 | measurements = np.asarray([det.to_xyah() for det in detections]) 119 | for row, track in enumerate(tracks): 120 | gating_distance = 
kf.gating_distance( 121 | track.mean, track.covariance, measurements, only_position) 122 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 123 | return cost_matrix 124 | -------------------------------------------------------------------------------- /detector/tracker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/utils/__init__.py -------------------------------------------------------------------------------- /detector/tracker/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | # remove ignored results 43 | keep = np.ones(len(trk_tlwhs), dtype=bool) 44 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 45 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 46 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 47 | match_ious = iou_distance[match_is, match_js] 48 | 49 | match_js = np.asarray(match_js, dtype=int) 50 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 51 | keep[match_js] = False 52 | trk_tlwhs = trk_tlwhs[keep] 53 | trk_ids = trk_ids[keep] 54 | 55 | # get distance matrix 56 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 57 | 58 | # acc 59 | self.acc.update(gt_ids, trk_ids, iou_distance) 60 | 61 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 62 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 63 | else: 64 | events = None 65 | return events 66 | 67 | def eval_file(self, filename): 68 | self.reset_accumulator() 69 | 70 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 71 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 72 | for frame_id in frames: 73 | trk_objs = result_frame_dict.get(frame_id, []) 74 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 75 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 76 | 77 | return self.acc 78 | 79 | @staticmethod 80 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 
'idr', 'idf1', 'precision', 'recall')): 81 | names = copy.deepcopy(names) 82 | if metrics is None: 83 | metrics = mm.metrics.motchallenge_metrics 84 | metrics = copy.deepcopy(metrics) 85 | 86 | mh = mm.metrics.create() 87 | summary = mh.compute_many( 88 | accs, 89 | metrics=metrics, 90 | names=names, 91 | generate_overall=True 92 | ) 93 | 94 | return summary 95 | 96 | @staticmethod 97 | def save_summary(summary, filename): 98 | import pandas as pd 99 | writer = pd.ExcelWriter(filename) 100 | summary.to_excel(writer) 101 | writer.save() 102 | -------------------------------------------------------------------------------- /detector/tracker/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... 
% 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /detector/tracker/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /detector/tracker/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
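# ---------------------------------------------------------------------------
# Added annotation (not part of the original file): `_C.nms`, imported below,
# is a compiled C++/CUDA non-maximum-suppression kernel, so its exact Python
# signature is not visible from this module.  The pure-NumPy function that
# follows is only a sketch of the computation such a kernel performs --
# greedy suppression of detections whose IoU with an already-kept,
# higher-scoring box exceeds a threshold.  Box layout assumed: (x1, y1, x2, y2).
# ---------------------------------------------------------------------------
import numpy as np


def nms_reference(boxes, scores, iou_thr):
    """Greedy hard-NMS reference sketch; returns indices of boxes to keep."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # IoU of the current best box with every remaining candidate
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thr]   # drop heavily overlapping boxes
    return keep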
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /detector/tracker/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /detector/tracker/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
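# --------------------------------------------------------------------------
# Added usage sketch (not part of the original file): the tracking loop
# typically brackets each iteration with tic()/toc() and reports the running
# average as FPS.  `time.sleep` below merely stands in for one real
# detection/association step.
# --------------------------------------------------------------------------
if __name__ == '__main__':
    timer = Timer()
    for _ in range(5):
        timer.tic()
        time.sleep(0.01)                     # placeholder workload
        timer.toc()                          # average=True -> running mean
    print('average per call: %.4fs (~%.1f fps)'
          % (timer.average_time, 1.0 / max(timer.average_time, 1e-6)))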
45 | 46 | -------------------------------------------------------------------------------- /detector/tracker/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 35 | text_thickness = 1 if text_scale > 1.1 else 1 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 
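    # (added note) label size scales with frame width -- roughly one unit of
    # text scale per 800 px -- and the thickness chosen just below follows it,
    # so annotations stay legible on high-resolution frames.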
71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /detector/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/tracker/cfg/yolov3.cfg' 5 | cfg.WEIGHTS = 'detector/tracker/data/jde.1088x608.uncertainty.pt' 6 | cfg.IMG_SIZE = (1088, 608) 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.4 9 | cfg.BUFFER_SIZE = 30 # frame buffer -------------------------------------------------------------------------------- /detector/yolo/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a YOLO v3 Object Detector 2 | 3 | Forked from https://github.com/ayooshkathuria/pytorch-yolo-v3 4 | -------------------------------------------------------------------------------- /detector/yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/yolo/__init__.py -------------------------------------------------------------------------------- /detector/yolo/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import random 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | def confidence_filter(result, confidence): 10 | conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2) 11 | result = result*conf_mask 12 | 13 | return result 14 | 15 | def confidence_filter_cls(result, confidence): 16 | max_scores = torch.max(result[:,:,5:25], 2)[0] 17 | res = torch.cat((result, max_scores),2) 18 | print(res.shape) 19 | 20 | 21 | cond_1 = (res[:,:,4] > confidence).float() 22 | cond_2 = (res[:,:,25] > 0.995).float() 23 | 24 | conf = cond_1 + cond_2 25 | conf = torch.clamp(conf, 0.0, 1.0) 26 | conf = conf.unsqueeze(2) 27 | result = result*conf 28 | return result 29 | 30 | 31 | 32 | def get_abs_coord(box): 33 | box[2], box[3] = abs(box[2]), abs(box[3]) 34 | x1 = (box[0] - box[2]/2) - 1 35 | y1 = (box[1] - box[3]/2) - 1 36 | x2 = (box[0] + box[2]/2) - 1 37 | y2 = (box[1] + box[3]/2) - 1 38 | return x1, y1, x2, y2 39 | 40 | 41 | 42 | def sanity_fix(box): 43 | if (box[0] > box[2]): 44 | box[0], box[2] = box[2], box[0] 45 | 46 | if (box[1] > box[3]): 47 | box[1], box[3] = box[3], box[1] 48 | 49 | return box 50 | 51 | def bbox_iou(box1, box2, args=None): 52 | """ 53 | Returns the IoU of two bounding boxes 54 | 55 | 56 | """ 57 | #Get the coordinates of bounding boxes 58 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 59 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], 
box2[:,1], box2[:,2], box2[:,3] 60 | 61 | #get the corrdinates of the intersection rectangle 62 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 63 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 64 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 65 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 66 | 67 | #Intersection area 68 | if not args: 69 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda()) 70 | else: 71 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).to(args.device))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).to(args.device)) 72 | #Union Area 73 | b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1) 74 | b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1) 75 | 76 | iou = inter_area / (b1_area + b2_area - inter_area) 77 | 78 | return iou 79 | 80 | 81 | def pred_corner_coord(prediction): 82 | #Get indices of non-zero confidence bboxes 83 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 84 | 85 | box = prediction[ind_nz[0], ind_nz[1]] 86 | 87 | 88 | box_a = box.new(box.shape) 89 | box_a[:,0] = (box[:,0] - box[:,2]/2) 90 | box_a[:,1] = (box[:,1] - box[:,3]/2) 91 | box_a[:,2] = (box[:,0] + box[:,2]/2) 92 | box_a[:,3] = (box[:,1] + box[:,3]/2) 93 | box[:,:4] = box_a[:,:4] 94 | 95 | prediction[ind_nz[0], ind_nz[1]] = box 96 | 97 | return prediction 98 | 99 | 100 | 101 | 102 | def write(x, batches, results, colors, classes): 103 | c1 = tuple(x[1:3].int()) 104 | c2 = tuple(x[3:5].int()) 105 | img = results[int(x[0])] 106 | cls = int(x[-1]) 107 | label = "{0}".format(classes[cls]) 108 | color = random.choice(colors) 109 | cv2.rectangle(img, c1, c2,color, 1) 110 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 111 | c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 112 | cv2.rectangle(img, c1, c2,color, -1) 113 | cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1); 114 | return img 115 | -------------------------------------------------------------------------------- /detector/yolo/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | 
pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/yolo/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 
162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/yolo/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | 
batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/yolo/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import cv2 8 | from util import * 9 | import argparse 10 | import os 11 | import os.path as osp 12 | from darknet import Darknet 13 | from preprocess import prep_image, inp_to_image 14 | import pandas as pd 15 | import random 16 | import pickle as pkl 17 | import itertools 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | scales = "1,2,3" 23 | images = "imgs/messi.jpg" 24 | batch_size = 1 25 | confidence = 0.5 26 | nms_thesh = 0.4 27 | 28 | CUDA = torch.cuda.is_available() 29 | 30 | num_classes = 80 31 | classes = load_classes('data/coco.names') 32 | 33 | #Set up the neural network 34 | print("Loading network.....") 35 | model = Darknet("cfg/yolov3-spp.cfg") 36 | model.load_weights("yolov3-spp.weights") 37 | print("Network successfully loaded") 38 | 39 | model.net_info["height"] = "608" 40 | inp_dim 
= int(model.net_info["height"]) 41 | assert inp_dim % 32 == 0 42 | assert inp_dim > 32 43 | 44 | #If there's a GPU availible, put the model on GPU 45 | if CUDA: 46 | model.cuda() 47 | 48 | #Set the model in evaluation mode 49 | model.eval() 50 | 51 | #Detection phase 52 | try: 53 | imlist = [] 54 | imlist.append(osp.join(osp.realpath('.'), images)) 55 | except FileNotFoundError: 56 | print ("No file or directory with the name {}".format(images)) 57 | exit() 58 | 59 | batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))])) 60 | im_batches = [x[0] for x in batches] 61 | orig_ims = [x[1] for x in batches] 62 | im_dim_list = [x[2] for x in batches] 63 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) 64 | 65 | if CUDA: 66 | im_dim_list = im_dim_list.cuda() 67 | 68 | 69 | for batch in im_batches: 70 | #load the image 71 | if CUDA: 72 | batch = batch.cuda() 73 | with torch.no_grad(): 74 | prediction = model(Variable(batch), CUDA) 75 | 76 | prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh) 77 | output = prediction 78 | 79 | if CUDA: 80 | torch.cuda.synchronize() 81 | 82 | try: 83 | output 84 | except NameError: 85 | print("No detections were made") 86 | exit() 87 | print(im_dim_list.shape) 88 | im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long()) 89 | 90 | scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1) 91 | 92 | 93 | output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2 94 | output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2 95 | 96 | output[:,1:5] /= scaling_factor 97 | 98 | for i in range(output.shape[0]): 99 | output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0]) 100 | output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1]) 101 | 102 | print(output) 103 | print(output.shape) 104 | -------------------------------------------------------------------------------- /detector/yolo/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/yolo/pallete -------------------------------------------------------------------------------- /detector/yolo/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, inp_dim): 20 | '''resize image with unchanged aspect ratio using padding''' 21 | img_w, img_h = img.shape[1], img.shape[0] 22 | w, h = inp_dim 23 | new_w = int(img_w * min(w / img_w, h / img_h)) 24 | new_h = int(img_h * min(w / img_w, h / img_h)) 25 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 26 | 27 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 28 | 29 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 30 | 31 | return canvas 32 | 33 | 34 | def prep_image(img, 
inp_dim): 35 | """ 36 | Prepare image for inputting to the neural network. 37 | 38 | Returns a Variable 39 | """ 40 | 41 | orig_im = cv2.imread(img) 42 | dim = orig_im.shape[1], orig_im.shape[0] 43 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 44 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 45 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 46 | return img_, orig_im, dim 47 | 48 | 49 | def prep_frame(img, inp_dim): 50 | """ 51 | Prepare image for inputting to the neural network. 52 | 53 | Returns a Variable 54 | """ 55 | 56 | orig_im = img 57 | dim = orig_im.shape[1], orig_im.shape[0] 58 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 59 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 60 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 61 | return img_, orig_im, dim 62 | 63 | 64 | def prep_image_pil(img, network_dim): 65 | orig_im = Image.open(img) 66 | img = orig_im.convert('RGB') 67 | dim = img.size 68 | img = img.resize(network_dim) 69 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 70 | img = img.view(*network_dim, 3).transpose(0, 1).transpose(0, 2).contiguous() 71 | img = img.view(1, 3, *network_dim) 72 | img = img.float().div(255.0) 73 | return (img, orig_im, dim) 74 | 75 | 76 | def inp_to_image(inp): 77 | inp = inp.cpu().squeeze() 78 | inp = inp * 255 79 | try: 80 | inp = inp.data.numpy() 81 | except RuntimeError: 82 | inp = inp.numpy() 83 | inp = inp.transpose(1, 2, 0) 84 | 85 | inp = inp[:, :, ::-1] 86 | return inp 87 | -------------------------------------------------------------------------------- /detector/yolo_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' 5 | cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' 6 | cfg.INP_DIM = 608 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.1 9 | cfg.NUM_CLASSES = 80 10 | -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_106.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_269.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_269.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_62.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_62.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of standing_147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_147.jpg 
-------------------------------------------------------------------------------- /examples/demo/Copy of standing_153.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_153.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of standing_29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_29.jpg -------------------------------------------------------------------------------- /examples/list-coco-demo.txt: -------------------------------------------------------------------------------- 1 | 000000375530.jpg 2 | 000000244462.jpg 3 | 000000113397.jpg 4 | 000000113408.jpg 5 | 000000375554.jpg 6 | 000000171819.jpg 7 | 000000375566.jpg 8 | 000000244496.jpg 9 | 000000139077.jpg 10 | 000000506656.jpg 11 | 000000375606.jpg 12 | 000000244539.jpg 13 | 000000565045.jpg 14 | 000000113473.jpg 15 | 000000375621.jpg 16 | 000000244550.jpg 17 | 000000492605.jpg 18 | 000000506707.jpg 19 | 000000113493.jpg 20 | 000000215524.jpg 21 | -------------------------------------------------------------------------------- /examples/res/final_xgboost_home_security_scaler_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/res/final_xgboost_home_security_scaler_model.pickle -------------------------------------------------------------------------------- /examples/res/final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/res/final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle -------------------------------------------------------------------------------- /pretrained_models/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/pretrained_models/get_models.sh -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 7 | 8 | python scripts/demo_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video 14 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | EXPID=${2:-"alphapose"} 5 | 6 | python ./scripts/train.py \ 7 | --exp-id ${EXPID} \ 8 | --cfg ${CONFIG} 9 | -------------------------------------------------------------------------------- /scripts/validate.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | BATCH=${3:-"64"} 6 | GPUS=${4:-"0,1,2,3"} 7 | 8 | python ./scripts/validate.py \ 9 | --cfg ${CONFIG} \ 10 | --batch ${BATCH} \ 11 | --gpus $GPUS\ 12 | --flip-test \ 13 | --checkpoint ${CKPT} 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | index_url=https://pypi.tuna.tsinghua.edu.cn/simple 3 | -------------------------------------------------------------------------------- /trackers/PoseFlow/README.md: -------------------------------------------------------------------------------- 1 | # Pose Flow 2 | 3 | Official implementation of [Pose Flow: Efficient Online Pose Tracking ](https://arxiv.org/abs/1802.00977). 4 | 5 |
6 | *(Two demo GIFs, `posetrack1.gif` and `posetrack2.gif`, are embedded here in the original README.)* 7 | 8 |
9 | 10 | Results on PoseTrack Challenge validation set: 11 | 12 | 1. Task2: Multi-Person Pose Estimation (mAP) 13 |
14 | 15 | | Method | Head mAP | Shoulder mAP | Elbow mAP | Wrist mAP | Hip mAP | Knee mAP | Ankle mAP | Total mAP | 16 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:| 17 | | Detect-and-Track(FAIR) | **67.5** | 70.2 | 62 | 51.7 | 60.7 | 58.7 | 49.8 | 60.6 | 18 | | **AlphaPose** | 66.7 | **73.3** | **68.3** | **61.1** | **67.5** | **67.0** | **61.3** | **66.5** | 19 | 20 |
21 | 22 | 2. Task3: Pose Tracking (MOTA) 23 |
24 | 25 | | Method | Head MOTA | Shoulder MOTA | Elbow MOTA | Wrist MOTA | Hip MOTA | Knee MOTA | Ankle MOTA | Total MOTA | Total MOTP| Speed(FPS) | 26 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:| 27 | | Detect-and-Track(FAIR) | **61.7** | 65.5 | 57.3 | 45.7 | 54.3 | 53.1 | 45.7 | 55.2 | 61.5 |Unknown| 28 | | **PoseFlow(DeepMatch)** | 59.8 | **67.0** | 59.8 | 51.6 | **60.0** | **58.4** | **50.5** | **58.3** | **67.8**|8| 29 | | **PoseFlow(OrbMatch)** | 59.0 | 66.8 | **60.0** | **51.8** | 59.4 | **58.4** | 50.3 | 58.0 | 62.2|24| 30 | 31 |
32 | 33 | ## Latest Features 34 | - Dec 2018: PoseFlow(General Version) released! Support ANY DATASET and pose tracking results visualization. 35 | - Oct 2018: Support generating correspondence files with ORB(OpenCV), 3X FASTER and no need to compile DeepMatching library. 36 | 37 | ## Requirements 38 | 39 | - Python 2.7.13 40 | - OpenCV 3.4.2.16 41 | - OpenCV-contrib 3.4.2.16 42 | - tqdm 4.19.8 43 | 44 | ## Installation 45 | 46 | 1. Download PoseTrack Dataset from [PoseTrack](https://posetrack.net/) to `AlphaPose/PoseFlow/posetrack_data/` 47 | 48 | ```shell 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | ## For Any Datasets (General Version) 53 | 54 | 1. Using [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results. 55 | 56 | ```shell 57 | # pytorch version 58 | python demo.py --indir ${image_dir}$ --outdir ${results_dir}$ 59 | ``` 60 | 61 | 2. Run pose tracking 62 | 63 | 64 | ```shell 65 | # pytorch version 66 | python tracker-general.py --imgdir ${image_dir}$ 67 | --in_json ${results_dir}$/alphapose-results.json 68 | --out_json ${results_dir}$/alphapose-results-forvis-tracked.json 69 | --visdir ${render_dir}$ 70 | ``` 71 | 72 | 73 | ## For PoseTrack Dataset Evaluation (Paper Baseline) 74 | 75 | 1. Using [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results on videos with format like `alpha-pose-results-sample.json`. 76 | 2. Using DeepMatching/ORB to generate correspondence files. 77 | 78 | ```shell 79 | # Generate correspondences by orb 80 | python matching.py --orb=1 81 | ``` 82 | 83 | 3. Run pose tracking 84 | 85 | 86 | ```shell 87 | python tracker-baseline.py --dataset=val/test --orb=1 88 | ``` 89 | 4. Evaluation 90 | 91 | Original [poseval](https://github.com/leonid-pishchulin/poseval) has some instructions on how to convert annotation files from MAT to JSON. 92 | 93 | Evaluate pose tracking results on validation dataset: 94 | 95 | ```shell 96 | git clone https://github.com/leonid-pishchulin/poseval.git --recursive 97 | cd poseval/py && export PYTHONPATH=$PWD/../py-motmetrics:$PYTHONPATH 98 | cd ../../ 99 | python poseval/py/evaluate.py --groundTruth=./posetrack_data/annotations/val \ 100 | --predictions=./${track_result_dir}/ \ 101 | --evalPoseTracking --evalPoseEstimation 102 | ``` 103 | 104 | 105 | ## Citation 106 | 107 | Please cite these papers in your publications if it helps your research: 108 | 109 | @inproceedings{xiu2018poseflow, 110 | author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu}, 111 | title = {{Pose Flow}: Efficient Online Pose Tracking}, 112 | booktitle={BMVC}, 113 | year = {2018} 114 | } 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /trackers/PoseFlow/parallel_process.py: -------------------------------------------------------------------------------- 1 | # adapted from http://danshiebler.com/2016-09-14-parallel-progress-bar/ 2 | from tqdm import tqdm 3 | from concurrent.futures import ProcessPoolExecutor, as_completed 4 | 5 | def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=3): 6 | """ 7 | A parallel version of the map function with a progress bar. 8 | 9 | Args: 10 | array (array-like): An array to iterate over. 
11 | function (function): A python function to apply to the elements of array 12 | n_jobs (int, default=16): The number of cores to use 13 | use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of 14 | keyword arguments to function 15 | front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job. 16 | Useful for catching bugs 17 | Returns: 18 | [function(array[0]), function(array[1]), ...] 19 | """ 20 | #We run the first few iterations serially to catch bugs 21 | if front_num > 0: 22 | front = [function(**a) if use_kwargs else function(*a) for a in array[:front_num]] 23 | #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging. 24 | if n_jobs==1: 25 | return front + [function(**a) if use_kwargs else function(*a) for a in tqdm(array[front_num:])] 26 | #Assemble the workers 27 | with ProcessPoolExecutor(max_workers=n_jobs) as pool: 28 | #Pass the elements of array into function 29 | if use_kwargs: 30 | futures = [pool.submit(function, **a) for a in array[front_num:]] 31 | else: 32 | futures = [pool.submit(function, *a) for a in array[front_num:]] 33 | kwargs = { 34 | 'total': len(futures), 35 | 'unit': 'it', 36 | 'unit_scale': True, 37 | 'leave': True 38 | } 39 | #Print out the progress as tasks complete 40 | for f in tqdm(as_completed(futures), **kwargs): 41 | pass 42 | out = [] 43 | #Get the results from the futures. 44 | for i, future in enumerate(futures): 45 | try: 46 | out.append(future.result()) 47 | except Exception as e: 48 | out.append(e) 49 | return front + out -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/PoseFlow/posetrack1.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/PoseFlow/posetrack2.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack_data: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/posetrack_data -------------------------------------------------------------------------------- /trackers/PoseFlow/poseval: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/poseval -------------------------------------------------------------------------------- /trackers/PoseFlow/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.5 2 | scipy==1.1.0 3 | opencv_python==3.4.2.16 4 | opencv_contrib_python==3.4.2.16 5 | matplotlib==2.2.2 6 | tqdm==4.23.4 7 | Image==1.5.25 8 | Pillow==5.3.0 9 | munkres==1.0.12 10 | -------------------------------------------------------------------------------- /trackers/README.md: -------------------------------------------------------------------------------- 1 | # Pose Tracking Module for AlphaPose 2 | 3 | AlphaPose provide three different tracking methods for now, you can 
try different method to see which one is better for you. 4 | 5 | ## 1. Human-ReID based tracking (Recommended) 6 | Currently the best performance tracking model. Paper coming soon. 7 | 8 | #### Getting started 9 | Download [human reid model](https://mega.nz/#!YTZFnSJY!wlbo_5oa2TpDAGyWCTKTX1hh4d6DvJhh_RUA2z6i_so) and place it into `AlphaPose/trackers/weights/`. 10 | 11 | Then simply run alphapose with additional flag `--pose_track` 12 | 13 | You can try different person reid model by modifing `cfg.arch` and `cfg.loadmodel` in `./trackers/tracker_cfg.py`. 14 | 15 | If you want to train your own reid model, please refer to this [project](https://github.com/KaiyangZhou/deep-person-reid) 16 | 17 | #### Demo 18 | ``` bash 19 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --pose_track 20 | ``` 21 | #### Todo 22 | - [] Evaluation Tools for PoseTrack 23 | - [] More Models 24 | - [] Training code for [PoseTrack Dataset](https://posetrack.net/) 25 | 26 | ## 2. Detector based human tracking 27 | Use a human detecter with tracking module (JDE). Please refer to [detector/tracker/](../detector/tracker/) 28 | 29 | #### Getting started 30 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 31 | 32 | Enable tracking by setting the detector as tracker: `--detector tracker` 33 | #### Demo 34 | ``` bash 35 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --detector tracker 36 | ``` 37 | 38 | ## 3. PoseFlow human tracking 39 | This tracker is based on our BMVC 2018 paper PoseFlow, for more info please refer to [PoseFlow/README.md](PoseFlow/) 40 | 41 | #### Getting started 42 | 43 | Simply run alphapose with additional flag `--pose_flow` 44 | -------------------------------------------------------------------------------- /trackers/ReidModels/ResBnLin.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | from ReidModels.ResNet import build_resnet_backbone 10 | from ReidModels.bn_linear import BNneckLinear 11 | class SpatialAttn(nn.Module): 12 | """Spatial Attention Layer""" 13 | def __init__(self): 14 | super(SpatialAttn, self).__init__() 15 | 16 | def forward(self, x): 17 | # global cross-channel averaging # e.g. 32,2048,24,8 18 | x = x.mean(1, keepdim=True) # e.g. 32,1,24,8 19 | h = x.size(2) 20 | w = x.size(3) 21 | x = x.view(x.size(0),-1) # e.g. 32,192 22 | z = x 23 | for b in range(x.size(0)): 24 | z[b] /= torch.sum(z[b]) 25 | z = z.view(x.size(0),1,h,w) 26 | return z 27 | class ResModel(nn.Module): 28 | 29 | def __init__(self, n_ID): 30 | super().__init__() 31 | self.backbone = build_resnet_backbone() 32 | self.head = BNneckLinear(n_ID) 33 | self.atten = SpatialAttn() 34 | self.conv1 = nn.Conv2d(17, 17, 1,stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros') 35 | self.pool = nn.AvgPool2d(2, stride=2, padding=0,) 36 | def forward(self, input,posemap,map_weight): 37 | """ 38 | See :class:`ReIDHeads.forward`. 
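        Added annotation (not in the original docstring): `input` is the image
        crop fed to the ResNet backbone; `posemap` is the 17-channel pose
        heatmap for that crop and `map_weight` its per-joint confidence
        weights.  The weighted heatmap is turned into a spatial attention map
        (1x1 conv -> ReLU -> 2x2 average pooling -> 1x1 conv -> softmax ->
        SpatialAttn normalisation), broadcast over the backbone channels, and
        applied residually (feat = feat * att + feat) before the BNneck
        linear head produces the re-ID embedding/logits.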
39 | """ 40 | feat = self.backbone(input) 41 | b,c,h,w = feat.shape 42 | att = self.conv1(torch.mul(posemap,map_weight)) 43 | #print('att-1-size={}'.format(att.shape)) 44 | att = F.relu(att) 45 | att = self.pool(att) 46 | att = self.conv1(att) 47 | #print('att-2-size={}'.format(att.shape)) 48 | att = F.softmax(att) 49 | #print('att-3-size={}'.format(att.shape)) 50 | att = self.atten(att) 51 | #print('att-4-size={}'.format(att.shape)) 52 | att = att.expand(b,c,h,w) 53 | _feat = torch.mul(feat,att) 54 | feat = _feat + feat 55 | return self.head(feat) -------------------------------------------------------------------------------- /trackers/ReidModels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/backbone/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .lrn import SpatialCrossMapLRN 7 | 8 | 9 | class Inception(nn.Module): 10 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 11 | super(Inception, self).__init__() 12 | # 1x1 conv branch 13 | self.b1 = nn.Sequential( 14 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 15 | nn.ReLU(True), 16 | ) 17 | 18 | # 1x1 conv -> 3x3 conv branch 19 | self.b2 = nn.Sequential( 20 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | ) 25 | 26 | # 1x1 conv -> 5x5 conv branch 27 | self.b3 = nn.Sequential( 28 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 29 | nn.ReLU(True), 30 | 31 | nn.Conv2d(n5x5red, n5x5, kernel_size=5, padding=2), 32 | nn.ReLU(True), 33 | ) 34 | 35 | # 3x3 pool -> 1x1 conv branch 36 | self.b4 = nn.Sequential( 37 | nn.MaxPool2d(3, stride=1, padding=1), 38 | 39 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 40 | nn.ReLU(True), 41 | ) 42 | 43 | def forward(self, x): 44 | y1 = self.b1(x) 45 | y2 = self.b2(x) 46 | y3 = self.b3(x) 47 | y4 = self.b4(x) 48 | return torch.cat([y1,y2,y3,y4], 1) 49 | 50 | 51 | class GoogLeNet(nn.Module): 52 | 53 | output_channels = 832 54 | 55 | def __init__(self): 56 | super(GoogLeNet, self).__init__() 57 | self.pre_layers = nn.Sequential( 58 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 59 | nn.ReLU(True), 60 | 61 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 62 | SpatialCrossMapLRN(5), 63 | 64 | nn.Conv2d(64, 64, 1), 65 | nn.ReLU(True), 66 | 67 | nn.Conv2d(64, 192, 3, padding=1), 68 | nn.ReLU(True), 69 | 70 | SpatialCrossMapLRN(5), 71 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 72 | ) 73 | 74 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 75 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 76 | 77 | self.maxpool = nn.MaxPool2d(3, stride=2, ceil_mode=True) 78 | 79 | self.a4 = Inception(480, 192, 96, 
208, 16, 48, 64) 80 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 81 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) 82 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 83 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 84 | 85 | def forward(self, x): 86 | out = self.pre_layers(x) 87 | out = self.a3(out) 88 | out = self.b3(out) 89 | out = self.maxpool(out) 90 | out = self.a4(out) 91 | out = self.b4(out) 92 | out = self.c4(out) 93 | out = self.d4(out) 94 | out = self.e4(out) 95 | 96 | return out 97 | -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/lrn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.legacy.nn import SpatialCrossMapLRN as SpatialCrossMapLRNOld 3 | from torch.autograd import Function, Variable 4 | from torch.nn import Module 5 | 6 | 7 | # function interface, internal, do not use this one!!! 8 | class SpatialCrossMapLRNFunc(Function): 9 | 10 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 11 | self.size = size 12 | self.alpha = alpha 13 | self.beta = beta 14 | self.k = k 15 | 16 | def forward(self, input): 17 | self.save_for_backward(input) 18 | self.lrn = SpatialCrossMapLRNOld(self.size, self.alpha, self.beta, self.k) 19 | self.lrn.type(input.type()) 20 | return self.lrn.forward(input) 21 | 22 | def backward(self, grad_output): 23 | input, = self.saved_tensors 24 | return self.lrn.backward(input, grad_output) 25 | 26 | 27 | # use this one instead 28 | class SpatialCrossMapLRN(Module): 29 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 30 | super(SpatialCrossMapLRN, self).__init__() 31 | self.size = size 32 | self.alpha = alpha 33 | self.beta = beta 34 | self.k = k 35 | 36 | def forward(self, input): 37 | return SpatialCrossMapLRNFunc(self.size, self.alpha, self.beta, self.k)(input) -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/sqeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision import models 5 | 6 | 7 | class DilationLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, padding='same_padding', dilation=1, bn=False): 9 | super(DilationLayer, self).__init__() 10 | if padding == 'same_padding': 11 | padding = int((kernel_size - 1) / 2 * dilation) 12 | self.Dconv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 13 | padding=padding, dilation=dilation) 14 | self.Drelu = nn.ReLU(inplace=True) 15 | self.Dbn = nn.BatchNorm2d(out_channels) if bn else None 16 | 17 | def forward(self, x): 18 | x = self.Dconv(x) 19 | if self.Dbn is not None: 20 | x = self.Dbn(x) 21 | x = self.Drelu(x) 22 | return x 23 | 24 | 25 | class FeatExtractorSqueezeNetx16(nn.Module): 26 | n_feats = [64, 128, 256, 512] 27 | 28 | def __init__(self, pretrained=True): 29 | 30 | super(FeatExtractorSqueezeNetx16, self).__init__() 31 | print("loading layers from squeezenet1_1...") 32 | sq = models.squeezenet1_1(pretrained=pretrained) 33 | 34 | self.conv1 = nn.Sequential( 35 | sq.features[0], 36 | sq.features[1], 37 | ) 38 | self.conv2 = nn.Sequential( 39 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 40 | sq.features[3], 41 | sq.features[4], 42 | ) 43 | self.conv3 = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 45 | sq.features[6], 46 | 
sq.features[7], 47 | ) 48 | self.conv4 = nn.Sequential( 49 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 50 | sq.features[9], 51 | sq.features[10], 52 | sq.features[11], 53 | sq.features[12], 54 | ) 55 | 56 | self.conv1[0].padding = (1, 1) 57 | 58 | def forward(self, x): 59 | x2 = self.conv1(x) 60 | x4 = self.conv2(x2) 61 | x8 = self.conv3(x4) 62 | x16 = self.conv4(x8) 63 | 64 | return x2, x4, x8, x16 65 | -------------------------------------------------------------------------------- /trackers/ReidModels/bn_linear.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | def bn_no_bias(in_features): 11 | bn_layer = nn.BatchNorm1d(in_features) 12 | bn_layer.bias.requires_grad_(False) 13 | return bn_layer 14 | 15 | def weights_init_kaiming(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Linear') != -1: 18 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') 19 | if m.bias is not None: 20 | nn.init.constant_(m.bias, 0.0) 21 | elif classname.find('Conv') != -1: 22 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') 23 | if m.bias is not None: 24 | nn.init.constant_(m.bias, 0.0) 25 | elif classname.find('BatchNorm') != -1: 26 | if m.affine: 27 | nn.init.constant_(m.weight, 1.0) 28 | nn.init.constant_(m.bias, 0.0) 29 | 30 | 31 | def weights_init_classifier(m): 32 | classname = m.__class__.__name__ 33 | if classname.find('Linear') != -1: 34 | nn.init.normal_(m.weight, std=0.001) 35 | if m.bias is not None: 36 | nn.init.constant_(m.bias, 0.0) 37 | 38 | class BNneckLinear(nn.Module): 39 | 40 | def __init__(self, nID): 41 | super().__init__() 42 | self._num_classes = nID 43 | 44 | self.gap = nn.AdaptiveAvgPool2d(1) 45 | self.bnneck = bn_no_bias(2048) 46 | self.bnneck.apply(weights_init_kaiming) 47 | 48 | self.classifier = nn.Linear(2048, self._num_classes, bias=False) 49 | self.classifier.apply(weights_init_classifier) 50 | 51 | def forward(self, features): 52 | """ 53 | See :class:`ReIDHeads.forward`. 
54 | """ 55 | global_features = self.gap(features) 56 | global_features = global_features.view(global_features.shape[0], -1) 57 | bn_features = self.bnneck(global_features) 58 | 59 | if not self.training: 60 | return F.normalize(bn_features) 61 | 62 | pred_class_logits = self.classifier(bn_features) 63 | return global_features, pred_class_logits -------------------------------------------------------------------------------- /trackers/ReidModels/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/classification/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/classification/classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from models import net_utils 9 | from models.classification.rfcn_cls import Model as CLSModel 10 | 11 | 12 | def _factor_closest(num, factor, is_ceil=True): 13 | num = float(num) / factor 14 | num = np.ceil(num) if is_ceil else np.floor(num) 15 | return int(num) * factor 16 | 17 | 18 | def crop_with_factor(im, dest_size, factor=32, pad_val=0, basedon='min'): 19 | im_size_min, im_size_max = np.min(im.shape[0:2]), np.max(im.shape[0:2]) 20 | im_base = {'min': im_size_min, 21 | 'max': im_size_max, 22 | 'w': im.shape[1], 23 | 'h': im.shape[0]} 24 | im_scale = float(dest_size) / im_base.get(basedon, im_size_min) 25 | 26 | # Scale the image. 27 | im = cv2.resize(im, None, fx=im_scale, fy=im_scale) 28 | 29 | # Compute the padded image shape. Ensure it's divisible by factor. 30 | h, w = im.shape[:2] 31 | new_h, new_w = _factor_closest(h, factor), _factor_closest(w, factor) 32 | new_shape = [new_h, new_w] if im.ndim < 3 else [new_h, new_w, im.shape[-1]] 33 | 34 | # Pad the image. 35 | im_padded = np.full(new_shape, fill_value=pad_val, dtype=im.dtype) 36 | im_padded[0:h, 0:w] = im 37 | 38 | return im_padded, im_scale, im.shape 39 | 40 | 41 | class PatchClassifier(object): 42 | def __init__(self, gpu=0): 43 | self.gpu = gpu 44 | 45 | ckpt = 'data/squeezenet_small40_coco_mot16_ckpt_10.h5' 46 | model = CLSModel(extractor='squeezenet') 47 | 48 | # from mcmtt.network.experiments.rfcn_cls2 import Model as CLSModel 49 | # ckpt = '/extra/models/resnet50_small40_coco_kitti/ckpt_31.h5' 50 | # model = CLSModel(extractor='resnet50') 51 | 52 | net_utils.load_net(ckpt, model) 53 | model = model.eval() 54 | self.model = model.cuda(self.gpu) 55 | print('load cls model from: {}'.format(ckpt)) 56 | self.score_map = None 57 | self.im_scale = 1. 58 | 59 | @staticmethod 60 | def im_preprocess(image): 61 | # resize and padding 62 | # real_inp_size = min_size 63 | if min(image.shape[0:2]) > 720: 64 | real_inp_size = 640 65 | else: 66 | real_inp_size = 368 67 | im_pad, im_scale, real_shape = crop_with_factor(image, real_inp_size, factor=16, pad_val=0, basedon='min') 68 | 69 | # preprocess image 70 | im_croped = cv2.cvtColor(im_pad, cv2.COLOR_BGR2RGB) 71 | im_croped = im_croped.astype(np.float32) / 255. 
- 0.5 72 | 73 | return im_croped, im_pad, real_shape, im_scale 74 | 75 | def update(self, image): 76 | im_croped, im_pad, real_shape, im_scale = self.im_preprocess(image) 77 | 78 | self.im_scale = im_scale 79 | self.ori_image_shape = image.shape 80 | im_data = torch.from_numpy(im_croped).permute(2, 0, 1) 81 | im_data = im_data.unsqueeze(0) 82 | 83 | # forward 84 | if LooseVersion(torch.__version__) > LooseVersion('0.3.1'): 85 | with torch.no_grad(): 86 | im_var = Variable(im_data).cuda(self.gpu) 87 | self.score_map = self.model(im_var) 88 | else: 89 | im_var = Variable(im_data, volatile=True).cuda(self.gpu) 90 | self.score_map = self.model(im_var) 91 | 92 | return real_shape, im_scale 93 | 94 | def predict(self, rois): 95 | """ 96 | :param rois: numpy array [N, 4] ( x1, y1, x2, y2) 97 | :return: scores [N] 98 | """ 99 | scaled_rois = rois * self.im_scale 100 | cls_scores = self.model.get_cls_score_numpy(self.score_map, scaled_rois) 101 | 102 | # check area 103 | rois = rois.reshape(-1, 4) 104 | clipped_boxes = bbox_utils.clip_boxes(rois, self.ori_image_shape) 105 | 106 | ori_areas = (rois[:, 2] - rois[:, 0]) * (rois[:, 3] - rois[:, 1]) 107 | areas = (clipped_boxes[:, 2] - clipped_boxes[:, 0]) * (clipped_boxes[:, 3] - clipped_boxes[:, 1]) 108 | ratios = areas / np.clip(ori_areas, a_min=1e-4, a_max=None) 109 | cls_scores[ratios < 0.5] = 0 110 | 111 | return cls_scores 112 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._psroi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | sources = [] 6 | headers = [] 7 | defines = [] 8 | with_cuda = False 9 | 10 | if torch.cuda.is_available(): 11 | print('Including CUDA code.') 12 | sources += ['src/psroi_pooling_cuda.c'] 13 | headers += ['src/psroi_pooling_cuda.h'] 14 | defines += [('WITH_CUDA', None)] 15 | with_cuda = True 16 | 17 | this_file = os.path.dirname(os.path.realpath(__file__)) 18 | print(this_file) 19 | extra_objects = ['src/cuda/psroi_pooling.cu.o'] 
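# NOTE: src/cuda/psroi_pooling.cu.o is produced by make.sh (via nvcc), so run make.sh before invoking this build script.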
20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 21 | 22 | ffi = create_extension( 23 | '_ext.psroi_pooling', 24 | headers=headers, 25 | sources=sources, 26 | define_macros=defines, 27 | relative_to=__file__, 28 | with_cuda=with_cuda, 29 | extra_objects=extra_objects 30 | ) 31 | 32 | if __name__ == '__main__': 33 | ffi.build() 34 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/psroi_pooling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import psroi_pooling 4 | 5 | 6 | class PSRoIPoolingFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.group_size = int(group_size) 12 | self.output_dim = int(output_dim) 13 | 14 | self.output = None 15 | self.mappingchannel = None 16 | self.rois = None 17 | self.feature_size = None 18 | 19 | def forward(self, features, rois): 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new().resize_(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_() 24 | mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_().cuda(features.get_device()) 25 | 26 | rtn = psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 27 | self.group_size, self.output_dim, 28 | features, rois, output, mappingchannel) 29 | assert rtn > 0 30 | self.output = output 31 | self.mappingchannel = mappingchannel 32 | self.rois = rois 33 | self.feature_size = features.size() 34 | # print features.max(), features.min() 35 | # print rois.max(), rois.min() 36 | # print output.max(), output.min() 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert (self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | 46 | psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 47 | self.output_dim, 48 | grad_output, self.rois, grad_input, self.mappingchannel) 49 | return grad_input, None 50 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | cd src/cuda 6 | echo "Compiling psroi pooling kernels by nvcc..." 
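# NOTE: -arch=sm_52 below targets Maxwell-era GPUs; if the kernel fails to build or run on newer hardware, change it to match your GPU's compute capability (e.g. sm_61, sm_70, sm_75).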
7 | ${CUDA_PATH}/bin/nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../../ 10 | python build.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/psroi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | import sys 3 | from ..functions.psroi_pooling import PSRoIPoolingFunction 4 | 5 | 6 | class PSRoIPool(Module): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | super(PSRoIPool, self).__init__() 9 | 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.group_size = int(group_size) 14 | self.output_dim = int(output_dim) 15 | 16 | def forward(self, features, rois): 17 | return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, 18 | self.output_dim)(features, rois) 19 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/cuda/psroi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef PS_ROI_POOLING_KERNEL 2 | #define PS_ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int PSROIPoolForwardLauncher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, const int pooled_width, 11 | const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream); 12 | 13 | 14 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/psroi_pooling_kernel.h" 4 | 5 | 6 | 7 | extern THCState* state; 8 | 9 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){ 10 | float* data_in = THCudaTensor_data(state, features); 11 | float* rois_in = THCudaTensor_data(state, rois); 12 | float* output_out = THCudaTensor_data(state, output); 13 | int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel); 14 | //Get # of Rois 15 | int num_rois = THCudaTensor_size(state, rois, 0); 16 | int size_rois = 
THCudaTensor_size(state, rois, 1); 17 | if (size_rois!=5) 18 | { 19 | return -1; 20 | } 21 | 22 | //Get # of batch_size 23 | int batch_size = THCudaTensor_size(state, features, 0); 24 | 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | int data_width = THCudaTensor_size(state, features, 3); 27 | int num_channels = THCudaTensor_size(state, features, 1); 28 | 29 | cudaStream_t stream = THCState_getCurrentStream(state); 30 | 31 | // call the gpu kernel for psroi_pooling 32 | PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size, 33 | output_dim, output_out, mappingchannel_out,stream); 34 | return 1; 35 | } 36 | 37 | 38 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 39 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel) 40 | { 41 | float *top_grad_flat = THCudaTensor_data(state, top_grad); 42 | float *rois_flat = THCudaTensor_data(state, rois); 43 | 44 | float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 45 | int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel); 46 | 47 | // Number of ROIs 48 | int num_rois = THCudaTensor_size(state, rois, 0); 49 | int size_rois = THCudaTensor_size(state, rois, 1); 50 | if (size_rois != 5) 51 | { 52 | return -1; 53 | } 54 | // batch size 55 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 56 | 57 | // data height 58 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 59 | // data width 60 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 61 | // Number of channels 62 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 63 | 64 | cudaStream_t stream = THCState_getCurrentStream(state); 65 | 66 | PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width, pooled_height, output_dim, bottom_grad_flat, rois_flat, stream); 67 | return 1; 68 | } 69 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel); 3 | 4 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel); 6 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from utils.log import logger 9 | from ReidModels import net_utils 10 | from ReidModels.reid.image_part_aligned import Model 11 | 12 | 13 | def load_reid_model(): 14 | model = Model(n_parts=8) 15 | model.inp_size = (80, 160) 16 | ckpt = 'data/googlenet_part8_all_xavier_ckpt_56.h5' 17 | 18 | net_utils.load_net(ckpt, model) 19 | logger.info('Load ReID model from {}'.format(ckpt)) 20 
| 21 | model = model.cuda() 22 | model.eval() 23 | return model 24 | 25 | 26 | def im_preprocess(image): 27 | image = np.asarray(image, np.float32) 28 | image -= np.array([104, 117, 123], dtype=np.float32).reshape(1, 1, -1) 29 | image = image.transpose((2, 0, 1)) 30 | return image 31 | 32 | 33 | def extract_image_patches(image, bboxes): 34 | bboxes = np.round(bboxes).astype(int) # np.int is deprecated/removed in recent NumPy 35 | bboxes = bbox_utils.clip_boxes(bboxes, image.shape) 36 | patches = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes] 37 | return patches 38 | 39 | 40 | def extract_reid_features(reid_model, image, tlbrs): 41 | if len(tlbrs) == 0: 42 | return torch.FloatTensor() 43 | 44 | patches = extract_image_patches(image, tlbrs) 45 | patches = np.asarray([im_preprocess(cv2.resize(p, reid_model.inp_size)) for p in patches], dtype=np.float32) 46 | 47 | gpu = net_utils.get_device(reid_model) 48 | with torch.no_grad(): 49 | _img = torch.from_numpy(patches) 50 | if gpu: 51 | _img = _img.cuda() 52 | features, id = reid_model(_img) # the model returns (normed_feature, id_logits); detach only the feature tensor 53 | return features.detach() 54 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/image_part_aligned.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.backbone.googlenet import GoogLeNet 6 | 7 | 8 | class Model(nn.Module): 9 | def __init__(self, n_parts=8,n_ID=300): 10 | super(Model, self).__init__() 11 | self.n_parts = n_parts 12 | self.nID = n_ID 13 | 14 | self.feat_conv = GoogLeNet() 15 | self.conv_input_feat = nn.Conv2d(self.feat_conv.output_channels, 512, 1) 16 | # part net 17 | self.conv_att = nn.Conv2d(512, self.n_parts, 1) 18 | 19 | for i in range(self.n_parts): 20 | setattr(self, 'linear_feature{}'.format(i+1), nn.Linear(512, 64)) 21 | self.id_classifer = nn.Linear(512,self.nID) 22 | def forward(self, x): 23 | feature = self.feat_conv(x) 24 | feature = self.conv_input_feat(feature) 25 | 26 | att_weights = torch.sigmoid(self.conv_att(feature)) 27 | 28 | linear_feautres = [] 29 | for i in range(self.n_parts): 30 | masked_feature = feature * torch.unsqueeze(att_weights[:, i], 1) 31 | pooled_feature = F.avg_pool2d(masked_feature, masked_feature.size()[2:4]) 32 | linear_feautres.append( 33 | getattr(self, 'linear_feature{}'.format(i+1))(pooled_feature.view(pooled_feature.size(0), -1)) 34 | ) 35 | 36 | concat_features = torch.cat(linear_feautres, 1) 37 | normed_feature = concat_features / torch.clamp(torch.norm(concat_features, 2, 1, keepdim=True), min=1e-6) 38 | out = self.id_classifer(normed_feature) 39 | return normed_feature,out 40 | -------------------------------------------------------------------------------- /trackers/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def track(tracker,args,orig_img,inps,boxes,hm,cropped_boxes,im_name,scores): 4 | hm = hm.cpu().data.numpy() 5 | online_targets = tracker.update(orig_img,inps,boxes,hm,cropped_boxes,im_name,scores,_debug=False) 6 | new_boxes,new_scores,new_ids,new_hm,new_crop = [],[],[],[],[] 7 | for t in online_targets: 8 | tlbr = t.tlbr 9 | tid = t.track_id 10 | thm = t.pose 11 | tcrop = t.crop_box 12 | tscore = t.detscore 13 | new_boxes.append(tlbr) 14 | new_crop.append(tcrop) 15 | new_hm.append(thm) 16 | new_ids.append(tid) 17 | new_scores.append(tscore) 18 | 19 | new_hm = torch.Tensor(new_hm).to(args.device) 20 | return 
new_boxes,new_scores,new_ids,new_hm,new_crop 21 | -------------------------------------------------------------------------------- /trackers/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | cfg = edict() 3 | cfg.nid = 1000 4 | cfg.arch = "osnet_ain" # "osnet" or "res50-fc512" 5 | cfg.loadmodel = "trackers/weights/osnet_ain_x1_0_msmt17_256x128_amsgrad_ep50_lr0.0015_coslr_b64_fb10_softmax_labsmth_flip_jitter.pth" 6 | cfg.frame_rate = 30 7 | cfg.track_buffer = 240 8 | cfg.conf_thres = 0.5 9 | cfg.nms_thres = 0.4 10 | cfg.iou_thres = 0.5 11 | -------------------------------------------------------------------------------- /trackers/tracking/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | Track Association part adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | -------------------------------------------------------------------------------- /trackers/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/tracking/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /trackers/tracking/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/tracking/utils/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = 
'{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... % 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /trackers/tracking/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /trackers/tracking/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/tracking/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /trackers/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... 
% 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /trackers/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /trackers/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = 
value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 45 | 46 | --------------------------------------------------------------------------------
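For quick reference, the snippet below is a minimal usage sketch of the `Timer` helper defined in `trackers/utils/timer.py` above. It assumes the repository root is on `sys.path` so that `trackers.utils.timer` is importable; the `time.sleep` call is only a stand-in for per-frame detection and tracking work.

```python
import time

from trackers.utils.timer import Timer  # assumes the repo root is on sys.path

timer = Timer()
for _ in range(5):
    timer.tic()           # start timing one iteration
    time.sleep(0.01)      # placeholder for per-frame detection / tracking work
    timer.toc()           # accumulates total_time, calls and average_time

print('calls: {}, average: {:.4f}s, total: {:.4f}s'.format(
    timer.calls, timer.average_time, timer.total_time))
```

Note that `toc(average=True)` (the default) returns the running average rather than the last interval, so calling it once per frame yields a smoothed per-frame time.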