├── .gitignore ├── README.md ├── alphapose ├── __init__.py ├── datasets │ ├── __init__.py │ ├── coco_det.py │ ├── coco_wholebody.py │ ├── coco_wholebody_det.py │ ├── concat_dataset.py │ ├── custom.py │ ├── halpe_136.py │ ├── halpe_136_det.py │ ├── halpe_26.py │ ├── halpe_26_det.py │ ├── mpii.py │ └── mscoco.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── criterion.py │ ├── fastpose.py │ ├── fastpose_duc.py │ ├── fastpose_duc_dense.py │ ├── hardnet.py │ ├── hrnet.py │ ├── layers │ │ ├── DUC.py │ │ ├── PixelUnshuffle.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── ShuffleResnet.py │ │ └── dcn │ │ │ ├── DCN.py │ │ │ ├── __init__.py │ │ │ ├── deform_conv.py │ │ │ ├── deform_pool.py │ │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ └── simplepose.py ├── opt.py ├── utils │ ├── __init__.py │ ├── bbox.py │ ├── config.py │ ├── detector.py │ ├── env.py │ ├── file_detector.py │ ├── logger.py │ ├── metrics.py │ ├── pPose_nms.py │ ├── presets │ │ ├── __init__.py │ │ └── simple_transform.py │ ├── registry.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── transforms.py │ ├── vis.py │ ├── webcam_detector.py │ └── writer.py └── version.py ├── configs ├── coco │ ├── hardnet │ │ ├── 256x192_hard68_lr1e-3_1x.yaml │ │ └── 256x192_hard85_lr1e-3_1x.yaml │ ├── hrnet │ │ └── 256x192_w32_lr1e-3.yaml │ └── resnet │ │ ├── 256x192_res152_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-concat.yaml │ │ ├── 256x192_res50_lr1e-3_1x-duc.yaml │ │ ├── 256x192_res50_lr1e-3_1x-simple.yaml │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ ├── 256x192_res50_lr1e-3_2x-dcn.yaml │ │ ├── 256x192_res50_lr1e-3_2x-regression.yaml │ │ └── 256x192_res50_lr1e-3_2x.yaml ├── dense_coco │ └── resnet50 │ │ └── 256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml ├── halpe_136 │ ├── hardnet │ │ └── 256x192_hard68_lr1e-3_1x.yaml │ └── resnet │ │ ├── 256x192_res50_lr1e-3_1x.yaml │ │ └── 256x192_res50_lr1e-3_2x-regression.yaml └── halpe_26 │ └── resnet │ └── 256x192_res50_lr1e-3_1x.yaml ├── detector ├── apis.py ├── effdet_api.py ├── effdet_cfg.py ├── efficientdet │ ├── README.md │ ├── effdet │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── bench.py │ │ ├── config │ │ │ └── config.py │ │ ├── efficientdet.py │ │ ├── helpers.py │ │ └── object_detection │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── argmax_matcher.py │ │ │ ├── box_coder.py │ │ │ ├── box_list.py │ │ │ ├── faster_rcnn_box_coder.py │ │ │ ├── matcher.py │ │ │ ├── region_similarity_calculator.py │ │ │ └── target_assigner.py │ ├── utils.py │ └── weights │ │ └── get_models.sh ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ ├── nms_kernel.cu │ │ ├── soft_nms_cpu.cpp │ │ └── soft_nms_cpu.pyx ├── tracker │ ├── README.md │ ├── __init__.py │ ├── cfg │ │ ├── ccmcpe.json │ │ └── yolov3.cfg │ ├── models.py │ ├── preprocess.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetrack.py │ │ ├── matching.py │ │ └── multitracker.py │ └── utils │ │ ├── __init__.py │ │ ├── datasets.py │ │ ├── evaluation.py │ │ ├── io.py │ │ ├── kalman_filter.py │ │ ├── log.py │ │ ├── nms.py │ │ ├── parse_config.py │ │ ├── timer.py │ │ ├── utils.py │ │ └── visualization.py ├── tracker_api.py ├── tracker_cfg.py ├── yolo │ ├── README.md │ ├── __init__.py │ ├── bbox.py │ ├── cam_demo.py │ ├── cfg │ │ ├── tiny-yolo-voc.cfg │ │ ├── yolo-voc.cfg │ │ ├── yolo.cfg │ │ ├── 
yolov3-spp.cfg │ │ └── yolov3.cfg │ ├── darknet.py │ ├── detect.py │ ├── pallete │ ├── preprocess.py │ ├── util.py │ ├── video_demo.py │ └── video_demo_half.py ├── yolo_api.py └── yolo_cfg.py ├── examples ├── demo │ ├── Copy of climbing_106.jpg │ ├── Copy of climbing_269.jpg │ ├── Copy of climbing_62.jpg │ ├── Copy of standing_147.jpg │ ├── Copy of standing_153.jpg │ └── Copy of standing_29.jpg ├── list-coco-demo.txt ├── list-coco-minival500.txt ├── list-coco-val5000.txt └── res │ ├── alphapose-results.json │ ├── final_xgboost_home_security_scaler_model.pickle │ └── final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle ├── json_data_preprocessing_colab └── json_data_processing_for_Home_Security.ipynb ├── pretrained_models └── get_models.sh ├── scripts ├── demo_api.py ├── demo_inference.py ├── inference.sh ├── train.py ├── train.sh ├── validate.py └── validate.sh ├── setup.cfg ├── setup.py └── trackers ├── PoseFlow ├── README.md ├── alpha-pose-results-sample.json ├── matching.py ├── parallel_process.py ├── poseflow_infer.py ├── posetrack1.gif ├── posetrack2.gif ├── posetrack_data ├── poseval ├── requirements.txt ├── tracker-baseline.py ├── tracker-general.py └── utils.py ├── README.md ├── ReidModels ├── ResBnLin.py ├── ResNet.py ├── __init__.py ├── backbone │ ├── __init__.py │ ├── googlenet.py │ ├── lrn.py │ └── sqeezenet.py ├── bn_linear.py ├── classification │ ├── __init__.py │ ├── classifier.py │ └── rfcn_cls.py ├── net_utils.py ├── osnet.py ├── osnet_ain.py ├── psroi_pooling │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── psroi_pooling │ │ │ └── __init__.py │ ├── build.py │ ├── functions │ │ ├── __init__.py │ │ └── psroi_pooling.py │ ├── make.sh │ ├── modules │ │ ├── __init__.py │ │ └── psroi_pool.py │ └── src │ │ ├── cuda │ │ ├── psroi_pooling_kernel.cu │ │ └── psroi_pooling_kernel.h │ │ ├── psroi_pooling_cuda.c │ │ └── psroi_pooling_cuda.h ├── reid │ ├── __init__.py │ └── image_part_aligned.py └── resnet_fc.py ├── __init__.py ├── tracker_api.py ├── tracker_cfg.py ├── tracking ├── README.md ├── __init__.py ├── basetrack.py ├── matching.py └── utils │ ├── __init__.py │ ├── io.py │ ├── kalman_filter.py │ ├── nms.py │ ├── parse_config.py │ ├── timer.py │ └── utils.py └── utils ├── basetransforms.py ├── bbox.py ├── io.py ├── kalman_filter.py ├── log.py ├── parse_config.py ├── timer.py ├── transform.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .vscode 105 | .tensorboard 106 | exp/coco* 107 | *.pth 108 | *.weights 109 | exp/json/test_kpt.json 110 | exp/json/test_gt_kpt.json 111 | exp/json/validate_rcnn_kpt.json 112 | exp/json/validate_gt_kpt.json 113 | data/ 114 | tmp/ 115 | exp/json 116 | tmp_*/ 117 | example/res/ -------------------------------------------------------------------------------- /alphapose/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /alphapose/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_det import Mscoco_det 2 | from .concat_dataset import ConcatDataset 3 | from .custom import CustomDataset 4 | from .mscoco import Mscoco 5 | from .mpii import Mpii 6 | from .halpe_26 import Halpe_26 7 | from .halpe_136 import Halpe_136 8 | from .halpe_136_det import Halpe_136_det 9 | from .halpe_26_det import Halpe_26_det 10 | __all__ = ['CustomDataset', 'Halpe_136', 'Halpe_26_det', 'Halpe_136_det', 'Halpe_26', 'Mscoco', 'Mscoco_det', 'Mpii', 'ConcatDataset', 'coco_wholebody', 'coco_wholebody_det'] 11 | -------------------------------------------------------------------------------- /alphapose/datasets/coco_det.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | """MS COCO Human Detection Box dataset.""" 7 | import json 8 | import os 9 | 10 | import cv2 11 | import torch 12 | import torch.utils.data as data 13 | from tqdm import tqdm 14 | 15 | from alphapose.utils.presets import SimpleTransform 16 | from detector.apis import get_detector 17 | from alphapose.models.builder import DATASET 18 | 19 | 20 | @DATASET.register_module 21 | class Mscoco_det(data.Dataset): 22 | """ COCO human detection box dataset. 
23 | 24 | """ 25 | EVAL_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 26 | 27 | def __init__(self, 28 | det_file=None, 29 | opt=None, 30 | **cfg): 31 | 32 | self._cfg = cfg 33 | self._opt = opt 34 | self._preset_cfg = cfg['PRESET'] 35 | self._root = cfg['ROOT'] 36 | self._img_prefix = cfg['IMG_PREFIX'] 37 | if not det_file: 38 | det_file = cfg['DET_FILE'] 39 | self._ann_file = os.path.join(self._root, cfg['ANN']) 40 | 41 | if os.path.exists(det_file): 42 | print("Detection results exist, will use it") 43 | else: 44 | print("Will create detection results to {}".format(det_file)) 45 | self.write_coco_json(det_file) 46 | 47 | assert os.path.exists(det_file), "Error: no detection results found" 48 | with open(det_file, 'r') as fid: 49 | self._det_json = json.load(fid) 50 | 51 | self._input_size = self._preset_cfg['IMAGE_SIZE'] 52 | self._output_size = self._preset_cfg['HEATMAP_SIZE'] 53 | 54 | self._sigma = self._preset_cfg['SIGMA'] 55 | 56 | if self._preset_cfg['TYPE'] == 'simple': 57 | self.transformation = SimpleTransform( 58 | self, scale_factor=0, 59 | input_size=self._input_size, 60 | output_size=self._output_size, 61 | rot=0, sigma=self._sigma, 62 | train=False, add_dpg=False) 63 | 64 | def __getitem__(self, index): 65 | det_res = self._det_json[index] 66 | if not isinstance(det_res['image_id'], int): 67 | img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) 68 | img_id = int(img_id) 69 | else: 70 | img_id = det_res['image_id'] 71 | img_path = './data/coco/val2017/%012d.jpg' % img_id 72 | 73 | # Load image 74 | image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') 75 | 76 | imght, imgwidth = image.shape[1], image.shape[2] 77 | x1, y1, w, h = det_res['bbox'] 78 | bbox = [x1, y1, x1 + w, y1 + h] 79 | inp, bbox = self.transformation.test_transform(image, bbox) 80 | return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) 81 | 82 | def __len__(self): 83 | return len(self._det_json) 84 | 85 | def write_coco_json(self, det_file): 86 | from pycocotools.coco import COCO 87 | import pathlib 88 | 89 | _coco = COCO(self._ann_file) 90 | image_ids = sorted(_coco.getImgIds()) 91 | det_model = get_detector(self._opt) 92 | dets = [] 93 | for entry in tqdm(_coco.loadImgs(image_ids)): 94 | abs_path = os.path.join( 95 | self._root, self._img_prefix, entry['file_name']) 96 | det = det_model.detect_one_img(abs_path) 97 | if det: 98 | dets += det 99 | pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) 100 | json.dump(dets, open(det_file, 'w')) 101 | 102 | @property 103 | def joint_pairs(self): 104 | """Joint pairs which defines the pairs of joint to be swapped 105 | when the image is flipped horizontally.""" 106 | return [[1, 2], [3, 4], [5, 6], [7, 8], 107 | [9, 10], [11, 12], [13, 14], [15, 16]] 108 | -------------------------------------------------------------------------------- /alphapose/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import bisect 7 | 8 | import torch 9 | import torch.utils.data as data 10 | 11 | from alphapose.models.builder import DATASET, build_dataset 12 | 13 | 14 | @DATASET.register_module 15 | class ConcatDataset(data.Dataset): 16 | """Custom Concat dataset. 17 | Annotation file must be in `coco` format. 18 | 19 | Parameters 20 | ---------- 21 | train: bool, default is True 22 | If true, will set as training mode. 23 | dpg: bool, default is False 24 | If true, will activate `dpg` for data augmentation. 25 | skip_empty: bool, default is False 26 | Whether skip entire image if no valid label is found. 27 | cfg: dict, dataset configuration. 28 | """ 29 | 30 | def __init__(self, 31 | train=True, 32 | dpg=False, 33 | skip_empty=True, 34 | **cfg): 35 | 36 | self._cfg = cfg 37 | self._subset_cfg_list = cfg['SET_LIST'] 38 | self._preset_cfg = cfg['PRESET'] 39 | self._mask_id = [item['MASK_ID'] for item in self._subset_cfg_list] 40 | 41 | self.num_joints = self._preset_cfg['NUM_JOINTS'] 42 | 43 | self._subsets = [] 44 | self._subset_size = [0] 45 | for _subset_cfg in self._subset_cfg_list: 46 | subset = build_dataset(_subset_cfg, preset_cfg=self._preset_cfg, train=train) 47 | self._subsets.append(subset) 48 | self._subset_size.append(len(subset)) 49 | self.cumulative_sizes = self.cumsum(self._subset_size) 50 | 51 | def __getitem__(self, idx): 52 | assert idx >= 0 53 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 54 | dataset_idx -= 1 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx] 56 | 57 | sample = self._subsets[dataset_idx][sample_idx] 58 | img, label, label_mask, img_id, bbox = sample 59 | 60 | K = label.shape[0] # num_joints from `_subsets[dataset_idx]` 61 | expend_label = torch.zeros((self.num_joints, *label.shape[1:]), dtype=label.dtype) 62 | expend_label_mask = torch.zeros((self.num_joints, *label_mask.shape[1:]), dtype=label_mask.dtype) 63 | expend_label[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label 64 | expend_label_mask[self._mask_id[dataset_idx]:self._mask_id[dataset_idx] + K] = label_mask 65 | 66 | return img, expend_label, expend_label_mask, img_id, bbox 67 | 68 | def __len__(self): 69 | return self.cumulative_sizes[-1] 70 | 71 | @staticmethod 72 | def cumsum(sequence): 73 | r, s = [], 0 74 | for e in sequence: 75 | r.append(e + s) 76 | s += e 77 | return r 78 | -------------------------------------------------------------------------------- /alphapose/datasets/halpe_26_det.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by HaoyiZhu 4 | # ----------------------------------------------------- 5 | 6 | """Haple_26 Human Detection Box dataset.""" 7 | import json 8 | import os 9 | 10 | import cv2 11 | import torch 12 | import torch.utils.data as data 13 | from tqdm import tqdm 14 | 15 | from alphapose.utils.presets import SimpleTransform 16 | from detector.apis import get_detector 17 | from alphapose.models.builder import DATASET 18 | 19 | 20 | @DATASET.register_module 21 | class Halpe_26_det(data.Dataset): 22 | """ Halpe_26 human detection box dataset. 
23 | 24 | """ 25 | EVAL_JOINTS = list(range(26)) 26 | 27 | def __init__(self, 28 | det_file=None, 29 | opt=None, 30 | **cfg): 31 | 32 | self._cfg = cfg 33 | self._opt = opt 34 | self._preset_cfg = cfg['PRESET'] 35 | self._root = cfg['ROOT'] 36 | self._img_prefix = cfg['IMG_PREFIX'] 37 | if not det_file: 38 | det_file = cfg['DET_FILE'] 39 | self._ann_file = os.path.join(self._root, cfg['ANN']) 40 | 41 | if os.path.exists(det_file): 42 | print("Detection results exist, will use it") 43 | else: 44 | print("Will create detection results to {}".format(det_file)) 45 | self.write_coco_json(det_file) 46 | 47 | assert os.path.exists(det_file), "Error: no detection results found" 48 | with open(det_file, 'r') as fid: 49 | self._det_json = json.load(fid) 50 | 51 | self._input_size = self._preset_cfg['IMAGE_SIZE'] 52 | self._output_size = self._preset_cfg['HEATMAP_SIZE'] 53 | 54 | self._sigma = self._preset_cfg['SIGMA'] 55 | 56 | if self._preset_cfg['TYPE'] == 'simple': 57 | self.transformation = SimpleTransform( 58 | self, scale_factor=0, 59 | input_size=self._input_size, 60 | output_size=self._output_size, 61 | rot=0, sigma=self._sigma, 62 | train=False, add_dpg=False) 63 | 64 | def __getitem__(self, index): 65 | det_res = self._det_json[index] 66 | if not isinstance(det_res['image_id'], int): 67 | img_id, _ = os.path.splitext(os.path.basename(det_res['image_id'])) 68 | img_id = int(img_id) 69 | else: 70 | img_id = det_res['image_id'] 71 | img_path = '/DATA1/Benchmark/coco/val2017/%012d.jpg' % img_id 72 | 73 | # Load image 74 | image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) #scipy.misc.imread(img_path, mode='RGB') 75 | 76 | imght, imgwidth = image.shape[1], image.shape[2] 77 | x1, y1, w, h = det_res['bbox'] 78 | bbox = [x1, y1, x1 + w, y1 + h] 79 | inp, bbox = self.transformation.test_transform(image, bbox) 80 | return inp, torch.Tensor(bbox), torch.Tensor([det_res['bbox']]), torch.Tensor([det_res['image_id']]), torch.Tensor([det_res['score']]), torch.Tensor([imght]), torch.Tensor([imgwidth]) 81 | 82 | def __len__(self): 83 | return len(self._det_json) 84 | 85 | def write_coco_json(self, det_file): 86 | from pycocotools.coco import COCO 87 | import pathlib 88 | 89 | _coco = COCO(self._ann_file) 90 | image_ids = sorted(_coco.getImgIds()) 91 | det_model = get_detector(self._opt) 92 | dets = [] 93 | for entry in tqdm(_coco.loadImgs(image_ids)): 94 | abs_path = os.path.join( 95 | '/DATA1/Benchmark/coco', self._img_prefix, entry['file_name']) 96 | det = det_model.detect_one_img(abs_path) 97 | if det: 98 | dets += det 99 | pathlib.Path(os.path.split(det_file)[0]).mkdir(parents=True, exist_ok=True) 100 | json.dump(dets, open(det_file, 'w')) 101 | 102 | @property 103 | def joint_pairs(self): 104 | """Joint pairs which defines the pairs of joint to be swapped 105 | when the image is flipped horizontally.""" 106 | return[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], 107 | [20, 21], [22, 23], [24, 25]] 108 | -------------------------------------------------------------------------------- /alphapose/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .fastpose import FastPose 2 | from .fastpose_duc import FastPose_DUC 3 | from .hrnet import PoseHighResolutionNet 4 | from .simplepose import SimplePose 5 | from .fastpose_duc_dense import FastPose_DUC_Dense 6 | from .hardnet import HarDNetPose 7 | from .criterion import L1JointRegression 8 | 9 | __all__ = ['FastPose', 'SimplePose', 'PoseHighResolutionNet', 10 | 
'FastPose_DUC', 'FastPose_DUC_Dense', 'HarDNetPose', 'L1JointRegression'] 11 | -------------------------------------------------------------------------------- /alphapose/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from alphapose.utils import Registry, build_from_cfg, retrieve_from_cfg 4 | 5 | 6 | SPPE = Registry('sppe') 7 | LOSS = Registry('loss') 8 | DATASET = Registry('dataset') 9 | 10 | 11 | def build(cfg, registry, default_args=None): 12 | if isinstance(cfg, list): 13 | modules = [ 14 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 15 | ] 16 | return nn.Sequential(*modules) 17 | else: 18 | return build_from_cfg(cfg, registry, default_args) 19 | 20 | 21 | def build_sppe(cfg, preset_cfg, **kwargs): 22 | default_args = { 23 | 'PRESET': preset_cfg, 24 | } 25 | for key, value in kwargs.items(): 26 | default_args[key] = value 27 | return build(cfg, SPPE, default_args=default_args) 28 | 29 | 30 | def build_loss(cfg): 31 | return build(cfg, LOSS) 32 | 33 | 34 | def build_dataset(cfg, preset_cfg, **kwargs): 35 | exec(f'from ..datasets import {cfg.TYPE}') 36 | default_args = { 37 | 'PRESET': preset_cfg, 38 | } 39 | for key, value in kwargs.items(): 40 | default_args[key] = value 41 | return build(cfg, DATASET, default_args=default_args) 42 | 43 | 44 | def retrieve_dataset(cfg): 45 | exec(f'from ..datasets import {cfg.TYPE}') 46 | return retrieve_from_cfg(cfg, DATASET) 47 | -------------------------------------------------------------------------------- /alphapose/models/criterion.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .builder import LOSS 9 | 10 | from alphapose.utils.transforms import _integral_tensor 11 | 12 | 13 | class IngetralCoordinate(torch.autograd.Function): 14 | ''' Symmetry integral regression function. 
15 | ''' 16 | AMPLITUDE = 2 17 | 18 | @staticmethod 19 | def forward(ctx, input): 20 | assert isinstance( 21 | input, torch.Tensor), 'IngetralCoordinate only takes input as torch.Tensor' 22 | input_size = input.size() 23 | weight = torch.arange( 24 | input_size[-1], dtype=input.dtype, layout=input.layout, device=input.device) 25 | ctx.input_size = input_size 26 | output = input.mul(weight) 27 | ctx.save_for_backward(weight, output) 28 | 29 | return output 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | weight, output = ctx.saved_tensors 34 | output_coord = output.sum(dim=2, keepdim=True) 35 | weight = weight[None, None, :].repeat( 36 | output_coord.shape[0], output_coord.shape[1], 1) 37 | weight_mask = torch.ones(weight.shape, dtype=grad_output.dtype, 38 | layout=grad_output.layout, device=grad_output.device) 39 | weight_mask[weight < output_coord] = -1 40 | weight_mask[output_coord.repeat( 41 | 1, 1, weight.shape[-1]) > ctx.input_size[-1]] = 1 42 | weight_mask *= IngetralCoordinate.AMPLITUDE 43 | return grad_output.mul(weight_mask) 44 | 45 | 46 | @LOSS.register_module 47 | class L1JointRegression(nn.Module): 48 | ''' L1 Joint Regression Loss 49 | ''' 50 | def __init__(self, OUTPUT_3D=False, size_average=True, reduce=True, NORM_TYPE='softmax'): 51 | super(L1JointRegression, self).__init__() 52 | self.size_average = size_average 53 | self.reduce = reduce 54 | self.output_3d = OUTPUT_3D 55 | self.norm_type = NORM_TYPE 56 | 57 | self.integral_operation = IngetralCoordinate.apply 58 | 59 | def forward(self, preds, *args): 60 | gt_joints = args[0] 61 | gt_joints_vis = args[1] 62 | 63 | if self.output_3d: 64 | num_joints = int(gt_joints_vis.shape[1] / 3) 65 | else: 66 | num_joints = int(gt_joints_vis.shape[1] / 2) 67 | hm_width = preds.shape[-1] 68 | hm_height = preds.shape[-2] 69 | hm_depth = preds.shape[-3] // num_joints if self.output_3d else 1 70 | 71 | pred_jts, pred_scores = _integral_tensor( 72 | preds, num_joints, self.output_3d, hm_width, hm_height, hm_depth, integral_operation=self.integral_operation, norm_type=self.norm_type) 73 | 74 | _assert_no_grad(gt_joints) 75 | _assert_no_grad(gt_joints_vis) 76 | return weighted_l1_loss(pred_jts, pred_scores, gt_joints, gt_joints_vis, self.size_average) 77 | 78 | 79 | def _assert_no_grad(tensor): 80 | assert not tensor.requires_grad, \ 81 | "nn criterions don't compute the gradient w.r.t. targets - please " \ 82 | "mark these tensors as not requiring gradients" 83 | 84 | 85 | def weighted_l1_loss(input, scores, target, weights, size_average): 86 | out = torch.abs(input - target) 87 | out = out * weights 88 | #out_of_scores = torch.abs(scores - torch.ones_like(scores)) 89 | #out_of_scores = out_of_scores.reshape((out_of_scores.shape[0], -1)) 90 | #out_of_scores = out_of_scores * weights[:, 0::2] 91 | if size_average: 92 | return out.sum() / len(input) 93 | else: 94 | return out.sum() 95 | 96 | 97 | LOSS.register_module(torch.nn.MSELoss) 98 | -------------------------------------------------------------------------------- /alphapose/models/fastpose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.DUC import DUC 10 | from .layers.SE_Resnet import SEResnet 11 | 12 | 13 | @SPPE.register_module 14 | class FastPose(nn.Module): 15 | 16 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 17 | super(FastPose, self).__init__() 18 | self._preset_cfg = cfg['PRESET'] 19 | if 'CONV_DIM' in cfg.keys(): 20 | self.conv_dim = cfg['CONV_DIM'] 21 | else: 22 | self.conv_dim = 128 23 | if 'DCN' in cfg.keys(): 24 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 25 | dcn = cfg['DCN'] 26 | self.preact = SEResnet( 27 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 28 | else: 29 | self.preact = SEResnet(f"resnet{cfg['NUM_LAYERS']}") 30 | 31 | # Imagenet pretrain model 32 | import torchvision.models as tm # noqa: F401,F403 33 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 34 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 35 | 36 | model_state = self.preact.state_dict() 37 | state = {k: v for k, v in x.state_dict().items() 38 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 39 | model_state.update(state) 40 | self.preact.load_state_dict(model_state) 41 | 42 | self.suffle1 = nn.PixelShuffle(2) 43 | self.duc1 = DUC(512, 1024, upscale_factor=2, norm_layer=norm_layer) 44 | if self.conv_dim == 256: 45 | self.duc2 = DUC(256, 1024, upscale_factor=2, norm_layer=norm_layer) 46 | else: 47 | self.duc2 = DUC(256, 512, upscale_factor=2, norm_layer=norm_layer) 48 | self.conv_out = nn.Conv2d( 49 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 50 | 51 | def forward(self, x): 52 | out = self.preact(x) 53 | out = self.suffle1(out) 54 | out = self.duc1(out) 55 | out = self.duc2(out) 56 | 57 | out = self.conv_out(out) 58 | return out 59 | 60 | def _initialize(self): 61 | for m in self.conv_out.modules(): 62 | if isinstance(m, nn.Conv2d): 63 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | nn.init.constant_(m.bias, 0) 68 | -------------------------------------------------------------------------------- /alphapose/models/fastpose_duc.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.Resnet import ResNet 10 | from .layers.SE_Resnet import SEResnet 11 | from .layers.ShuffleResnet import ShuffleResnet 12 | 13 | 14 | @SPPE.register_module 15 | class FastPose_DUC(nn.Module): 16 | conv_dim = 256 17 | 18 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 19 | super(FastPose_DUC, self).__init__() 20 | self._preset_cfg = cfg['PRESET'] 21 | if cfg['BACKBONE'] == 'shuffle': 22 | print('Load shuffle backbone...') 23 | backbone = ShuffleResnet 24 | elif cfg['BACKBONE'] == 'se-resnet': 25 | print('Load SE Resnet...') 26 | backbone = SEResnet 27 | else: 28 | print('Load Resnet...') 29 | backbone = ResNet 30 | 31 | if 'DCN' in cfg.keys(): 32 | stage_with_dcn = cfg['STAGE_WITH_DCN'] 33 | dcn = cfg['DCN'] 34 | self.preact = backbone( 35 | f"resnet{cfg['NUM_LAYERS']}", dcn=dcn, stage_with_dcn=stage_with_dcn) 36 | else: 37 | self.preact = backbone(f"resnet{cfg['NUM_LAYERS']}") 38 | 39 | # Imagenet pretrain model 40 | import torchvision.models as tm # noqa: F401,F403 41 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 42 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 43 | 44 | model_state = self.preact.state_dict() 45 | state = {k: v for k, v in x.state_dict().items() 46 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 47 | model_state.update(state) 48 | self.preact.load_state_dict(model_state) 49 | self.norm_layer = norm_layer 50 | 51 | stage1_cfg = cfg['STAGE1'] 52 | stage2_cfg = cfg['STAGE2'] 53 | stage3_cfg = cfg['STAGE3'] 54 | 55 | self.duc1 = self._make_duc_stage(stage1_cfg, 2048, 1024) 56 | self.duc2 = self._make_duc_stage(stage2_cfg, 1024, 512) 57 | self.duc3 = self._make_duc_stage(stage3_cfg, 512, self.conv_dim) 58 | 59 | self.conv_out = nn.Conv2d( 60 | self.conv_dim, self._preset_cfg['NUM_JOINTS'], kernel_size=3, stride=1, padding=1) 61 | 62 | def forward(self, x): 63 | out = self.preact(x) 64 | out = self.duc1(out) 65 | out = self.duc2(out) 66 | out = self.duc3(out) 67 | 68 | out = self.conv_out(out) 69 | return out 70 | 71 | def _make_duc_stage(self, layer_config, inplanes, outplanes): 72 | layers = [] 73 | 74 | shuffle = nn.PixelShuffle(2) 75 | inplanes //= 4 76 | layers.append(shuffle) 77 | for i in range(layer_config.NUM_CONV - 1): 78 | conv = nn.Conv2d(inplanes, inplanes, kernel_size=3, 79 | padding=1, bias=False) 80 | norm_layer = self.norm_layer(inplanes, momentum=0.1) 81 | relu = nn.ReLU(inplace=True) 82 | layers += [conv, norm_layer, relu] 83 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=3, 84 | padding=1, bias=False) 85 | norm_layer = self.norm_layer(outplanes, momentum=0.1) 86 | relu = nn.ReLU(inplace=True) 87 | layers += [conv, norm_layer, relu] 88 | return nn.Sequential(*layers) 89 | 90 | def _initialize(self): 91 | for m in self.conv_out.modules(): 92 | if isinstance(m, nn.Conv2d): 93 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 94 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 95 | # logger.info('=> init {}.bias as 0'.format(name)) 96 | nn.init.normal_(m.weight, std=0.001) 97 | nn.init.constant_(m.bias, 0) 98 | -------------------------------------------------------------------------------- /alphapose/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class DUC(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, inplanes, planes, 16 | upscale_factor=2, norm_layer=nn.BatchNorm2d): 17 | super(DUC, self).__init__() 18 | self.conv = nn.Conv2d( 19 | inplanes, planes, kernel_size=3, padding=1, bias=False) 20 | self.bn = norm_layer(planes, momentum=0.1) 21 | self.relu = nn.ReLU(inplace=True) 22 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = self.bn(x) 27 | x = self.relu(x) 28 | x = self.pixel_shuffle(x) 29 | return x 30 | -------------------------------------------------------------------------------- /alphapose/models/layers/PixelUnshuffle.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class PixelUnshuffle(nn.Module): 10 | ''' 11 | Initialize: inplanes, planes, upscale_factor 12 | OUTPUT: (planes // upscale_factor^2) * ht * wd 13 | ''' 14 | 15 | def __init__(self, downscale_factor=2): 16 | super(PixelUnshuffle, self).__init__() 17 | self._r = downscale_factor 18 | 19 | def forward(self, x): 20 | b, c, h, w = x.shape 21 | out_c = c * (self._r * self._r) 22 | out_h = h // self._r 23 | out_w = w // self._r 24 | 25 | x_view = x.contiguous().view(b, c, out_h, self._r, out_w, self._r) 26 | x_prime = x_view.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_c, out_h, out_w) 27 | 28 | return x_prime 29 | -------------------------------------------------------------------------------- /alphapose/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | from torch import nn 7 | 8 | 9 | class SELayer(nn.Module): 10 | def __init__(self, channel, reduction=1): 11 | super(SELayer, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.fc = nn.Sequential( 14 | nn.Linear(channel, channel // reduction), 15 | nn.ReLU(inplace=True), 16 | nn.Linear(channel // reduction, channel), 17 | nn.Sigmoid() 18 | ) 19 | 20 | def forward(self, x): 21 | b, c, _, _ = x.size() 22 | y = self.avg_pool(x).view(b, c) 23 | y = self.fc(y).view(b, c, 1, 1) 24 | return x * y 25 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/DCN.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from . 
import DeformConv, ModulatedDeformConv 9 | 10 | 11 | class DCN(nn.Module): 12 | ''' 13 | Initialize: inplanes, planes, upscale_factor 14 | OUTPUT: (planes // upscale_factor^2) * ht * wd 15 | ''' 16 | 17 | def __init__(self, inplanes, planes, dcn, 18 | kernel_size, stride=1, 19 | padding=0, bias=False): 20 | super(DCN, self).__init__() 21 | fallback_on_stride = dcn.get('FALLBACK_ON_STRIDE', False) 22 | self.with_modulated_dcn = dcn.get('MODULATED', False) 23 | if fallback_on_stride: 24 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, stride=stride, 25 | padding=padding, bias=bias) 26 | else: 27 | self.deformable_groups = dcn.get('DEFORM_GROUP', 1) 28 | if not self.with_modulated_dcn: 29 | conv_op = DeformConv 30 | offset_channels = 18 31 | else: 32 | conv_op = ModulatedDeformConv 33 | offset_channels = 27 34 | 35 | self.conv_offset = nn.Conv2d( 36 | inplanes, 37 | self.deformable_groups * offset_channels, 38 | kernel_size=kernel_size, 39 | stride=stride, 40 | padding=padding) 41 | self.conv = conv_op( 42 | inplanes, 43 | planes, 44 | kernel_size=kernel_size, 45 | stride=stride, 46 | padding=padding, 47 | deformable_groups=self.deformable_groups, 48 | bias=bias) 49 | 50 | def forward(self, x): 51 | if self.with_modulated_dcn: 52 | offset_mask = self.conv_offset(x) 53 | offset = offset_mask[:, :18 * self.deformable_groups, :, :] 54 | mask = offset_mask[:, -9 * self.deformable_groups:, :, :] 55 | mask = mask.sigmoid() 56 | out = self.conv(x, offset, mask) 57 | else: 58 | offset = self.conv_offset(x) 59 | out = self.conv(x, offset) 60 | 61 | return out 62 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | from .DCN import DCN 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling', 'DCN' 13 | ] 14 | -------------------------------------------------------------------------------- /alphapose/models/layers/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 
24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) { 36 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 37 | at::DeviceGuard guard(input.device()); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const int width = input.size(3); 43 | const int channels_trans = no_trans ? 2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) { 62 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 63 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 64 | at::DeviceGuard guard(input.device()); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 86 | "deform psroi pooling forward(CUDA)"); 87 | m.def("deform_psroi_pooling_cuda_backward", 88 | &deform_psroi_pooling_cuda_backward, 89 | "deform psroi pooling backward(CUDA)"); 90 | } 91 | -------------------------------------------------------------------------------- /alphapose/models/simplepose.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 
3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch.nn as nn 7 | 8 | from .builder import SPPE 9 | from .layers.Resnet import ResNet 10 | 11 | 12 | @SPPE.register_module 13 | class SimplePose(nn.Module): 14 | def __init__(self, norm_layer=nn.BatchNorm2d, **cfg): 15 | super(SimplePose, self).__init__() 16 | self._preset_cfg = cfg['PRESET'] 17 | self.deconv_dim = cfg['NUM_DECONV_FILTERS'] 18 | self._norm_layer = norm_layer 19 | 20 | self.preact = ResNet(f"resnet{cfg['NUM_LAYERS']}") 21 | 22 | # Imagenet pretrain model 23 | import torchvision.models as tm # noqa: F401,F403 24 | assert cfg['NUM_LAYERS'] in [18, 34, 50, 101, 152] 25 | x = eval(f"tm.resnet{cfg['NUM_LAYERS']}(pretrained=True)") 26 | 27 | model_state = self.preact.state_dict() 28 | state = {k: v for k, v in x.state_dict().items() 29 | if k in self.preact.state_dict() and v.size() == self.preact.state_dict()[k].size()} 30 | model_state.update(state) 31 | self.preact.load_state_dict(model_state) 32 | 33 | self.deconv_layers = self._make_deconv_layer() 34 | self.final_layer = nn.Conv2d( 35 | self.deconv_dim[2], self._preset_cfg['NUM_JOINTS'], kernel_size=1, stride=1, padding=0) 36 | 37 | def _make_deconv_layer(self): 38 | deconv_layers = [] 39 | deconv1 = nn.ConvTranspose2d( 40 | 2048, self.deconv_dim[0], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 41 | bn1 = self._norm_layer(self.deconv_dim[0]) 42 | deconv2 = nn.ConvTranspose2d( 43 | self.deconv_dim[0], self.deconv_dim[1], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 44 | bn2 = self._norm_layer(self.deconv_dim[1]) 45 | deconv3 = nn.ConvTranspose2d( 46 | self.deconv_dim[1], self.deconv_dim[2], kernel_size=4, stride=2, padding=int(4 / 2) - 1, bias=False) 47 | bn3 = self._norm_layer(self.deconv_dim[2]) 48 | 49 | deconv_layers.append(deconv1) 50 | deconv_layers.append(bn1) 51 | deconv_layers.append(nn.ReLU(inplace=True)) 52 | deconv_layers.append(deconv2) 53 | deconv_layers.append(bn2) 54 | deconv_layers.append(nn.ReLU(inplace=True)) 55 | deconv_layers.append(deconv3) 56 | deconv_layers.append(bn3) 57 | deconv_layers.append(nn.ReLU(inplace=True)) 58 | 59 | return nn.Sequential(*deconv_layers) 60 | 61 | def _initialize(self): 62 | for name, m in self.deconv_layers.named_modules(): 63 | if isinstance(m, nn.ConvTranspose2d): 64 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 65 | # logger.info('=> init {}.bias as 0'.format(name)) 66 | nn.init.normal_(m.weight, std=0.001) 67 | # if self.deconv_with_bias: 68 | # nn.init.constant_(m.bias, 0) 69 | elif isinstance(m, nn.BatchNorm2d): 70 | # logger.info('=> init {}.weight as 1'.format(name)) 71 | # logger.info('=> init {}.bias as 0'.format(name)) 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | for m in self.final_layer.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 77 | # logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 78 | # logger.info('=> init {}.bias as 0'.format(name)) 79 | nn.init.normal_(m.weight, std=0.001) 80 | nn.init.constant_(m.bias, 0) 81 | 82 | def forward(self, x): 83 | out = self.preact(x) 84 | out = self.deconv_layers(out) 85 | out = self.final_layer(out) 86 | return out 87 | -------------------------------------------------------------------------------- /alphapose/opt.py: -------------------------------------------------------------------------------- 1 | # 
----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | import argparse 6 | import logging 7 | import os 8 | from types import MethodType 9 | 10 | import torch 11 | 12 | from .utils.config import update_config 13 | 14 | parser = argparse.ArgumentParser(description='AlphaPose Training') 15 | 16 | "----------------------------- Experiment options -----------------------------" 17 | parser.add_argument('--cfg', 18 | help='experiment configure file name', 19 | required=True, 20 | type=str) 21 | parser.add_argument('--exp-id', default='default', type=str, 22 | help='Experiment ID') 23 | 24 | "----------------------------- General options -----------------------------" 25 | parser.add_argument('--nThreads', default=60, type=int, 26 | help='Number of data loading threads') 27 | parser.add_argument('--snapshot', default=2, type=int, 28 | help='How often to take a snapshot of the model (0 = never)') 29 | 30 | parser.add_argument('--rank', default=-1, type=int, 31 | help='node rank for distributed training') 32 | parser.add_argument('--dist-url', default='tcp://192.168.1.214:23345', type=str, 33 | help='url used to set up distributed training') 34 | parser.add_argument('--dist-backend', default='nccl', type=str, 35 | help='distributed backend') 36 | parser.add_argument('--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'], default='none', 37 | help='job launcher') 38 | 39 | "----------------------------- Training options -----------------------------" 40 | parser.add_argument('--sync', default=False, dest='sync', 41 | help='Use Sync Batchnorm', action='store_true') 42 | parser.add_argument('--detector', dest='detector', 43 | help='detector name', default="yolo") 44 | 45 | "----------------------------- Log options -----------------------------" 46 | parser.add_argument('--board', default=True, dest='board', 47 | help='Logging with tensorboard', action='store_true') 48 | parser.add_argument('--debug', default=False, dest='debug', 49 | help='Visualization debug', action='store_true') 50 | parser.add_argument('--map', default=True, dest='map', 51 | help='Evaluate mAP per epoch', action='store_true') 52 | 53 | 54 | opt = parser.parse_args() 55 | cfg_file_name = os.path.basename(opt.cfg) 56 | cfg = update_config(opt.cfg) 57 | 58 | cfg['FILE_NAME'] = cfg_file_name 59 | cfg.TRAIN.DPG_STEP = [i - cfg.TRAIN.DPG_MILESTONE for i in cfg.TRAIN.DPG_STEP] 60 | opt.world_size = cfg.TRAIN.WORLD_SIZE 61 | opt.work_dir = './exp/{}-{}/'.format(opt.exp_id, cfg_file_name) 62 | opt.gpus = [i for i in range(torch.cuda.device_count())] 63 | opt.device = torch.device("cuda:" + str(opt.gpus[0]) if opt.gpus[0] >= 0 else "cpu") 64 | 65 | if not os.path.exists("./exp/{}-{}".format(opt.exp_id, cfg_file_name)): 66 | os.makedirs("./exp/{}-{}".format(opt.exp_id, cfg_file_name)) 67 | 68 | filehandler = logging.FileHandler( 69 | './exp/{}-{}/training.log'.format(opt.exp_id, cfg_file_name)) 70 | streamhandler = logging.StreamHandler() 71 | 72 | logger = logging.getLogger('') 73 | logger.setLevel(logging.INFO) 74 | logger.addHandler(filehandler) 75 | logger.addHandler(streamhandler) 76 | 77 | 78 | def epochInfo(self, set, idx, loss, acc): 79 | self.info('{set}-{idx:d} epoch | loss:{loss:.8f} | acc:{acc:.4f}'.format( 80 | set=set, 81 | idx=idx, 82 | loss=loss, 83 | acc=acc 84 | )) 85 | 86 | 87 | logger.epochInfo = MethodType(epochInfo, logger) 88 | 
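# Usage note (a minimal sketch): opt.py parses its command-line arguments and
# loads the YAML config at import time, so the training/validation scripts only
# need to import the resulting module-level objects. The --cfg path below is
# just one of the YAML files listed under configs/ and is used purely for
# illustration:
#
#   python scripts/train.py --cfg configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml --exp-id my_run
#
#   from alphapose.opt import cfg, logger, opt
#   logger.info('world size: {}, work dir: {}'.format(cfg.TRAIN.WORLD_SIZE, opt.work_dir))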
-------------------------------------------------------------------------------- /alphapose/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg, retrieve_from_cfg 2 | 3 | __all__ = [ 4 | 'Registry', 'build_from_cfg', 'retrieve_from_cfg' 5 | ] 6 | -------------------------------------------------------------------------------- /alphapose/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict as edict 3 | 4 | 5 | def update_config(config_file): 6 | with open(config_file) as f: 7 | config = edict(yaml.load(f, Loader=yaml.FullLoader)) 8 | return config 9 | -------------------------------------------------------------------------------- /alphapose/utils/env.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import os 7 | import torch 8 | import torch.distributed as dist 9 | 10 | 11 | def init_dist(opt): 12 | """Initialize distributed computing environment.""" 13 | opt.ngpus_per_node = torch.cuda.device_count() 14 | 15 | torch.cuda.set_device(opt.gpu) 16 | 17 | if opt.launcher == 'pytorch': 18 | _init_dist_pytorch(opt) 19 | elif opt.launcher == 'mpi': 20 | _init_dist_mpi(opt) 21 | elif opt.launcher == 'slurm': 22 | _init_dist_slurm(opt) 23 | else: 24 | raise ValueError('Invalid launcher type: {}'.format(opt.launcher)) 25 | 26 | 27 | def _init_dist_pytorch(opt, **kwargs): 28 | """Set up environment.""" 29 | # TODO: use local_rank instead of rank % num_gpus 30 | opt.rank = opt.rank * opt.ngpus_per_node + opt.gpu 31 | opt.world_size = opt.world_size 32 | dist.init_process_group(backend=opt.dist_backend, init_method=opt.dist_url, 33 | world_size=opt.world_size, rank=opt.rank) 34 | print(f"{opt.dist_url}, ws:{opt.world_size}, rank:{opt.rank}") 35 | 36 | if opt.rank % opt.ngpus_per_node == 0: 37 | opt.log = True 38 | else: 39 | opt.log = False 40 | 41 | 42 | def _init_dist_slurm(opt, port=23348, **kwargs): 43 | """Set up slurm environment.""" 44 | proc_id = int(os.environ['SLURM_PROCID']) 45 | ntasks = int(os.environ['SLURM_NTASKS']) 46 | node_list = os.environ['SLURM_NODELIST'] 47 | num_gpus = torch.cuda.device_count() 48 | torch.cuda.set_device(proc_id % num_gpus) 49 | if '[' in node_list: 50 | beg = node_list.find('[') 51 | pos1 = node_list.find('-', beg) 52 | if pos1 < 0: 53 | pos1 = 1000 54 | pos2 = node_list.find(',', beg) 55 | if pos2 < 0: 56 | pos2 = 1000 57 | node_list = node_list[:min(pos1, pos2)].replace('[', '') 58 | addr = node_list[8:].replace('-', '.') 59 | os.environ['MASTER_PORT'] = str(port) 60 | os.environ['MASTER_ADDR'] = addr 61 | os.environ['WORLD_SIZE'] = str(ntasks) 62 | os.environ['RANK'] = str(proc_id) 63 | 64 | opt.ngpus_per_node = num_gpus 65 | opt.rank = int(proc_id) 66 | opt.rank = proc_id * num_gpus + opt.gpu 67 | opt.world_size = int(ntasks) * num_gpus 68 | 69 | print(f"tcp://{node_list}:{port}, ws:{opt.world_size}, rank:{opt.rank}, proc_id:{proc_id}") 70 | dist.init_process_group(backend=opt.dist_backend, 71 | init_method=f'tcp://{node_list}:{port}', 72 | world_size=opt.world_size, 73 | rank=opt.rank) 74 | if opt.rank == 0: 75 | opt.log = True 76 | else: 77 | opt.log = False 78 | 79 | 80 | def 
_init_dist_mpi(backend, **kwargs): 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /alphapose/utils/logger.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Jiefeng Li (jeff.lee.sjtu@gmail.com) 4 | # ----------------------------------------------------- 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def board_writing(writer, loss, acc, iterations, dataset='Train'): 11 | writer.add_scalar( 12 | '{}/Loss'.format(dataset), loss, iterations) 13 | writer.add_scalar( 14 | '{}/acc'.format(dataset), acc, iterations) 15 | 16 | 17 | def debug_writing(writer, outputs, labels, inputs, iterations): 18 | tmp_tar = torch.unsqueeze(labels.cpu().data[0], dim=1) 19 | # tmp_out = torch.unsqueeze(outputs.cpu().data[0], dim=1) 20 | 21 | tmp_inp = inputs.cpu().data[0] 22 | tmp_inp[0] += 0.406 23 | tmp_inp[1] += 0.457 24 | tmp_inp[2] += 0.480 25 | 26 | tmp_inp[0] += torch.sum(F.interpolate(tmp_tar, scale_factor=4, mode='bilinear'), dim=0)[0] 27 | tmp_inp.clamp_(0, 1) 28 | 29 | writer.add_image('Data/input', tmp_inp, iterations) 30 | -------------------------------------------------------------------------------- /alphapose/utils/presets/__init__.py: -------------------------------------------------------------------------------- 1 | from .simple_transform import SimpleTransform 2 | 3 | __all__ = ['SimpleTransform'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | def __repr__(self): 11 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 12 | self._name, list(self._module_dict.keys())) 13 | return format_str 14 | 15 | @property 16 | def name(self): 17 | return self._name 18 | 19 | @property 20 | def module_dict(self): 21 | return self._module_dict 22 | 23 | def get(self, key): 24 | return self._module_dict.get(key, None) 25 | 26 | def _register_module(self, module_class): 27 | """Register a module. 28 | 29 | Args: 30 | module (:obj:`nn.Module`): Module to be registered. 31 | """ 32 | if not inspect.isclass(module_class): 33 | raise TypeError('module must be a class, but got {}'.format( 34 | type(module_class))) 35 | module_name = module_class.__name__ 36 | if module_name in self._module_dict: 37 | raise KeyError('{} is already registered in {}'.format( 38 | module_name, self.name)) 39 | self._module_dict[module_name] = module_class 40 | 41 | def register_module(self, cls): 42 | self._register_module(cls) 43 | return cls 44 | 45 | 46 | def build_from_cfg(cfg, registry, default_args=None): 47 | """Build a module from config dict. 48 | 49 | Args: 50 | cfg (dict): Config dict. It should at least contain the key "type". 51 | registry (:obj:`Registry`): The registry to search the type from. 52 | default_args (dict, optional): Default initialization arguments. 53 | 54 | Returns: 55 | obj: The constructed object. 
56 | """ 57 | assert isinstance(cfg, dict) and 'TYPE' in cfg 58 | assert isinstance(default_args, dict) or default_args is None 59 | args = cfg.copy() 60 | obj_type = args.pop('TYPE') 61 | 62 | if isinstance(obj_type, str): 63 | obj_cls = registry.get(obj_type) 64 | if obj_cls is None: 65 | raise KeyError('{} is not in the {} registry'.format( 66 | obj_type, registry.name)) 67 | elif inspect.isclass(obj_type): 68 | obj_cls = obj_type 69 | else: 70 | raise TypeError('type must be a str or valid type, but got {}'.format( 71 | type(obj_type))) 72 | if default_args is not None: 73 | for name, value in default_args.items(): 74 | args.setdefault(name, value) 75 | return obj_cls(**args) 76 | 77 | 78 | def retrieve_from_cfg(cfg, registry): 79 | """Retrieve a module class from config dict. 80 | 81 | Args: 82 | cfg (dict): Config dict. It should at least contain the key "type". 83 | registry (:obj:`Registry`): The registry to search the type from. 84 | 85 | Returns: 86 | class: The class. 87 | """ 88 | assert isinstance(cfg, dict) and 'TYPE' in cfg 89 | args = cfg.copy() 90 | obj_type = args.pop('TYPE') 91 | 92 | if isinstance(obj_type, str): 93 | obj_cls = registry.get(obj_type) 94 | if obj_cls is None: 95 | raise KeyError('{} is not in the {} registry'.format( 96 | obj_type, registry.name)) 97 | elif inspect.isclass(obj_type): 98 | obj_cls = obj_type 99 | else: 100 | raise TypeError('type must be a str or valid type, but got {}'.format( 101 | type(obj_type))) 102 | 103 | return obj_cls 104 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import roi_align, RoIAlign 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . 
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale=1, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = out_size 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, _pair(self.out_size), 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /alphapose/utils/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " 
must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } -------------------------------------------------------------------------------- /alphapose/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Thu Jan 21 12:06:57 2021 3 | 4 | __version__ = '0.3.0+4d58914' 5 | short_version = '0.3.0' 6 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 
'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hardnet/256x192_hard85_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 85 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/hrnet/256x192_w32_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'PoseHighResolutionNet' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_LAYERS: 50 39 | FINAL_CONV_KERNEL: 1 40 | PRETRAINED_LAYERS: ['*'] 41 | STAGE2: 42 | NUM_MODULES: 1 43 | NUM_BRANCHES: 2 44 | NUM_BLOCKS: [4, 4] 45 | NUM_CHANNELS: [32, 64] 46 | BLOCK: 'BASIC' 47 | FUSE_METHOD: 'SUM' 48 | STAGE3: 49 | NUM_MODULES: 4 50 | NUM_BRANCHES: 3 51 | NUM_BLOCKS: [4, 4, 4] 52 | NUM_CHANNELS: [32, 64, 128] 53 | BLOCK: 'BASIC' 54 | FUSE_METHOD: 'SUM' 55 | STAGE4: 56 | NUM_MODULES: 3 57 | NUM_BRANCHES: 4 58 | NUM_BLOCKS: [4, 4, 4, 4] 59 | NUM_CHANNELS: [32, 64, 128, 256] 60 | BLOCK: 'BASIC' 61 | 
FUSE_METHOD: 'SUM' 62 | LOSS: 63 | TYPE: 'MSELoss' 64 | DETECTOR: 65 | NAME: 'yolo' 66 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 67 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 68 | NMS_THRES: 0.6 69 | CONFIDENCE: 0.05 70 | TRAIN: 71 | WORLD_SIZE: 4 72 | BATCH_SIZE: 32 73 | BEGIN_EPOCH: 0 74 | END_EPOCH: 270 75 | OPTIMIZER: 'adam' 76 | LR: 0.001 77 | LR_FACTOR: 0.1 78 | LR_STEP: 79 | - 170 80 | - 200 81 | DPG_MILESTONE: 210 82 | DPG_STEP: 83 | - 230 84 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res152_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 152 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 74 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-concat.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'ConcatDataset' 4 | SET_LIST: 5 | - TYPE: 'Mscoco' 6 | MASK_ID: 0 7 | ROOT: './data/coco/' 8 | IMG_PREFIX: 'train2017' 9 | ANN: 'annotations/person_keypoints_train2017.json' 10 | AUG: 11 | FLIP: true 12 | ROT_FACTOR: 40 13 | SCALE_FACTOR: 0.3 14 | NUM_JOINTS_HALF_BODY: 8 15 | PROB_HALF_BODY: -1 16 | - TYPE: 'Mpii' 17 | MASK_ID: 17 18 | ROOT: './data/mpii/' 19 | IMG_PREFIX: 'images' 20 | ANN: 'annot_mpii.json' 21 | AUG: 22 | FLIP: true 23 | ROT_FACTOR: 40 24 | SCALE_FACTOR: 0.3 25 | NUM_JOINTS_HALF_BODY: 8 26 | PROB_HALF_BODY: -1 27 | VAL: 28 | TYPE: 'Mscoco' 29 | ROOT: './data/coco/' 30 | IMG_PREFIX: 'val2017' 31 | ANN: 'annotations/person_keypoints_val2017.json' 32 | TEST: 33 | TYPE: 'Mscoco_det' 34 | ROOT: './data/coco/' 35 | IMG_PREFIX: 'val2017' 36 | DET_FILE: './exp/json/test_det_yolo.json' 37 | ANN: 'annotations/person_keypoints_val2017.json' 38 | DATA_PRESET: 39 | TYPE: 'simple' 40 | SIGMA: 2 41 | NUM_JOINTS: 33 42 | IMAGE_SIZE: 43 | - 256 44 | - 192 45 | HEATMAP_SIZE: 46 | - 64 47 | - 48 48 | MODEL: 49 | TYPE: 'FastPose' 50 | PRETRAINED: '' 51 | TRY_LOAD: '' 52 | 
NUM_DECONV_FILTERS: 53 | - 256 54 | - 256 55 | - 256 56 | NUM_LAYERS: 50 57 | LOSS: 58 | TYPE: 'MSELoss' 59 | DETECTOR: 60 | NAME: 'yolo' 61 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 62 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 63 | NMS_THRES: 0.6 64 | CONFIDENCE: 0.05 65 | TRAIN: 66 | WORLD_SIZE: 4 67 | BATCH_SIZE: 32 68 | BEGIN_EPOCH: 0 69 | END_EPOCH: 200 70 | OPTIMIZER: 'adam' 71 | LR: 0.001 72 | LR_FACTOR: 0.1 73 | LR_STEP: 74 | - 90 75 | - 120 76 | DPG_MILESTONE: 140 77 | DPG_STEP: 78 | - 160 79 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-duc.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC' 36 | BACKBONE: 'shuffle' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | LOSS: 52 | TYPE: 'MSELoss' 53 | DETECTOR: 54 | NAME: 'yolo' 55 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 56 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 57 | NMS_THRES: 0.6 58 | CONFIDENCE: 0.05 59 | TRAIN: 60 | WORLD_SIZE: 4 61 | BATCH_SIZE: 32 62 | BEGIN_EPOCH: 0 63 | END_EPOCH: 200 64 | OPTIMIZER: 'adam' 65 | LR: 0.001 66 | LR_FACTOR: 0.1 67 | LR_STEP: 68 | - 90 69 | - 120 70 | DPG_MILESTONE: 140 71 | DPG_STEP: 72 | - 160 73 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x-simple.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'SimplePose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | 
CONFIDENCE: 0.1 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-dcn.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | DCN: 44 | MODULATED: false 45 | DEFORM_GROUP: 1 46 | FALLBACK_ON_STRIDE: false 47 | STAGE_WITH_DCN: 48 | - false 49 | - true 50 | - true 51 | - true 52 | LOSS: 53 | TYPE: 'MSELoss' 54 | DETECTOR: 55 | NAME: 'yolo' 56 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 57 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 58 | NMS_THRES: 0.6 59 | CONFIDENCE: 0.05 60 | TRAIN: 61 | WORLD_SIZE: 4 62 | BATCH_SIZE: 32 63 | BEGIN_EPOCH: 0 64 | END_EPOCH: 270 65 | OPTIMIZER: 'adam' 66 | LR: 0.001 67 | LR_FACTOR: 0.1 68 | LR_STEP: 69 | - 170 70 | - 200 71 | DPG_MILESTONE: 210 72 | DPG_STEP: 73 | - 230 74 | - 250 
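The configuration files above all follow the same schema: DATASET, DATA_PRESET, MODEL, LOSS, DETECTOR and TRAIN sections, with MODEL.TYPE naming the network class to build. A minimal sketch of how such a YAML file could be parsed and resolved through the registry helpers shown above in alphapose/utils/registry.py; the MODELS registry and the DummyPose class below are stand-ins invented for illustration, not names from the code base:

import yaml

from alphapose.utils.registry import Registry, build_from_cfg

# Stand-in registry and class; the project keeps its real Registry
# instances (for models, losses, datasets) in its builder modules.
MODELS = Registry('model')

@MODELS.register_module
class DummyPose:
    def __init__(self, **cfg):
        self.cfg = cfg  # e.g. NUM_LAYERS, NUM_DECONV_FILTERS, PRESET, ...

with open('configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml') as f:
    cfg = yaml.safe_load(f)

model_cfg = dict(cfg['MODEL'], TYPE='DummyPose')   # redirect TYPE to the stand-in
model = build_from_cfg(model_cfg, MODELS,
                       default_args={'PRESET': cfg['DATA_PRESET']})
print(model.cfg['NUM_LAYERS'])   # -> 50 for the ResNet-50 configs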
-------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 17 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | LOSS: 45 | TYPE: 'L1JointRegression' 46 | NORM_TYPE: 'sigmoid' 47 | OUTPUT_3D: False 48 | DETECTOR: 49 | NAME: 'yolo' 50 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 51 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 52 | NMS_THRES: 0.6 53 | CONFIDENCE: 0.05 54 | TRAIN: 55 | WORLD_SIZE: 4 56 | BATCH_SIZE: 32 57 | BEGIN_EPOCH: 0 58 | END_EPOCH: 270 59 | OPTIMIZER: 'adam' 60 | LR: 0.001 61 | LR_FACTOR: 0.1 62 | LR_STEP: 63 | - 170 64 | - 200 65 | DPG_MILESTONE: 210 66 | DPG_STEP: 67 | - 230 68 | - 250 -------------------------------------------------------------------------------- /configs/coco/resnet/256x192_res50_lr1e-3_2x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Mscoco' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Mscoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Mscoco_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 17 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 270 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 170 61 | - 200 62 | DPG_MILESTONE: 210 63 | DPG_STEP: 64 | - 230 65 | - 250 -------------------------------------------------------------------------------- /configs/dense_coco/resnet50/256x192_adam_lr1e-3-duc-dcn_1x_crop.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'densecoco' 4 | ROOT: './data/coco/' 5 | 
IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017_dense.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 16 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'densecoco' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017_dense.json' 18 | TEST: 19 | TYPE: 'densecoco' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'test2017' 22 | ANN: 'annotations/person_keypoints_val2017_dense.json' 23 | DATA_PRESET: 24 | TYPE: 'simple' 25 | SIGMA: 2 26 | NUM_JOINTS: 17 27 | NUM_JOINTS_DENSE: 49 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose_DUC_Dense' 36 | BACKBONE: 'se-resnet' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | FINAL_CONV_KERNEL: 1 45 | STAGE1: 46 | NUM_CONV: 4 47 | STAGE2: 48 | NUM_CONV: 2 49 | STAGE3: 50 | NUM_CONV: 1 51 | DCN: 52 | MODULATED: false 53 | DEFORM_GROUP: 1 54 | FALLBACK_ON_STRIDE: false 55 | STAGE_WITH_DCN: 56 | - false 57 | - true 58 | - true 59 | - true 60 | LOSS: 61 | TYPE: 'MSELoss' 62 | DETECTOR: 63 | NAME: 'yolo' 64 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 65 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 66 | NMS_THRES: 0.6 67 | CONFIDENCE: 0.05 68 | TRAIN: 69 | WORLD_SIZE: 4 70 | BATCH_SIZE: 32 71 | BEGIN_EPOCH: 0 72 | END_EPOCH: 200 73 | OPTIMIZER: 'adam' 74 | LR: 0.001 75 | LR_FACTOR: 0.1 76 | LR_STEP: 77 | - 90 78 | - 120 79 | DPG_MILESTONE: 140 80 | DPG_STEP: 81 | - 160 82 | - 190 -------------------------------------------------------------------------------- /configs/halpe_136/hardnet/256x192_hard68_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'HarDNetPose' 36 | INIT_WEIGHTS: '' 37 | PRETRAINED: '' 38 | TRY_LOAD: '' 39 | FINAL_CONV_KERNEL: 1 40 | NUM_LAYERS: 68 41 | DOWN_RATIO: 4 42 | TRT: False 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: './data/coco/' 5 | IMG_PREFIX: 'train2017' 6 | ANN: 
'annotations/person_keypoints_train2017.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'annotations/person_keypoints_val2017.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'annotations/person_keypoints_val2017.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 136 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 32 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 90 61 | - 120 62 | DPG_MILESTONE: 140 63 | DPG_STEP: 64 | - 160 65 | - 190 66 | -------------------------------------------------------------------------------- /configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_136' 4 | ROOT: '' 5 | IMG_PREFIX: 'train2017' 6 | ANN: '/home/group3/hico-coco.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 45 10 | SCALE_FACTOR: 0.35 11 | NUM_JOINTS_HALF_BODY: 8 12 | PROB_HALF_BODY: 0.3 13 | VAL: 14 | TYPE: 'Halpe_136' 15 | ROOT: './data/coco/' 16 | IMG_PREFIX: 'val2017' 17 | ANN: 'coco_val_full_finetuned.json' 18 | TEST: 19 | TYPE: 'Halpe_136_det' 20 | ROOT: './data/coco/' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: 'coco_val_full_finetuned.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | LOSS_TYPE: 'L1JointRegression' 27 | SIGMA: 2 28 | NUM_JOINTS: 136 29 | IMAGE_SIZE: 30 | - 256 31 | - 192 32 | HEATMAP_SIZE: 33 | - 64 34 | - 48 35 | MODEL: 36 | TYPE: 'FastPose' 37 | PRETRAINED: '/home/group3/newrepo/AlphaPose/exp/hico-coco-_regression-256x192_res50_lr1e-3_2x-regression-frei.yaml/model_3.pth' 38 | TRY_LOAD: '' 39 | NUM_DECONV_FILTERS: 40 | - 256 41 | - 256 42 | - 256 43 | NUM_LAYERS: 50 44 | CONV_DIM: 256 45 | LOSS: 46 | TYPE: 'L1JointRegression' 47 | NORM_TYPE: 'sigmoid' 48 | OUTPUT_3D: False 49 | DETECTOR: 50 | NAME: 'yolo' 51 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 52 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 53 | NMS_THRES: 0.6 54 | CONFIDENCE: 0.05 55 | TRAIN: 56 | WORLD_SIZE: 4 57 | BATCH_SIZE: 48 58 | BEGIN_EPOCH: 5 59 | END_EPOCH: 270 60 | OPTIMIZER: 'adam' 61 | LR: 0.001 62 | LR_FACTOR: 0.1 63 | LR_STEP: 64 | - 170 65 | - 200 66 | DPG_MILESTONE: 210 67 | DPG_STEP: 68 | - 230 69 | - 250 70 | -------------------------------------------------------------------------------- /configs/halpe_26/resnet/256x192_res50_lr1e-3_1x.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | TRAIN: 3 | TYPE: 'Halpe_26' 4 | ROOT: '' 5 | IMG_PREFIX: 'train2017' 6 | ANN: '/home/group3/hico_and_coco_26kpts.json' 7 | AUG: 8 | FLIP: true 9 | ROT_FACTOR: 40 10 | SCALE_FACTOR: 0.3 11 | NUM_JOINTS_HALF_BODY: 11 12 | PROB_HALF_BODY: -1 13 | VAL: 14 | TYPE: 'Halpe_26' 15 | ROOT: '' 16 | 
IMG_PREFIX: 'val2017' 17 | ANN: '/home/group3/coco_val_full_26.json' 18 | TEST: 19 | TYPE: 'Halpe_26' 20 | ROOT: '' 21 | IMG_PREFIX: 'val2017' 22 | DET_FILE: './exp/json/test_det_yolo.json' 23 | ANN: '/home/group3/coco_val_full_26.json' 24 | DATA_PRESET: 25 | TYPE: 'simple' 26 | SIGMA: 2 27 | NUM_JOINTS: 26 28 | IMAGE_SIZE: 29 | - 256 30 | - 192 31 | HEATMAP_SIZE: 32 | - 64 33 | - 48 34 | MODEL: 35 | TYPE: 'FastPose' 36 | PRETRAINED: '' 37 | TRY_LOAD: '/home/group3/AlphaPose/exp/pami_hico_and_coco-256x192_res50_lr1e-3_1x.yaml/model_6.pth' 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_LAYERS: 50 43 | LOSS: 44 | TYPE: 'MSELoss' 45 | DETECTOR: 46 | NAME: 'yolo' 47 | CONFIG: 'detector/yolo/cfg/yolov3-spp.cfg' 48 | WEIGHTS: 'detector/yolo/data/yolov3-spp.weights' 49 | NMS_THRES: 0.6 50 | CONFIDENCE: 0.05 51 | TRAIN: 52 | WORLD_SIZE: 4 53 | BATCH_SIZE: 48 54 | BEGIN_EPOCH: 0 55 | END_EPOCH: 200 56 | OPTIMIZER: 'adam' 57 | LR: 0.001 58 | LR_FACTOR: 0.1 59 | LR_STEP: 60 | - 50 61 | - 70 62 | DPG_MILESTONE: 90 63 | DPG_STEP: 64 | - 110 65 | - 130 66 | -------------------------------------------------------------------------------- /detector/apis.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------- 2 | # Copyright (c) Shanghai Jiao Tong University. All rights reserved. 3 | # Written by Chao Xu (xuchao.19962007@sjtu.edu.cn) 4 | # ----------------------------------------------------- 5 | 6 | """API of detector""" 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | def get_detector(opt=None): 11 | if opt.detector == 'yolo': 12 | from detector.yolo_api import YOLODetector 13 | from detector.yolo_cfg import cfg 14 | return YOLODetector(cfg, opt) 15 | elif opt.detector == 'tracker': 16 | from detector.tracker_api import Tracker 17 | from detector.tracker_cfg import cfg 18 | return Tracker(cfg, opt) 19 | elif opt.detector.startswith('efficientdet_d'): 20 | from detector.effdet_api import EffDetDetector 21 | from detector.effdet_cfg import cfg 22 | return EffDetDetector(cfg, opt) 23 | else: 24 | raise NotImplementedError 25 | 26 | 27 | class BaseDetector(ABC): 28 | def __init__(self): 29 | pass 30 | 31 | @abstractmethod 32 | def image_preprocess(self, img_name): 33 | pass 34 | 35 | @abstractmethod 36 | def images_detection(self, imgs, orig_dim_list): 37 | pass 38 | 39 | @abstractmethod 40 | def detect_one_img(self, img_name): 41 | pass 42 | -------------------------------------------------------------------------------- /detector/effdet_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | 5 | cfg.NMS_THRES = 0.6 # 0.6(0.713) 0.5(0.707) 6 | cfg.CONFIDENCE = 0.2 # 0.15 0.1 7 | cfg.NUM_CLASSES = 80 8 | cfg.MAX_DETECTIONS = 200 # 100 9 | -------------------------------------------------------------------------------- /detector/efficientdet/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a EfficientDet Object Detector 2 | 3 | Forked and modified from https://github.com/rwightman/efficientdet-pytorch, many thanks! 
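As an aside on the plug-in point defined in detector/apis.py above: every backend (YOLO, the JDE tracker, EfficientDet) is selected by name in get_detector() and only needs to implement the three abstract methods of BaseDetector. A schematic subclass written purely for illustration; the class name, return shapes and stub bodies below are assumptions, not code from the repository:

import torch

from detector.apis import BaseDetector

class NullDetector(BaseDetector):
    """Illustrative no-op detector: it reports no boxes for every image."""

    def image_preprocess(self, img_name):
        # A real backend loads and letterboxes the image to its input size.
        return torch.zeros(1, 3, 608, 608)

    def images_detection(self, imgs, orig_dim_list):
        # A real backend runs batched inference here; this stub simply
        # reports that nothing was found (the column count is a placeholder).
        return torch.empty(0, 8)

    def detect_one_img(self, img_name):
        # A real backend returns one result dict per detected person.
        return []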
4 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | from .bench import DetBenchEval, DetBenchTrain 3 | from .config.config import get_efficientdet_config 4 | from .helpers import load_checkpoint, load_pretrained -------------------------------------------------------------------------------- /detector/efficientdet/effdet/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from collections import OrderedDict 4 | try: 5 | from torch.hub import load_state_dict_from_url 6 | except ImportError: 7 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 8 | 9 | 10 | def load_checkpoint(model, checkpoint_path): 11 | if checkpoint_path and os.path.isfile(checkpoint_path): 12 | print("=> Loading checkpoint '{}'".format(checkpoint_path)) 13 | checkpoint = torch.load(checkpoint_path) 14 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 15 | new_state_dict = OrderedDict() 16 | for k, v in checkpoint['state_dict'].items(): 17 | if k.startswith('module'): 18 | name = k[7:] # remove `module.` 19 | else: 20 | name = k 21 | new_state_dict[name] = v 22 | model.load_state_dict(new_state_dict) 23 | else: 24 | model.load_state_dict(checkpoint) 25 | print("=> Loaded checkpoint '{}'".format(checkpoint_path)) 26 | else: 27 | print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) 28 | raise FileNotFoundError() 29 | 30 | 31 | def load_pretrained(model, url, filter_fn=None, strict=True): 32 | if not url: 33 | print("=> Warning: Pretrained model URL is empty, using random initialization.") 34 | return 35 | state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') 36 | if filter_fn is not None: 37 | state_dict = filter_fn(state_dict) 38 | model.load_state_dict(state_dict, strict=strict) 39 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection 2 | 3 | All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | # Object detection data loaders and libraries are mostly based on RetinaNet: 16 | # https://github.com/tensorflow/tpu/tree/master/models/official/retinanet 17 | -------------------------------------------------------------------------------- /detector/efficientdet/effdet/object_detection/faster_rcnn_box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Research. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Faster RCNN box coder. 16 | 17 | Faster RCNN box coder follows the coding schema described below: 18 | ty = (y - ya) / ha 19 | tx = (x - xa) / wa 20 | th = log(h / ha) 21 | tw = log(w / wa) 22 | where x, y, w, h denote the box's center coordinates, width and height 23 | respectively. Similarly, xa, ya, wa, ha denote the anchor's center 24 | coordinates, width and height. tx, ty, tw and th denote the anchor-encoded 25 | center, width and height respectively. 26 | 27 | See http://arxiv.org/abs/1506.01497 for details. 28 | """ 29 | 30 | import torch 31 | 32 | from . import box_coder 33 | from . import box_list 34 | 35 | EPS = 1e-8 36 | 37 | 38 | class FasterRcnnBoxCoder(box_coder.BoxCoder): 39 | """Faster RCNN box coder.""" 40 | 41 | def __init__(self, scale_factors=None): 42 | """Constructor for FasterRcnnBoxCoder. 43 | 44 | Args: 45 | scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. 46 | If set to None, does not perform scaling. For Faster RCNN, 47 | the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. 48 | """ 49 | if scale_factors: 50 | assert len(scale_factors) == 4 51 | for scalar in scale_factors: 52 | assert scalar > 0 53 | self._scale_factors = scale_factors 54 | 55 | @property 56 | def code_size(self): 57 | return 4 58 | 59 | def _encode(self, boxes, anchors): 60 | """Encode a box collection with respect to anchor collection. 61 | 62 | Args: 63 | boxes: BoxList holding N boxes to be encoded. 64 | anchors: BoxList of anchors. 65 | 66 | Returns: 67 | a tensor representing N anchor-encoded boxes of the format [ty, tx, th, tw]. 68 | """ 69 | # Convert anchors to the center coordinate representation. 70 | ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() 71 | ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() 72 | # Avoid NaN in division and log below. 73 | ha += EPS 74 | wa += EPS 75 | h += EPS 76 | w += EPS 77 | 78 | tx = (xcenter - xcenter_a) / wa 79 | ty = (ycenter - ycenter_a) / ha 80 | tw = torch.log(w / wa) 81 | th = torch.log(h / ha) 82 | # Scales location targets as used in paper for joint training. 
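        # Illustrative worked example (numbers invented for this note, not
        # from the source): a ground-truth box centred at (11, 11) with
        # h = w = 20 against an anchor centred at (10, 10) with ha = wa = 20
        # gives ty = tx = (11 - 10) / 20 = 0.05 and th = tw = log(20/20) = 0;
        # with the recommended scale_factors [10., 10., 5., 5.] the encoded
        # target becomes [0.5, 0.5, 0., 0.].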
83 | if self._scale_factors: 84 | ty *= self._scale_factors[0] 85 | tx *= self._scale_factors[1] 86 | th *= self._scale_factors[2] 87 | tw *= self._scale_factors[3] 88 | return torch.stack([ty, tx, th, tw]).T 89 | 90 | def _decode(self, rel_codes, anchors): 91 | """Decode relative codes to boxes. 92 | 93 | Args: 94 | rel_codes: a tensor representing N anchor-encoded boxes. 95 | anchors: BoxList of anchors. 96 | 97 | Returns: 98 | boxes: BoxList holding N bounding boxes. 99 | """ 100 | ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() 101 | 102 | ty, tx, th, tw = rel_codes.T.unbind() 103 | if self._scale_factors: 104 | ty /= self._scale_factors[0] 105 | tx /= self._scale_factors[1] 106 | th /= self._scale_factors[2] 107 | tw /= self._scale_factors[3] 108 | w = torch.exp(tw) * wa 109 | h = torch.exp(th) * ha 110 | ycenter = ty * ha + ycenter_a 111 | xcenter = tx * wa + xcenter_a 112 | ymin = ycenter - h / 2. 113 | xmin = xcenter - w / 2. 114 | ymax = ycenter + h / 2. 115 | xmax = xcenter + w / 2. 116 | return box_list.BoxList(torch.stack([ymin, xmin, ymax, xmax]).T) 117 | -------------------------------------------------------------------------------- /detector/efficientdet/weights/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/efficientdet/weights/get_models.sh -------------------------------------------------------------------------------- /detector/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /detector/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
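    Example (illustrative boxes chosen for this note):
        >>> import numpy as np
        >>> dets = np.array([[ 0.,  0., 10., 10., 0.9],
        ...                  [ 1.,  1., 11., 11., 0.8],
        ...                  [50., 50., 60., 60., 0.7]], dtype=np.float32)
        >>> kept, inds = nms(dets, iou_thr=0.5)
        >>> # The second box overlaps the first with IoU ~ 0.7 and is
        >>> # suppressed, so inds == [0, 2] and kept == dets[[0, 2], :].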
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets.to('cpu') 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /detector/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | 4 | template 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data(); 27 | auto order = order_t.data(); 28 | auto x1 = x1_t.data(); 29 | auto y1 = y1_t.data(); 30 | auto x2 = x2_t.data(); 31 | auto y2 = y2_t.data(); 32 | auto areas = areas_t.data(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /detector/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /detector/nms/src/soft_nms_cpu.pyx: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------- 2 | # Soft-NMS: Improving Object Detection With One Line of Code 3 | # Copyright (c) University of Maryland, College Park 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Navaneeth Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def soft_nms_cpu( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # 
gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | weight = 0 108 | else: 109 | weight = 1 110 | 111 | boxes[pos, 4] = weight * boxes[pos, 4] 112 | 113 | # if box score falls below threshold, discard the box by 114 | # swapping with last box update N 115 | if boxes[pos, 4] < min_score: 116 | boxes[pos, 0] = boxes[N-1, 0] 117 | boxes[pos, 1] = boxes[N-1, 1] 118 | boxes[pos, 2] = boxes[N-1, 2] 119 | boxes[pos, 3] = boxes[N-1, 3] 120 | boxes[pos, 4] = boxes[N-1, 4] 121 | inds[pos] = inds[N - 1] 122 | N = N - 1 123 | pos = pos - 1 124 | 125 | pos = pos + 1 126 | 127 | return boxes[:N], inds[:N] 128 | -------------------------------------------------------------------------------- /detector/tracker/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | MOT Tracker adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | 4 | #### Getting started 5 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 6 | 7 | Enable tracking by setting the detector as tracker: `--detector tracker` -------------------------------------------------------------------------------- /detector/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/cfg/ccmcpe.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/wangzd/datasets/MOT", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.train", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "caltech":"./data/caltech.10k.val", 15 | "cuhksysu":"./data/cuhksysu.val", 16 | "prw":"./data/prw.val" 17 | }, 18 | "test": 19 | { 20 | "mot19":"./data/mot19.train", 21 | "caltech":"./data/caltech.val", 22 | "citypersons":"./data/citypersons.val" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /detector/tracker/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, img_size=(1088, 608), color=(127.5, 127.5, 127.5)): 20 | # resize a rectangular image to a padded rectangular 21 | height=img_size[1] 22 | width=img_size[0] 23 | shape = img.shape[:2] # shape = [height, width] 24 | ratio = min(float(height)/shape[0], float(width)/shape[1]) 25 | new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height] 26 | dw 
= (width - new_shape[0]) / 2 # width padding 27 | dh = (height - new_shape[1]) / 2 # height padding 28 | top, bottom = round(dh - 0.1), round(dh + 0.1) 29 | left, right = round(dw - 0.1), round(dw + 0.1) 30 | img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border 31 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular 32 | return img 33 | 34 | 35 | def prep_image(img, img_size=(1088, 608)): 36 | """ 37 | Prepare image for inputting to the neural network. 38 | 39 | Returns a Variable 40 | """ 41 | 42 | orig_im = cv2.imread(img) 43 | dim = orig_im.shape[1], orig_im.shape[0] 44 | img = (letterbox_image(orig_im, img_size)) 45 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 46 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 47 | return img_, orig_im, dim 48 | 49 | 50 | def prep_frame(img, img_size=(1088, 608)): 51 | """ 52 | Prepare image for inputting to the neural network. 53 | 54 | Returns a Variable 55 | """ 56 | 57 | orig_im = img 58 | dim = orig_im.shape[1], orig_im.shape[0] 59 | img = (letterbox_image(orig_im, img_size)) 60 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 61 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 62 | return img_, orig_im, dim 63 | 64 | -------------------------------------------------------------------------------- /detector/tracker/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/tracker/__init__.py -------------------------------------------------------------------------------- /detector/tracker/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /detector/tracker/tracker/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import scipy 4 | from scipy.spatial.distance import cdist 5 | from scipy.optimize import linear_sum_assignment 6 | 7 | from cython_bbox import bbox_overlaps as bbox_ious 8 | from tracker.utils import kalman_filter 9 | import time 10 | 11 | def merge_matches(m1, m2, shape): 12 | O,P,Q = shape 13 | m1 = np.asarray(m1) 14 | m2 = np.asarray(m2) 15 | 16 | M1 = 
scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 18 | 19 | mask = M1*M2 20 | match = mask.nonzero() 21 | match = list(zip(match[0], match[1])) 22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 24 | 25 | return match, unmatched_O, unmatched_Q 26 | 27 | 28 | def _indices_to_matches(cost_matrix, indices, thresh): 29 | matched_cost = cost_matrix[tuple(zip(*indices))] 30 | matched_mask = (matched_cost <= thresh) 31 | 32 | matches = indices[matched_mask] 33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 35 | 36 | return matches, unmatched_a, unmatched_b 37 | 38 | 39 | def linear_assignment(cost_matrix, thresh): 40 | """ 41 | Simple linear assignment 42 | :type cost_matrix: np.ndarray 43 | :type thresh: float 44 | :return: matches, unmatched_a, unmatched_b 45 | """ 46 | if cost_matrix.size == 0: 47 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 48 | 49 | cost_matrix[cost_matrix > thresh] = thresh + 1e-4 50 | row_ind, col_ind = linear_sum_assignment(cost_matrix) 51 | indices = np.column_stack((row_ind, col_ind)) 52 | 53 | return _indices_to_matches(cost_matrix, indices, thresh) 54 | 55 | 56 | def ious(atlbrs, btlbrs): 57 | """ 58 | Compute cost based on IoU 59 | :type atlbrs: list[tlbr] | np.ndarray 60 | :type atlbrs: list[tlbr] | np.ndarray 61 | 62 | :rtype ious np.ndarray 63 | """ 64 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 65 | if ious.size == 0: 66 | return ious 67 | 68 | ious = bbox_ious( 69 | np.ascontiguousarray(atlbrs, dtype=np.float), 70 | np.ascontiguousarray(btlbrs, dtype=np.float) 71 | ) 72 | 73 | return ious 74 | 75 | 76 | def iou_distance(atracks, btracks): 77 | """ 78 | Compute cost based on IoU 79 | :type atracks: list[STrack] 80 | :type btracks: list[STrack] 81 | 82 | :rtype cost_matrix np.ndarray 83 | """ 84 | 85 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 86 | atlbrs = atracks 87 | btlbrs = btracks 88 | else: 89 | atlbrs = [track.tlbr for track in atracks] 90 | btlbrs = [track.tlbr for track in btracks] 91 | _ious = ious(atlbrs, btlbrs) 92 | cost_matrix = 1 - _ious 93 | 94 | return cost_matrix 95 | 96 | def embedding_distance(tracks, detections, metric='cosine'): 97 | """ 98 | :param tracks: list[STrack] 99 | :param detections: list[BaseTrack] 100 | :param metric: 101 | :return: cost_matrix np.ndarray 102 | """ 103 | 104 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 105 | if cost_matrix.size == 0: 106 | return cost_matrix 107 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 108 | for i, track in enumerate(tracks): 109 | cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 110 | return cost_matrix 111 | 112 | 113 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 114 | if cost_matrix.size == 0: 115 | return cost_matrix 116 | gating_dim = 2 if only_position else 4 117 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 118 | measurements = np.asarray([det.to_xyah() for det in detections]) 119 | for row, track in enumerate(tracks): 120 | gating_distance = 
kf.gating_distance( 121 | track.mean, track.covariance, measurements, only_position) 122 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 123 | return cost_matrix 124 | -------------------------------------------------------------------------------- /detector/tracker/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/tracker/utils/__init__.py -------------------------------------------------------------------------------- /detector/tracker/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | # remove ignored results 43 | keep = np.ones(len(trk_tlwhs), dtype=bool) 44 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 45 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 46 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 47 | match_ious = iou_distance[match_is, match_js] 48 | 49 | match_js = np.asarray(match_js, dtype=int) 50 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 51 | keep[match_js] = False 52 | trk_tlwhs = trk_tlwhs[keep] 53 | trk_ids = trk_ids[keep] 54 | 55 | # get distance matrix 56 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 57 | 58 | # acc 59 | self.acc.update(gt_ids, trk_ids, iou_distance) 60 | 61 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 62 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 63 | else: 64 | events = None 65 | return events 66 | 67 | def eval_file(self, filename): 68 | self.reset_accumulator() 69 | 70 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 71 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 72 | for frame_id in frames: 73 | trk_objs = result_frame_dict.get(frame_id, []) 74 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 75 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 76 | 77 | return self.acc 78 | 79 | @staticmethod 80 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 
'idr', 'idf1', 'precision', 'recall')): 81 | names = copy.deepcopy(names) 82 | if metrics is None: 83 | metrics = mm.metrics.motchallenge_metrics 84 | metrics = copy.deepcopy(metrics) 85 | 86 | mh = mm.metrics.create() 87 | summary = mh.compute_many( 88 | accs, 89 | metrics=metrics, 90 | names=names, 91 | generate_overall=True 92 | ) 93 | 94 | return summary 95 | 96 | @staticmethod 97 | def save_summary(summary, filename): 98 | import pandas as pd 99 | writer = pd.ExcelWriter(filename) 100 | summary.to_excel(writer) 101 | writer.save() 102 | -------------------------------------------------------------------------------- /detector/tracker/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... 
% 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /detector/tracker/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /detector/tracker/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
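# ---------------------------------------------------------------------------
# Added annotation (not part of the original file): `_C.nms`, imported below,
# is a compiled C++/CUDA non-maximum-suppression kernel, so its exact Python
# signature is not visible from this module.  The pure-NumPy function that
# follows is only a sketch of the computation such a kernel performs --
# greedy suppression of detections whose IoU with an already-kept,
# higher-scoring box exceeds a threshold.  Box layout assumed: (x1, y1, x2, y2).
# ---------------------------------------------------------------------------
import numpy as np


def nms_reference(boxes, scores, iou_thr):
    """Greedy hard-NMS reference sketch; returns indices of boxes to keep."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # IoU of the current best box with every remaining candidate
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thr]   # drop heavily overlapping boxes
    return keep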
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /detector/tracker/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /detector/tracker/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
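# --------------------------------------------------------------------------
# Added usage sketch (not part of the original file): the tracking loop
# typically brackets each iteration with tic()/toc() and reports the running
# average as FPS.  `time.sleep` below merely stands in for one real
# detection/association step.
# --------------------------------------------------------------------------
if __name__ == '__main__':
    timer = Timer()
    for _ in range(5):
        timer.tic()
        time.sleep(0.01)                     # placeholder workload
        timer.toc()                          # average=True -> running mean
    print('average per call: %.4fs (~%.1f fps)'
          % (timer.average_time, 1.0 / max(timer.average_time, 1e-6)))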
45 | 46 | -------------------------------------------------------------------------------- /detector/tracker/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 35 | text_thickness = 1 if text_scale > 1.1 else 1 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 
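    # (added note) label size scales with frame width -- roughly one unit of
    # text scale per 800 px -- and the thickness chosen just below follows it,
    # so annotations stay legible on high-resolution frames.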
71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /detector/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/tracker/cfg/yolov3.cfg' 5 | cfg.WEIGHTS = 'detector/tracker/data/jde.1088x608.uncertainty.pt' 6 | cfg.IMG_SIZE = (1088, 608) 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.4 9 | cfg.BUFFER_SIZE = 30 # frame buffer -------------------------------------------------------------------------------- /detector/yolo/README.md: -------------------------------------------------------------------------------- 1 | # A PyTorch implementation of a YOLO v3 Object Detector 2 | 3 | Forked from https://github.com/ayooshkathuria/pytorch-yolo-v3 4 | -------------------------------------------------------------------------------- /detector/yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/yolo/__init__.py -------------------------------------------------------------------------------- /detector/yolo/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import random 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | def confidence_filter(result, confidence): 10 | conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2) 11 | result = result*conf_mask 12 | 13 | return result 14 | 15 | def confidence_filter_cls(result, confidence): 16 | max_scores = torch.max(result[:,:,5:25], 2)[0] 17 | res = torch.cat((result, max_scores),2) 18 | print(res.shape) 19 | 20 | 21 | cond_1 = (res[:,:,4] > confidence).float() 22 | cond_2 = (res[:,:,25] > 0.995).float() 23 | 24 | conf = cond_1 + cond_2 25 | conf = torch.clamp(conf, 0.0, 1.0) 26 | conf = conf.unsqueeze(2) 27 | result = result*conf 28 | return result 29 | 30 | 31 | 32 | def get_abs_coord(box): 33 | box[2], box[3] = abs(box[2]), abs(box[3]) 34 | x1 = (box[0] - box[2]/2) - 1 35 | y1 = (box[1] - box[3]/2) - 1 36 | x2 = (box[0] + box[2]/2) - 1 37 | y2 = (box[1] + box[3]/2) - 1 38 | return x1, y1, x2, y2 39 | 40 | 41 | 42 | def sanity_fix(box): 43 | if (box[0] > box[2]): 44 | box[0], box[2] = box[2], box[0] 45 | 46 | if (box[1] > box[3]): 47 | box[1], box[3] = box[3], box[1] 48 | 49 | return box 50 | 51 | def bbox_iou(box1, box2, args=None): 52 | """ 53 | Returns the IoU of two bounding boxes 54 | 55 | 56 | """ 57 | #Get the coordinates of bounding boxes 58 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 59 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], 
box2[:,1], box2[:,2], box2[:,3] 60 | 61 | #get the corrdinates of the intersection rectangle 62 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 63 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 64 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 65 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 66 | 67 | #Intersection area 68 | if not args: 69 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda()) 70 | else: 71 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).to(args.device))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).to(args.device)) 72 | #Union Area 73 | b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1) 74 | b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1) 75 | 76 | iou = inter_area / (b1_area + b2_area - inter_area) 77 | 78 | return iou 79 | 80 | 81 | def pred_corner_coord(prediction): 82 | #Get indices of non-zero confidence bboxes 83 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 84 | 85 | box = prediction[ind_nz[0], ind_nz[1]] 86 | 87 | 88 | box_a = box.new(box.shape) 89 | box_a[:,0] = (box[:,0] - box[:,2]/2) 90 | box_a[:,1] = (box[:,1] - box[:,3]/2) 91 | box_a[:,2] = (box[:,0] + box[:,2]/2) 92 | box_a[:,3] = (box[:,1] + box[:,3]/2) 93 | box[:,:4] = box_a[:,:4] 94 | 95 | prediction[ind_nz[0], ind_nz[1]] = box 96 | 97 | return prediction 98 | 99 | 100 | 101 | 102 | def write(x, batches, results, colors, classes): 103 | c1 = tuple(x[1:3].int()) 104 | c2 = tuple(x[3:5].int()) 105 | img = results[int(x[0])] 106 | cls = int(x[-1]) 107 | label = "{0}".format(classes[cls]) 108 | color = random.choice(colors) 109 | cv2.rectangle(img, c1, c2,color, 1) 110 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 111 | c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 112 | cv2.rectangle(img, c1, c2,color, -1) 113 | cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1); 114 | return img 115 | -------------------------------------------------------------------------------- /detector/yolo/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | 
pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/yolo/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 
162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/yolo/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | 
batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/yolo/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import cv2 8 | from util import * 9 | import argparse 10 | import os 11 | import os.path as osp 12 | from darknet import Darknet 13 | from preprocess import prep_image, inp_to_image 14 | import pandas as pd 15 | import random 16 | import pickle as pkl 17 | import itertools 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | scales = "1,2,3" 23 | images = "imgs/messi.jpg" 24 | batch_size = 1 25 | confidence = 0.5 26 | nms_thesh = 0.4 27 | 28 | CUDA = torch.cuda.is_available() 29 | 30 | num_classes = 80 31 | classes = load_classes('data/coco.names') 32 | 33 | #Set up the neural network 34 | print("Loading network.....") 35 | model = Darknet("cfg/yolov3-spp.cfg") 36 | model.load_weights("yolov3-spp.weights") 37 | print("Network successfully loaded") 38 | 39 | model.net_info["height"] = "608" 40 | inp_dim 
= int(model.net_info["height"]) 41 | assert inp_dim % 32 == 0 42 | assert inp_dim > 32 43 | 44 | #If there's a GPU availible, put the model on GPU 45 | if CUDA: 46 | model.cuda() 47 | 48 | #Set the model in evaluation mode 49 | model.eval() 50 | 51 | #Detection phase 52 | try: 53 | imlist = [] 54 | imlist.append(osp.join(osp.realpath('.'), images)) 55 | except FileNotFoundError: 56 | print ("No file or directory with the name {}".format(images)) 57 | exit() 58 | 59 | batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))])) 60 | im_batches = [x[0] for x in batches] 61 | orig_ims = [x[1] for x in batches] 62 | im_dim_list = [x[2] for x in batches] 63 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2) 64 | 65 | if CUDA: 66 | im_dim_list = im_dim_list.cuda() 67 | 68 | 69 | for batch in im_batches: 70 | #load the image 71 | if CUDA: 72 | batch = batch.cuda() 73 | with torch.no_grad(): 74 | prediction = model(Variable(batch), CUDA) 75 | 76 | prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh) 77 | output = prediction 78 | 79 | if CUDA: 80 | torch.cuda.synchronize() 81 | 82 | try: 83 | output 84 | except NameError: 85 | print("No detections were made") 86 | exit() 87 | print(im_dim_list.shape) 88 | im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long()) 89 | 90 | scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1) 91 | 92 | 93 | output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2 94 | output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2 95 | 96 | output[:,1:5] /= scaling_factor 97 | 98 | for i in range(output.shape[0]): 99 | output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0]) 100 | output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1]) 101 | 102 | print(output) 103 | print(output.shape) 104 | -------------------------------------------------------------------------------- /detector/yolo/pallete: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/detector/yolo/pallete -------------------------------------------------------------------------------- /detector/yolo/preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | try: 11 | from util import count_parameters as count 12 | from util import convert2cpu as cpu 13 | except ImportError: 14 | from yolo.util import count_parameters as count 15 | from yolo.util import convert2cpu as cpu 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | def letterbox_image(img, inp_dim): 20 | '''resize image with unchanged aspect ratio using padding''' 21 | img_w, img_h = img.shape[1], img.shape[0] 22 | w, h = inp_dim 23 | new_w = int(img_w * min(w / img_w, h / img_h)) 24 | new_h = int(img_h * min(w / img_w, h / img_h)) 25 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 26 | 27 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 28 | 29 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 30 | 31 | return canvas 32 | 33 | 34 | def prep_image(img, 
inp_dim): 35 | """ 36 | Prepare image for inputting to the neural network. 37 | 38 | Returns a Variable 39 | """ 40 | 41 | orig_im = cv2.imread(img) 42 | dim = orig_im.shape[1], orig_im.shape[0] 43 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 44 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 45 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 46 | return img_, orig_im, dim 47 | 48 | 49 | def prep_frame(img, inp_dim): 50 | """ 51 | Prepare image for inputting to the neural network. 52 | 53 | Returns a Variable 54 | """ 55 | 56 | orig_im = img 57 | dim = orig_im.shape[1], orig_im.shape[0] 58 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 59 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() 60 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 61 | return img_, orig_im, dim 62 | 63 | 64 | def prep_image_pil(img, network_dim): 65 | orig_im = Image.open(img) 66 | img = orig_im.convert('RGB') 67 | dim = img.size 68 | img = img.resize(network_dim) 69 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 70 | img = img.view(*network_dim, 3).transpose(0, 1).transpose(0, 2).contiguous() 71 | img = img.view(1, 3, *network_dim) 72 | img = img.float().div(255.0) 73 | return (img, orig_im, dim) 74 | 75 | 76 | def inp_to_image(inp): 77 | inp = inp.cpu().squeeze() 78 | inp = inp * 255 79 | try: 80 | inp = inp.data.numpy() 81 | except RuntimeError: 82 | inp = inp.numpy() 83 | inp = inp.transpose(1, 2, 0) 84 | 85 | inp = inp[:, :, ::-1] 86 | return inp 87 | -------------------------------------------------------------------------------- /detector/yolo_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | 3 | cfg = edict() 4 | cfg.CONFIG = 'detector/yolo/cfg/yolov3-spp.cfg' 5 | cfg.WEIGHTS = 'detector/yolo/data/yolov3-spp.weights' 6 | cfg.INP_DIM = 608 7 | cfg.NMS_THRES = 0.6 8 | cfg.CONFIDENCE = 0.1 9 | cfg.NUM_CLASSES = 80 10 | -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_106.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_269.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_269.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of climbing_62.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of climbing_62.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of standing_147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_147.jpg 
-------------------------------------------------------------------------------- /examples/demo/Copy of standing_153.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_153.jpg -------------------------------------------------------------------------------- /examples/demo/Copy of standing_29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/demo/Copy of standing_29.jpg -------------------------------------------------------------------------------- /examples/list-coco-demo.txt: -------------------------------------------------------------------------------- 1 | 000000375530.jpg 2 | 000000244462.jpg 3 | 000000113397.jpg 4 | 000000113408.jpg 5 | 000000375554.jpg 6 | 000000171819.jpg 7 | 000000375566.jpg 8 | 000000244496.jpg 9 | 000000139077.jpg 10 | 000000506656.jpg 11 | 000000375606.jpg 12 | 000000244539.jpg 13 | 000000565045.jpg 14 | 000000113473.jpg 15 | 000000375621.jpg 16 | 000000244550.jpg 17 | 000000492605.jpg 18 | 000000506707.jpg 19 | 000000113493.jpg 20 | 000000215524.jpg 21 | -------------------------------------------------------------------------------- /examples/res/final_xgboost_home_security_scaler_model.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/res/final_xgboost_home_security_scaler_model.pickle -------------------------------------------------------------------------------- /examples/res/final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/examples/res/final_xgboost_home_securuty_prediction_model_21jan_new_89_87.pickle -------------------------------------------------------------------------------- /pretrained_models/get_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/pretrained_models/get_models.sh -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | VIDEO=$3 6 | OUTDIR=${4:-"./examples/res"} 7 | 8 | python scripts/demo_inference.py \ 9 | --cfg ${CONFIG} \ 10 | --checkpoint ${CKPT} \ 11 | --video ${VIDEO} \ 12 | --outdir ${OUTDIR} \ 13 | --detector yolo --save_img --save_video 14 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | EXPID=${2:-"alphapose"} 5 | 6 | python ./scripts/train.py \ 7 | --exp-id ${EXPID} \ 8 | --cfg ${CONFIG} 9 | -------------------------------------------------------------------------------- /scripts/validate.sh: 
-------------------------------------------------------------------------------- 1 | set -x 2 | 3 | CONFIG=$1 4 | CKPT=$2 5 | BATCH=${3:-"64"} 6 | GPUS=${4:-"0,1,2,3"} 7 | 8 | python ./scripts/validate.py \ 9 | --cfg ${CONFIG} \ 10 | --batch ${BATCH} \ 11 | --gpus $GPUS\ 12 | --flip-test \ 13 | --checkpoint ${CKPT} 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | index_url=https://pypi.tuna.tsinghua.edu.cn/simple 3 | -------------------------------------------------------------------------------- /trackers/PoseFlow/README.md: -------------------------------------------------------------------------------- 1 | # Pose Flow 2 | 3 | Official implementation of [Pose Flow: Efficient Online Pose Tracking ](https://arxiv.org/abs/1802.00977). 4 | 5 |
6 | *(Two demo GIFs, `posetrack1.gif` and `posetrack2.gif`, are embedded here in the original README.)* 7 | 8 |
9 | 10 | Results on PoseTrack Challenge validation set: 11 | 12 | 1. Task2: Multi-Person Pose Estimation (mAP) 13 |
14 | 15 | | Method | Head mAP | Shoulder mAP | Elbow mAP | Wrist mAP | Hip mAP | Knee mAP | Ankle mAP | Total mAP | 16 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:| 17 | | Detect-and-Track(FAIR) | **67.5** | 70.2 | 62 | 51.7 | 60.7 | 58.7 | 49.8 | 60.6 | 18 | | **AlphaPose** | 66.7 | **73.3** | **68.3** | **61.1** | **67.5** | **67.0** | **61.3** | **66.5** | 19 | 20 |
21 | 22 | 2. Task3: Pose Tracking (MOTA) 23 |
24 | 25 | | Method | Head MOTA | Shoulder MOTA | Elbow MOTA | Wrist MOTA | Hip MOTA | Knee MOTA | Ankle MOTA | Total MOTA | Total MOTP| Speed(FPS) | 26 | |:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:|:-------:| 27 | | Detect-and-Track(FAIR) | **61.7** | 65.5 | 57.3 | 45.7 | 54.3 | 53.1 | 45.7 | 55.2 | 61.5 |Unknown| 28 | | **PoseFlow(DeepMatch)** | 59.8 | **67.0** | 59.8 | 51.6 | **60.0** | **58.4** | **50.5** | **58.3** | **67.8**|8| 29 | | **PoseFlow(OrbMatch)** | 59.0 | 66.8 | **60.0** | **51.8** | 59.4 | **58.4** | 50.3 | 58.0 | 62.2|24| 30 | 31 |
32 | 33 | ## Latest Features 34 | - Dec 2018: PoseFlow(General Version) released! Support ANY DATASET and pose tracking results visualization. 35 | - Oct 2018: Support generating correspondence files with ORB(OpenCV), 3X FASTER and no need to compile DeepMatching library. 36 | 37 | ## Requirements 38 | 39 | - Python 2.7.13 40 | - OpenCV 3.4.2.16 41 | - OpenCV-contrib 3.4.2.16 42 | - tqdm 4.19.8 43 | 44 | ## Installation 45 | 46 | 1. Download PoseTrack Dataset from [PoseTrack](https://posetrack.net/) to `AlphaPose/PoseFlow/posetrack_data/` 47 | 48 | ```shell 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | ## For Any Datasets (General Version) 53 | 54 | 1. Using [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results. 55 | 56 | ```shell 57 | # pytorch version 58 | python demo.py --indir ${image_dir}$ --outdir ${results_dir}$ 59 | ``` 60 | 61 | 2. Run pose tracking 62 | 63 | 64 | ```shell 65 | # pytorch version 66 | python tracker-general.py --imgdir ${image_dir}$ 67 | --in_json ${results_dir}$/alphapose-results.json 68 | --out_json ${results_dir}$/alphapose-results-forvis-tracked.json 69 | --visdir ${render_dir}$ 70 | ``` 71 | 72 | 73 | ## For PoseTrack Dataset Evaluation (Paper Baseline) 74 | 75 | 1. Using [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to generate multi-person pose estimation results on videos with format like `alpha-pose-results-sample.json`. 76 | 2. Using DeepMatching/ORB to generate correspondence files. 77 | 78 | ```shell 79 | # Generate correspondences by orb 80 | python matching.py --orb=1 81 | ``` 82 | 83 | 3. Run pose tracking 84 | 85 | 86 | ```shell 87 | python tracker-baseline.py --dataset=val/test --orb=1 88 | ``` 89 | 4. Evaluation 90 | 91 | Original [poseval](https://github.com/leonid-pishchulin/poseval) has some instructions on how to convert annotation files from MAT to JSON. 92 | 93 | Evaluate pose tracking results on validation dataset: 94 | 95 | ```shell 96 | git clone https://github.com/leonid-pishchulin/poseval.git --recursive 97 | cd poseval/py && export PYTHONPATH=$PWD/../py-motmetrics:$PYTHONPATH 98 | cd ../../ 99 | python poseval/py/evaluate.py --groundTruth=./posetrack_data/annotations/val \ 100 | --predictions=./${track_result_dir}/ \ 101 | --evalPoseTracking --evalPoseEstimation 102 | ``` 103 | 104 | 105 | ## Citation 106 | 107 | Please cite these papers in your publications if it helps your research: 108 | 109 | @inproceedings{xiu2018poseflow, 110 | author = {Xiu, Yuliang and Li, Jiefeng and Wang, Haoyu and Fang, Yinghong and Lu, Cewu}, 111 | title = {{Pose Flow}: Efficient Online Pose Tracking}, 112 | booktitle={BMVC}, 113 | year = {2018} 114 | } 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /trackers/PoseFlow/parallel_process.py: -------------------------------------------------------------------------------- 1 | # adapted from http://danshiebler.com/2016-09-14-parallel-progress-bar/ 2 | from tqdm import tqdm 3 | from concurrent.futures import ProcessPoolExecutor, as_completed 4 | 5 | def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=3): 6 | """ 7 | A parallel version of the map function with a progress bar. 8 | 9 | Args: 10 | array (array-like): An array to iterate over. 
11 | function (function): A python function to apply to the elements of array 12 | n_jobs (int, default=16): The number of cores to use 13 | use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of 14 | keyword arguments to function 15 | front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job. 16 | Useful for catching bugs 17 | Returns: 18 | [function(array[0]), function(array[1]), ...] 19 | """ 20 | #We run the first few iterations serially to catch bugs 21 | if front_num > 0: 22 | front = [function(**a) if use_kwargs else function(*a) for a in array[:front_num]] 23 | #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging. 24 | if n_jobs==1: 25 | return front + [function(**a) if use_kwargs else function(*a) for a in tqdm(array[front_num:])] 26 | #Assemble the workers 27 | with ProcessPoolExecutor(max_workers=n_jobs) as pool: 28 | #Pass the elements of array into function 29 | if use_kwargs: 30 | futures = [pool.submit(function, **a) for a in array[front_num:]] 31 | else: 32 | futures = [pool.submit(function, *a) for a in array[front_num:]] 33 | kwargs = { 34 | 'total': len(futures), 35 | 'unit': 'it', 36 | 'unit_scale': True, 37 | 'leave': True 38 | } 39 | #Print out the progress as tasks complete 40 | for f in tqdm(as_completed(futures), **kwargs): 41 | pass 42 | out = [] 43 | #Get the results from the futures. 44 | for i, future in enumerate(futures): 45 | try: 46 | out.append(future.result()) 47 | except Exception as e: 48 | out.append(e) 49 | return front + out -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/PoseFlow/posetrack1.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/PoseFlow/posetrack2.gif -------------------------------------------------------------------------------- /trackers/PoseFlow/posetrack_data: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/posetrack_data -------------------------------------------------------------------------------- /trackers/PoseFlow/poseval: -------------------------------------------------------------------------------- 1 | /home/yuliang/data/posetrack_data/poseval -------------------------------------------------------------------------------- /trackers/PoseFlow/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.14.5 2 | scipy==1.1.0 3 | opencv_python==3.4.2.16 4 | opencv_contrib_python==3.4.2.16 5 | matplotlib==2.2.2 6 | tqdm==4.23.4 7 | Image==1.5.25 8 | Pillow==5.3.0 9 | munkres==1.0.12 10 | -------------------------------------------------------------------------------- /trackers/README.md: -------------------------------------------------------------------------------- 1 | # Pose Tracking Module for AlphaPose 2 | 3 | AlphaPose provide three different tracking methods for now, you can 
try different method to see which one is better for you. 4 | 5 | ## 1. Human-ReID based tracking (Recommended) 6 | Currently the best performance tracking model. Paper coming soon. 7 | 8 | #### Getting started 9 | Download [human reid model](https://mega.nz/#!YTZFnSJY!wlbo_5oa2TpDAGyWCTKTX1hh4d6DvJhh_RUA2z6i_so) and place it into `AlphaPose/trackers/weights/`. 10 | 11 | Then simply run alphapose with additional flag `--pose_track` 12 | 13 | You can try different person reid model by modifing `cfg.arch` and `cfg.loadmodel` in `./trackers/tracker_cfg.py`. 14 | 15 | If you want to train your own reid model, please refer to this [project](https://github.com/KaiyangZhou/deep-person-reid) 16 | 17 | #### Demo 18 | ``` bash 19 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --pose_track 20 | ``` 21 | #### Todo 22 | - [] Evaluation Tools for PoseTrack 23 | - [] More Models 24 | - [] Training code for [PoseTrack Dataset](https://posetrack.net/) 25 | 26 | ## 2. Detector based human tracking 27 | Use a human detecter with tracking module (JDE). Please refer to [detector/tracker/](../detector/tracker/) 28 | 29 | #### Getting started 30 | Download detector [JDE-1088x608](https://github.com/Zhongdao/Towards-Realtime-MOT#pretrained-model-and-baseline-models) and place it under `AlphaPose/detector/tracker/data/` 31 | 32 | Enable tracking by setting the detector as tracker: `--detector tracker` 33 | #### Demo 34 | ``` bash 35 | ./scripts/inference.sh ${CONFIG} ${CHECKPOINT} ${VIDEO_NAME} ${OUTPUT_DIR}, --detector tracker 36 | ``` 37 | 38 | ## 3. PoseFlow human tracking 39 | This tracker is based on our BMVC 2018 paper PoseFlow, for more info please refer to [PoseFlow/README.md](PoseFlow/) 40 | 41 | #### Getting started 42 | 43 | Simply run alphapose with additional flag `--pose_flow` 44 | -------------------------------------------------------------------------------- /trackers/ReidModels/ResBnLin.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | from ReidModels.ResNet import build_resnet_backbone 10 | from ReidModels.bn_linear import BNneckLinear 11 | class SpatialAttn(nn.Module): 12 | """Spatial Attention Layer""" 13 | def __init__(self): 14 | super(SpatialAttn, self).__init__() 15 | 16 | def forward(self, x): 17 | # global cross-channel averaging # e.g. 32,2048,24,8 18 | x = x.mean(1, keepdim=True) # e.g. 32,1,24,8 19 | h = x.size(2) 20 | w = x.size(3) 21 | x = x.view(x.size(0),-1) # e.g. 32,192 22 | z = x 23 | for b in range(x.size(0)): 24 | z[b] /= torch.sum(z[b]) 25 | z = z.view(x.size(0),1,h,w) 26 | return z 27 | class ResModel(nn.Module): 28 | 29 | def __init__(self, n_ID): 30 | super().__init__() 31 | self.backbone = build_resnet_backbone() 32 | self.head = BNneckLinear(n_ID) 33 | self.atten = SpatialAttn() 34 | self.conv1 = nn.Conv2d(17, 17, 1,stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros') 35 | self.pool = nn.AvgPool2d(2, stride=2, padding=0,) 36 | def forward(self, input,posemap,map_weight): 37 | """ 38 | See :class:`ReIDHeads.forward`. 
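        Added annotation (not in the original docstring): `input` is the image
        crop fed to the ResNet backbone; `posemap` is the 17-channel pose
        heatmap for that crop and `map_weight` its per-joint confidence
        weights.  The weighted heatmap is turned into a spatial attention map
        (1x1 conv -> ReLU -> 2x2 average pooling -> 1x1 conv -> softmax ->
        SpatialAttn normalisation), broadcast over the backbone channels, and
        applied residually (feat = feat * att + feat) before the BNneck
        linear head produces the re-ID embedding/logits.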
39 | """ 40 | feat = self.backbone(input) 41 | b,c,h,w = feat.shape 42 | att = self.conv1(torch.mul(posemap,map_weight)) 43 | #print('att-1-size={}'.format(att.shape)) 44 | att = F.relu(att) 45 | att = self.pool(att) 46 | att = self.conv1(att) 47 | #print('att-2-size={}'.format(att.shape)) 48 | att = F.softmax(att) 49 | #print('att-3-size={}'.format(att.shape)) 50 | att = self.atten(att) 51 | #print('att-4-size={}'.format(att.shape)) 52 | att = att.expand(b,c,h,w) 53 | _feat = torch.mul(feat,att) 54 | feat = _feat + feat 55 | return self.head(feat) -------------------------------------------------------------------------------- /trackers/ReidModels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/backbone/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .lrn import SpatialCrossMapLRN 7 | 8 | 9 | class Inception(nn.Module): 10 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 11 | super(Inception, self).__init__() 12 | # 1x1 conv branch 13 | self.b1 = nn.Sequential( 14 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 15 | nn.ReLU(True), 16 | ) 17 | 18 | # 1x1 conv -> 3x3 conv branch 19 | self.b2 = nn.Sequential( 20 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.ReLU(True), 24 | ) 25 | 26 | # 1x1 conv -> 5x5 conv branch 27 | self.b3 = nn.Sequential( 28 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 29 | nn.ReLU(True), 30 | 31 | nn.Conv2d(n5x5red, n5x5, kernel_size=5, padding=2), 32 | nn.ReLU(True), 33 | ) 34 | 35 | # 3x3 pool -> 1x1 conv branch 36 | self.b4 = nn.Sequential( 37 | nn.MaxPool2d(3, stride=1, padding=1), 38 | 39 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 40 | nn.ReLU(True), 41 | ) 42 | 43 | def forward(self, x): 44 | y1 = self.b1(x) 45 | y2 = self.b2(x) 46 | y3 = self.b3(x) 47 | y4 = self.b4(x) 48 | return torch.cat([y1,y2,y3,y4], 1) 49 | 50 | 51 | class GoogLeNet(nn.Module): 52 | 53 | output_channels = 832 54 | 55 | def __init__(self): 56 | super(GoogLeNet, self).__init__() 57 | self.pre_layers = nn.Sequential( 58 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 59 | nn.ReLU(True), 60 | 61 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 62 | SpatialCrossMapLRN(5), 63 | 64 | nn.Conv2d(64, 64, 1), 65 | nn.ReLU(True), 66 | 67 | nn.Conv2d(64, 192, 3, padding=1), 68 | nn.ReLU(True), 69 | 70 | SpatialCrossMapLRN(5), 71 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 72 | ) 73 | 74 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 75 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 76 | 77 | self.maxpool = nn.MaxPool2d(3, stride=2, ceil_mode=True) 78 | 79 | self.a4 = Inception(480, 192, 96, 
208, 16, 48, 64) 80 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 81 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) 82 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 83 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 84 | 85 | def forward(self, x): 86 | out = self.pre_layers(x) 87 | out = self.a3(out) 88 | out = self.b3(out) 89 | out = self.maxpool(out) 90 | out = self.a4(out) 91 | out = self.b4(out) 92 | out = self.c4(out) 93 | out = self.d4(out) 94 | out = self.e4(out) 95 | 96 | return out 97 | -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/lrn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.legacy.nn import SpatialCrossMapLRN as SpatialCrossMapLRNOld 3 | from torch.autograd import Function, Variable 4 | from torch.nn import Module 5 | 6 | 7 | # function interface, internal, do not use this one!!! 8 | class SpatialCrossMapLRNFunc(Function): 9 | 10 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 11 | self.size = size 12 | self.alpha = alpha 13 | self.beta = beta 14 | self.k = k 15 | 16 | def forward(self, input): 17 | self.save_for_backward(input) 18 | self.lrn = SpatialCrossMapLRNOld(self.size, self.alpha, self.beta, self.k) 19 | self.lrn.type(input.type()) 20 | return self.lrn.forward(input) 21 | 22 | def backward(self, grad_output): 23 | input, = self.saved_tensors 24 | return self.lrn.backward(input, grad_output) 25 | 26 | 27 | # use this one instead 28 | class SpatialCrossMapLRN(Module): 29 | def __init__(self, size, alpha=1e-4, beta=0.75, k=1): 30 | super(SpatialCrossMapLRN, self).__init__() 31 | self.size = size 32 | self.alpha = alpha 33 | self.beta = beta 34 | self.k = k 35 | 36 | def forward(self, input): 37 | return SpatialCrossMapLRNFunc(self.size, self.alpha, self.beta, self.k)(input) -------------------------------------------------------------------------------- /trackers/ReidModels/backbone/sqeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision import models 5 | 6 | 7 | class DilationLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, padding='same_padding', dilation=1, bn=False): 9 | super(DilationLayer, self).__init__() 10 | if padding == 'same_padding': 11 | padding = int((kernel_size - 1) / 2 * dilation) 12 | self.Dconv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 13 | padding=padding, dilation=dilation) 14 | self.Drelu = nn.ReLU(inplace=True) 15 | self.Dbn = nn.BatchNorm2d(out_channels) if bn else None 16 | 17 | def forward(self, x): 18 | x = self.Dconv(x) 19 | if self.Dbn is not None: 20 | x = self.Dbn(x) 21 | x = self.Drelu(x) 22 | return x 23 | 24 | 25 | class FeatExtractorSqueezeNetx16(nn.Module): 26 | n_feats = [64, 128, 256, 512] 27 | 28 | def __init__(self, pretrained=True): 29 | 30 | super(FeatExtractorSqueezeNetx16, self).__init__() 31 | print("loading layers from squeezenet1_1...") 32 | sq = models.squeezenet1_1(pretrained=pretrained) 33 | 34 | self.conv1 = nn.Sequential( 35 | sq.features[0], 36 | sq.features[1], 37 | ) 38 | self.conv2 = nn.Sequential( 39 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 40 | sq.features[3], 41 | sq.features[4], 42 | ) 43 | self.conv3 = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 45 | sq.features[6], 46 | 
sq.features[7], 47 | ) 48 | self.conv4 = nn.Sequential( 49 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 50 | sq.features[9], 51 | sq.features[10], 52 | sq.features[11], 53 | sq.features[12], 54 | ) 55 | 56 | self.conv1[0].padding = (1, 1) 57 | 58 | def forward(self, x): 59 | x2 = self.conv1(x) 60 | x4 = self.conv2(x2) 61 | x8 = self.conv3(x4) 62 | x16 = self.conv4(x8) 63 | 64 | return x2, x4, x8, x16 65 | -------------------------------------------------------------------------------- /trackers/ReidModels/bn_linear.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: tanghy 4 | @contact: thutanghy@gmail.com 5 | """ 6 | 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | def bn_no_bias(in_features): 11 | bn_layer = nn.BatchNorm1d(in_features) 12 | bn_layer.bias.requires_grad_(False) 13 | return bn_layer 14 | 15 | def weights_init_kaiming(m): 16 | classname = m.__class__.__name__ 17 | if classname.find('Linear') != -1: 18 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') 19 | if m.bias is not None: 20 | nn.init.constant_(m.bias, 0.0) 21 | elif classname.find('Conv') != -1: 22 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') 23 | if m.bias is not None: 24 | nn.init.constant_(m.bias, 0.0) 25 | elif classname.find('BatchNorm') != -1: 26 | if m.affine: 27 | nn.init.constant_(m.weight, 1.0) 28 | nn.init.constant_(m.bias, 0.0) 29 | 30 | 31 | def weights_init_classifier(m): 32 | classname = m.__class__.__name__ 33 | if classname.find('Linear') != -1: 34 | nn.init.normal_(m.weight, std=0.001) 35 | if m.bias is not None: 36 | nn.init.constant_(m.bias, 0.0) 37 | 38 | class BNneckLinear(nn.Module): 39 | 40 | def __init__(self, nID): 41 | super().__init__() 42 | self._num_classes = nID 43 | 44 | self.gap = nn.AdaptiveAvgPool2d(1) 45 | self.bnneck = bn_no_bias(2048) 46 | self.bnneck.apply(weights_init_kaiming) 47 | 48 | self.classifier = nn.Linear(2048, self._num_classes, bias=False) 49 | self.classifier.apply(weights_init_classifier) 50 | 51 | def forward(self, features): 52 | """ 53 | See :class:`ReIDHeads.forward`. 
54 | """ 55 | global_features = self.gap(features) 56 | global_features = global_features.view(global_features.shape[0], -1) 57 | bn_features = self.bnneck(global_features) 58 | 59 | if not self.training: 60 | return F.normalize(bn_features) 61 | 62 | pred_class_logits = self.classifier(bn_features) 63 | return global_features, pred_class_logits -------------------------------------------------------------------------------- /trackers/ReidModels/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/classification/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/classification/classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from models import net_utils 9 | from models.classification.rfcn_cls import Model as CLSModel 10 | 11 | 12 | def _factor_closest(num, factor, is_ceil=True): 13 | num = float(num) / factor 14 | num = np.ceil(num) if is_ceil else np.floor(num) 15 | return int(num) * factor 16 | 17 | 18 | def crop_with_factor(im, dest_size, factor=32, pad_val=0, basedon='min'): 19 | im_size_min, im_size_max = np.min(im.shape[0:2]), np.max(im.shape[0:2]) 20 | im_base = {'min': im_size_min, 21 | 'max': im_size_max, 22 | 'w': im.shape[1], 23 | 'h': im.shape[0]} 24 | im_scale = float(dest_size) / im_base.get(basedon, im_size_min) 25 | 26 | # Scale the image. 27 | im = cv2.resize(im, None, fx=im_scale, fy=im_scale) 28 | 29 | # Compute the padded image shape. Ensure it's divisible by factor. 30 | h, w = im.shape[:2] 31 | new_h, new_w = _factor_closest(h, factor), _factor_closest(w, factor) 32 | new_shape = [new_h, new_w] if im.ndim < 3 else [new_h, new_w, im.shape[-1]] 33 | 34 | # Pad the image. 35 | im_padded = np.full(new_shape, fill_value=pad_val, dtype=im.dtype) 36 | im_padded[0:h, 0:w] = im 37 | 38 | return im_padded, im_scale, im.shape 39 | 40 | 41 | class PatchClassifier(object): 42 | def __init__(self, gpu=0): 43 | self.gpu = gpu 44 | 45 | ckpt = 'data/squeezenet_small40_coco_mot16_ckpt_10.h5' 46 | model = CLSModel(extractor='squeezenet') 47 | 48 | # from mcmtt.network.experiments.rfcn_cls2 import Model as CLSModel 49 | # ckpt = '/extra/models/resnet50_small40_coco_kitti/ckpt_31.h5' 50 | # model = CLSModel(extractor='resnet50') 51 | 52 | net_utils.load_net(ckpt, model) 53 | model = model.eval() 54 | self.model = model.cuda(self.gpu) 55 | print('load cls model from: {}'.format(ckpt)) 56 | self.score_map = None 57 | self.im_scale = 1. 58 | 59 | @staticmethod 60 | def im_preprocess(image): 61 | # resize and padding 62 | # real_inp_size = min_size 63 | if min(image.shape[0:2]) > 720: 64 | real_inp_size = 640 65 | else: 66 | real_inp_size = 368 67 | im_pad, im_scale, real_shape = crop_with_factor(image, real_inp_size, factor=16, pad_val=0, basedon='min') 68 | 69 | # preprocess image 70 | im_croped = cv2.cvtColor(im_pad, cv2.COLOR_BGR2RGB) 71 | im_croped = im_croped.astype(np.float32) / 255. 
- 0.5 72 | 73 | return im_croped, im_pad, real_shape, im_scale 74 | 75 | def update(self, image): 76 | im_croped, im_pad, real_shape, im_scale = self.im_preprocess(image) 77 | 78 | self.im_scale = im_scale 79 | self.ori_image_shape = image.shape 80 | im_data = torch.from_numpy(im_croped).permute(2, 0, 1) 81 | im_data = im_data.unsqueeze(0) 82 | 83 | # forward 84 | if LooseVersion(torch.__version__) > LooseVersion('0.3.1'): 85 | with torch.no_grad(): 86 | im_var = Variable(im_data).cuda(self.gpu) 87 | self.score_map = self.model(im_var) 88 | else: 89 | im_var = Variable(im_data, volatile=True).cuda(self.gpu) 90 | self.score_map = self.model(im_var) 91 | 92 | return real_shape, im_scale 93 | 94 | def predict(self, rois): 95 | """ 96 | :param rois: numpy array [N, 4] ( x1, y1, x2, y2) 97 | :return: scores [N] 98 | """ 99 | scaled_rois = rois * self.im_scale 100 | cls_scores = self.model.get_cls_score_numpy(self.score_map, scaled_rois) 101 | 102 | # check area 103 | rois = rois.reshape(-1, 4) 104 | clipped_boxes = bbox_utils.clip_boxes(rois, self.ori_image_shape) 105 | 106 | ori_areas = (rois[:, 2] - rois[:, 0]) * (rois[:, 3] - rois[:, 1]) 107 | areas = (clipped_boxes[:, 2] - clipped_boxes[:, 0]) * (clipped_boxes[:, 3] - clipped_boxes[:, 1]) 108 | ratios = areas / np.clip(ori_areas, a_min=1e-4, a_max=None) 109 | cls_scores[ratios < 0.5] = 0 110 | 111 | return cls_scores 112 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/_ext/psroi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._psroi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | sources = [] 6 | headers = [] 7 | defines = [] 8 | with_cuda = False 9 | 10 | if torch.cuda.is_available(): 11 | print('Including CUDA code.') 12 | sources += ['src/psroi_pooling_cuda.c'] 13 | headers += ['src/psroi_pooling_cuda.h'] 14 | defines += [('WITH_CUDA', None)] 15 | with_cuda = True 16 | 17 | this_file = os.path.dirname(os.path.realpath(__file__)) 18 | print(this_file) 19 | extra_objects = ['src/cuda/psroi_pooling.cu.o'] 
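# NOTE: src/cuda/psroi_pooling.cu.o is produced by make.sh (via nvcc), so run make.sh before invoking this build script.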
20 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 21 | 22 | ffi = create_extension( 23 | '_ext.psroi_pooling', 24 | headers=headers, 25 | sources=sources, 26 | define_macros=defines, 27 | relative_to=__file__, 28 | with_cuda=with_cuda, 29 | extra_objects=extra_objects 30 | ) 31 | 32 | if __name__ == '__main__': 33 | ffi.build() 34 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/functions/psroi_pooling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import psroi_pooling 4 | 5 | 6 | class PSRoIPoolingFunction(Function): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | self.pooled_width = int(pooled_width) 9 | self.pooled_height = int(pooled_height) 10 | self.spatial_scale = float(spatial_scale) 11 | self.group_size = int(group_size) 12 | self.output_dim = int(output_dim) 13 | 14 | self.output = None 15 | self.mappingchannel = None 16 | self.rois = None 17 | self.feature_size = None 18 | 19 | def forward(self, features, rois): 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new().resize_(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_() 24 | mappingchannel = torch.IntTensor(num_rois, self.output_dim, self.pooled_height, self.pooled_width).zero_().cuda(features.get_device()) 25 | 26 | rtn = psroi_pooling.psroi_pooling_forward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 27 | self.group_size, self.output_dim, 28 | features, rois, output, mappingchannel) 29 | assert rtn > 0 30 | self.output = output 31 | self.mappingchannel = mappingchannel 32 | self.rois = rois 33 | self.feature_size = features.size() 34 | # print features.max(), features.min() 35 | # print rois.max(), rois.min() 36 | # print output.max(), output.min() 37 | return output 38 | 39 | def backward(self, grad_output): 40 | assert (self.feature_size is not None and grad_output.is_cuda) 41 | 42 | batch_size, num_channels, data_height, data_width = self.feature_size 43 | 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).cuda() 45 | 46 | psroi_pooling.psroi_pooling_backward_cuda(self.pooled_height, self.pooled_width, self.spatial_scale, 47 | self.output_dim, 48 | grad_output, self.rois, grad_input, self.mappingchannel) 49 | return grad_input, None 50 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | cd src/cuda 6 | echo "Compiling psroi pooling kernels by nvcc..." 
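# NOTE: -arch=sm_52 below targets Maxwell-era GPUs; if the kernel fails to build or run on newer hardware, change it to match your GPU's compute capability (e.g. sm_61, sm_70, sm_75).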
7 | ${CUDA_PATH}/bin/nvcc -c -o psroi_pooling.cu.o psroi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../../ 10 | python build.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/ReidModels/psroi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/modules/psroi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | import sys 3 | from ..functions.psroi_pooling import PSRoIPoolingFunction 4 | 5 | 6 | class PSRoIPool(Module): 7 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 8 | super(PSRoIPool, self).__init__() 9 | 10 | self.pooled_width = int(pooled_width) 11 | self.pooled_height = int(pooled_height) 12 | self.spatial_scale = float(spatial_scale) 13 | self.group_size = int(group_size) 14 | self.output_dim = int(output_dim) 15 | 16 | def forward(self, features, rois): 17 | return PSRoIPoolingFunction(self.pooled_height, self.pooled_width, self.spatial_scale, self.group_size, 18 | self.output_dim)(features, rois) 19 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/cuda/psroi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef PS_ROI_POOLING_KERNEL 2 | #define PS_ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int PSROIPoolForwardLauncher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, const int pooled_width, 11 | const float* bottom_rois, const int group_size, const int output_dim, float* top_data, int* mapping_channel, cudaStream_t stream); 12 | 13 | 14 | int PSROIPoolBackwardLauncher(const float* top_diff, const int* mapping_channel, const int batch_size, const int num_rois, const float spatial_scale, const int channels, const int height, const int width, const int pooled_width, const int pooled_height, const int output_dim, float* bottom_diff, const float* bottom_rois, cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/psroi_pooling_kernel.h" 4 | 5 | 6 | 7 | extern THCState* state; 8 | 9 | int psroi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, int group_size, int output_dim,THCudaTensor *features, THCudaTensor* rois, THCudaTensor* output, THCudaIntTensor* mappingchannel){ 10 | float* data_in = THCudaTensor_data(state, features); 11 | float* rois_in = THCudaTensor_data(state, rois); 12 | float* output_out = THCudaTensor_data(state, output); 13 | int* mappingchannel_out = THCudaIntTensor_data(state, mappingchannel); 14 | //Get # of Rois 15 | int num_rois = THCudaTensor_size(state, rois, 0); 16 | int size_rois = 
THCudaTensor_size(state, rois, 1); 17 | if (size_rois!=5) 18 | { 19 | return -1; 20 | } 21 | 22 | //Get # of batch_size 23 | int batch_size = THCudaTensor_size(state, features, 0); 24 | 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | int data_width = THCudaTensor_size(state, features, 3); 27 | int num_channels = THCudaTensor_size(state, features, 1); 28 | 29 | cudaStream_t stream = THCState_getCurrentStream(state); 30 | 31 | // call the gpu kernel for psroi_pooling 32 | PSROIPoolForwardLauncher(data_in, spatial_scale, num_rois, data_height, data_width, num_channels, pooled_height, pooled_width,rois_in, group_size, 33 | output_dim, output_out, mappingchannel_out,stream); 34 | return 1; 35 | } 36 | 37 | 38 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 39 | THCudaTensor* top_grad, THCudaTensor* rois, THCudaTensor* bottom_grad, THCudaIntTensor* mappingchannel) 40 | { 41 | float *top_grad_flat = THCudaTensor_data(state, top_grad); 42 | float *rois_flat = THCudaTensor_data(state, rois); 43 | 44 | float *bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 45 | int *mappingchannel_flat = THCudaIntTensor_data(state, mappingchannel); 46 | 47 | // Number of ROIs 48 | int num_rois = THCudaTensor_size(state, rois, 0); 49 | int size_rois = THCudaTensor_size(state, rois, 1); 50 | if (size_rois != 5) 51 | { 52 | return -1; 53 | } 54 | // batch size 55 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 56 | 57 | // data height 58 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 59 | // data width 60 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 61 | // Number of channels 62 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 63 | 64 | cudaStream_t stream = THCState_getCurrentStream(state); 65 | 66 | PSROIPoolBackwardLauncher(top_grad_flat, mappingchannel_flat, batch_size, num_rois, spatial_scale, num_channels, data_height, data_width, pooled_width, pooled_height, output_dim, bottom_grad_flat, rois_flat, stream); 67 | return 1; 68 | } 69 | -------------------------------------------------------------------------------- /trackers/ReidModels/psroi_pooling/src/psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int psroi_pooling_forward_cuda( int pooled_height, int pooled_width, float spatial_scale,int group_size, int output_dim, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * mappingchannel); 3 | 4 | int psroi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, int output_dim, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * mappingchannel); 6 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from distutils.version import LooseVersion 4 | import torch 5 | from torch.autograd import Variable 6 | 7 | from utils import bbox as bbox_utils 8 | from utils.log import logger 9 | from ReidModels import net_utils 10 | from ReidModels.reid.image_part_aligned import Model 11 | 12 | 13 | def load_reid_model(): 14 | model = Model(n_parts=8) 15 | model.inp_size = (80, 160) 16 | ckpt = 'data/googlenet_part8_all_xavier_ckpt_56.h5' 17 | 18 | net_utils.load_net(ckpt, model) 19 | logger.info('Load ReID model from {}'.format(ckpt)) 20 
| 21 | model = model.cuda() 22 | model.eval() 23 | return model 24 | 25 | 26 | def im_preprocess(image): 27 | image = np.asarray(image, np.float32) 28 | image -= np.array([104, 117, 123], dtype=np.float32).reshape(1, 1, -1) 29 | image = image.transpose((2, 0, 1)) 30 | return image 31 | 32 | 33 | def extract_image_patches(image, bboxes): 34 | bboxes = np.round(bboxes).astype(int) # np.int is deprecated/removed in recent NumPy 35 | bboxes = bbox_utils.clip_boxes(bboxes, image.shape) 36 | patches = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes] 37 | return patches 38 | 39 | 40 | def extract_reid_features(reid_model, image, tlbrs): 41 | if len(tlbrs) == 0: 42 | return torch.FloatTensor() 43 | 44 | patches = extract_image_patches(image, tlbrs) 45 | patches = np.asarray([im_preprocess(cv2.resize(p, reid_model.inp_size)) for p in patches], dtype=np.float32) 46 | 47 | gpu = net_utils.get_device(reid_model) 48 | with torch.no_grad(): 49 | _img = torch.from_numpy(patches) 50 | if gpu: 51 | _img = _img.cuda() 52 | features, id = reid_model(_img) # the model returns (normed_feature, id_logits); detach only the feature tensor 53 | return features.detach() 54 | -------------------------------------------------------------------------------- /trackers/ReidModels/reid/image_part_aligned.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.backbone.googlenet import GoogLeNet 6 | 7 | 8 | class Model(nn.Module): 9 | def __init__(self, n_parts=8,n_ID=300): 10 | super(Model, self).__init__() 11 | self.n_parts = n_parts 12 | self.nID = n_ID 13 | 14 | self.feat_conv = GoogLeNet() 15 | self.conv_input_feat = nn.Conv2d(self.feat_conv.output_channels, 512, 1) 16 | # part net 17 | self.conv_att = nn.Conv2d(512, self.n_parts, 1) 18 | 19 | for i in range(self.n_parts): 20 | setattr(self, 'linear_feature{}'.format(i+1), nn.Linear(512, 64)) 21 | self.id_classifer = nn.Linear(512,self.nID) 22 | def forward(self, x): 23 | feature = self.feat_conv(x) 24 | feature = self.conv_input_feat(feature) 25 | 26 | att_weights = torch.sigmoid(self.conv_att(feature)) 27 | 28 | linear_feautres = [] 29 | for i in range(self.n_parts): 30 | masked_feature = feature * torch.unsqueeze(att_weights[:, i], 1) 31 | pooled_feature = F.avg_pool2d(masked_feature, masked_feature.size()[2:4]) 32 | linear_feautres.append( 33 | getattr(self, 'linear_feature{}'.format(i+1))(pooled_feature.view(pooled_feature.size(0), -1)) 34 | ) 35 | 36 | concat_features = torch.cat(linear_feautres, 1) 37 | normed_feature = concat_features / torch.clamp(torch.norm(concat_features, 2, 1, keepdim=True), min=1e-6) 38 | out = self.id_classifer(normed_feature) 39 | return normed_feature,out 40 | -------------------------------------------------------------------------------- /trackers/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | def track(tracker,args,orig_img,inps,boxes,hm,cropped_boxes,im_name,scores): 4 | hm = hm.cpu().data.numpy() 5 | online_targets = tracker.update(orig_img,inps,boxes,hm,cropped_boxes,im_name,scores,_debug=False) 6 | new_boxes,new_scores,new_ids,new_hm,new_crop = [],[],[],[],[] 7 | for t in online_targets: 8 | tlbr = t.tlbr 9 | tid = t.track_id 10 | thm = t.pose 11 | tcrop = t.crop_box 12 | tscore = t.detscore 13 | new_boxes.append(tlbr) 14 | new_crop.append(tcrop) 15 | new_hm.append(thm) 16 | new_ids.append(tid) 17 | new_scores.append(tscore) 18 | 19 | new_hm = torch.Tensor(new_hm).to(args.device) 20 | return 
new_boxes,new_scores,new_ids,new_hm,new_crop 21 | -------------------------------------------------------------------------------- /trackers/tracker_cfg.py: -------------------------------------------------------------------------------- 1 | from easydict import EasyDict as edict 2 | cfg = edict() 3 | cfg.nid = 1000 4 | cfg.arch = "osnet_ain" # "osnet" or "res50-fc512" 5 | cfg.loadmodel = "trackers/weights/osnet_ain_x1_0_msmt17_256x128_amsgrad_ep50_lr0.0015_coslr_b64_fb10_softmax_labsmth_flip_jitter.pth" 6 | cfg.frame_rate = 30 7 | cfg.track_buffer = 240 8 | cfg.conf_thres = 0.5 9 | cfg.nms_thres = 0.4 10 | cfg.iou_thres = 0.5 11 | -------------------------------------------------------------------------------- /trackers/tracking/README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | Track Association part adapted from [Towards-Realtime-MOT](https://github.com/Zhongdao/Towards-Realtime-MOT), many thanks to their wonderful work! 3 | -------------------------------------------------------------------------------- /trackers/tracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/tracking/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | 54 | -------------------------------------------------------------------------------- /trackers/tracking/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akshaykadam771/Suspicious-Activity-Detection-Using-Pose-Estimation/3a2e739921ae9f198c63dd6ffd2e6827773994d8/trackers/tracking/utils/__init__.py -------------------------------------------------------------------------------- /trackers/tracking/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = 
'{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... % 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /trackers/tracking/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from ._utils import _C 3 | from utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /trackers/tracking/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value.strip() 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/tracking/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 
45 | 46 | -------------------------------------------------------------------------------- /trackers/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... 
% 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | if is_gt: 79 | if 'MOT16-' in filename or 'MOT17-' in filename: 80 | label = int(float(linelist[7])) 81 | mark = int(float(linelist[6])) 82 | if mark == 0 or label not in valid_labels: 83 | continue 84 | score = 1 85 | elif is_ignore: 86 | if 'MOT16-' in filename or 'MOT17-' in filename: 87 | label = int(float(linelist[7])) 88 | vis_ratio = float(linelist[8]) 89 | if label not in ignore_labels and vis_ratio >= 0: 90 | continue 91 | else: 92 | continue 93 | score = 1 94 | else: 95 | score = float(linelist[6]) 96 | 97 | tlwh = tuple(map(float, linelist[2:6])) 98 | target_id = int(linelist[1]) 99 | 100 | results_dict[fid].append((tlwh, target_id, score)) 101 | 102 | return results_dict 103 | 104 | 105 | def unzip_objs(objs): 106 | if len(objs) > 0: 107 | tlwhs, ids, scores = zip(*objs) 108 | else: 109 | tlwhs, ids, scores = [], [], [] 110 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 111 | 112 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /trackers/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /trackers/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | if value[0] == '$': 18 | value = module_defs[0].get(value.strip('$'), None) 19 | module_defs[-1][key.rstrip()] = value 20 | 21 | return module_defs 22 | 23 | 24 | def parse_data_cfg(path): 25 | """Parses the data configuration file""" 26 | options = dict() 27 | options['gpus'] = '0' 28 | options['num_workers'] = '10' 29 | with open(path, 'r') as fp: 30 | lines = fp.readlines() 31 | for line in lines: 32 | line = line.strip() 33 | if line == '' or line.startswith('#'): 34 | continue 35 | key, value = line.split('=') 36 | options[key.strip()] = 
value.strip() 37 | return options 38 | -------------------------------------------------------------------------------- /trackers/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0. 45 | 46 | --------------------------------------------------------------------------------
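For quick reference, the snippet below is a minimal usage sketch of the `Timer` helper defined in `trackers/utils/timer.py` above. It assumes the repository root is on `sys.path` so that `trackers.utils.timer` is importable; the `time.sleep` call is only a stand-in for per-frame detection and tracking work.

```python
import time

from trackers.utils.timer import Timer  # assumes the repo root is on sys.path

timer = Timer()
for _ in range(5):
    timer.tic()           # start timing one iteration
    time.sleep(0.01)      # placeholder for per-frame detection / tracking work
    timer.toc()           # accumulates total_time, calls and average_time

print('calls: {}, average: {:.4f}s, total: {:.4f}s'.format(
    timer.calls, timer.average_time, timer.total_time))
```

Note that `toc(average=True)` (the default) returns the running average rather than the last interval, so calling it once per frame yields a smoothed per-frame time.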