├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── demo ├── _init_paths.py ├── centernet_tensorrt_engine.py ├── convert2onnx.py ├── demo_main.py ├── face │ ├── __init__.py │ ├── centerface.py │ ├── demo.py │ ├── prnet.py │ ├── reid │ │ ├── __init__.py │ │ ├── reid_manager.py │ │ └── reid_table │ │ │ ├── __init__.py │ │ │ ├── base_idbase.py │ │ │ ├── head_pose_base.py │ │ │ └── reid_utils.py │ ├── resfcn256.py │ └── utils │ │ ├── BFM_UV.mat │ │ ├── cv_plot.py │ │ ├── estimate_pose.py │ │ ├── generate_posmap_300WLP.py │ │ ├── losses.py │ │ ├── render.py │ │ ├── render_app.py │ │ ├── rotate_vertices.py │ │ ├── utils.py │ │ └── uv_data │ │ ├── canonical_vertices.npy │ │ ├── face_ind.txt │ │ ├── triangles.txt │ │ ├── uv_kpt_ind.txt │ │ └── uv_weight_mask_gdh.png ├── result.png ├── tensorrt_model.py └── tracking │ ├── __init__.py │ ├── deep_sort.py │ ├── feature_extractor.py │ ├── model.py │ ├── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py │ └── util.py ├── experiments ├── darknet53_512x512.yaml ├── dla_34_512x512.yaml ├── efficientdet_512x512.yaml ├── ghost_net.yaml ├── hardnet_512x512.yaml ├── hrnet_w32_512.yaml ├── hrnet_w48_512.yaml ├── mobilenetv2_512x512.yaml ├── mobilenetv3_512x512.yaml ├── res_50_512x512.yaml └── shufflenetV2_512x512.yaml ├── images └── image1.jpeg ├── lib ├── config │ ├── __init__.py │ └── default.py ├── datasets │ ├── coco_hp.py │ ├── data.py │ ├── dataset_factory.py │ └── multi_pose.py ├── detectors │ ├── base_detector.py │ ├── detector_factory.py │ └── multi_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── build │ │ └── temp.linux-x86_64-3.6 │ │ │ └── nms.o │ ├── make.sh │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── backbones │ │ ├── DCNv2 │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── dcn_v2.py │ │ │ ├── make.sh │ │ │ ├── setup.py │ │ │ ├── src │ │ │ │ ├── cpu │ │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ │ └── vision.h │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ └── vision.h │ │ │ │ ├── dcn_v2.h │ │ │ │ └── vision.cpp │ │ │ └── test.py │ │ ├── Utitled Document │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── dlav0.py │ │ ├── efficientdet │ │ │ ├── __init__.py │ │ │ ├── bifpn.py │ │ │ ├── conv_module.py │ │ │ ├── efficientdet.py │ │ │ ├── efficientnet.py │ │ │ ├── module.py │ │ │ ├── retinahead.py │ │ │ └── utils.py │ │ ├── ghost_net.py │ │ ├── hardnet.py │ │ ├── large_hourglass.py │ │ ├── mobilenet │ │ │ ├── __init__.py │ │ │ ├── mobilenetv2.py │ │ │ └── mobilenetv3.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ ├── pose_higher_hrnet.py │ │ ├── resnet_dcn.py │ │ ├── shufflenetv2_dcn.py │ │ └── test_mode.py │ ├── decode.py │ ├── heads │ │ ├── __init__.py │ │ ├── keypoint.py │ │ └── mask.py │ ├── losses.py │ ├── model.py │ └── utils.py ├── trains │ ├── base_trainer.py │ ├── multi_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── demo.gif ├── fig2.png ├── multi_pose_screenshot_27.11.2019.png ├── performance.png └── plot_speed_accuracy.py ├── requirements.txt ├── samples ├── _init_paths.py ├── data_inspect_utils.py └── inspect_data.ipynb └── tools ├── 
_init_paths.py ├── demo.py ├── evaluate.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | *~ 3 | *so 4 | lib/models/backbones/DCNv2/build/* 5 | lib/models/backbones/DCNv2/DCNv2.egg-info/* 6 | *build/* 7 | */.vscode/* 8 | *__pycache__* 9 | *.ipynb_checkpoints* 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This repo is based on [CenterNet](https://arxiv.org/abs/1904.07850) and aims to push the boundary of human pose estimation 2 | Multi-person pose estimation using center point detection: 3 | ![](readme/fig2.png) 4 | 5 | ## Main results 6 | 7 | ### Keypoint detection on COCO validation 2017 8 |

9 | 10 | | Backbone | AP | FPS | TensorRT FPS | GFLOPs | Download | 11 | |--------------|-----------|--------------|----------|----------|----------| 12 | |DLA-34 | 62.7 | 23 | - | - |[model](https://drive.google.com/open?id=1IahJ3vpjTVu1p-Okf6lcn-bM7fVKNg6N) | 13 | |Resnet-50 | 54.5 | 28 | 33 | - |[model](https://drive.google.com/open?id=1oBgWrfigo2fGtpQJXQ0stADTgVFxPWGq) | 14 | |MobilenetV3 | 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1snJnADAD1NUzyO1QXCftuZu1rsr8095G) | 15 | |ShuffleNetV2 | 43.9 | 25 | - | - |[model](https://drive.google.com/open?id=1FK7YQzCB6mLcb0v4SOmlqtRJfA-PQSvN) | 16 | |[HRNet_W32](https://drive.google.com/open?id=1mJoK7KEx35Wgf6uAZ-Ez5IwAeOk1RYw0)| 63.8 | 16 | - | - |[model](https://drive.google.com/open?id=1X0yxGeeNsD4VwU2caDo-BaH_MoCAnU_J) | 17 | |[HardNet](https://github.com/PingoLH/FCHarDNet)| 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1CFc_qAAT4NFfrAG8JOxRVG8CAw9ySuYp) | 18 | |[Darknet53]()| 34.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) | 19 | |[EfficientDet]()| 38.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) | 20 | 21 | ## Installation 22 | 23 | Initialize the submodules with `git submodule init && git submodule update`. 24 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 25 | 26 | ## Use CenterNet 27 | 28 | We support demos on a single image, an image folder, a video file, and webcam input. 29 | 30 | First, download the model [DLA-34](https://drive.google.com/open?id=1OkHjjViB0dzbuicdtIam-YcoT0sYpmjP) 31 | from the [Model zoo](https://drive.google.com/open?id=1UG2l8XtjOfBtG_GLpSdxlWS2wxFR8hQF) and put it anywhere you like. 32 | 33 | Run: 34 | 35 | ~~~ 36 | cd tools; python demo.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE ../images/33823288584_1d21cf0a26_k.jpg --DEBUG 1 37 | ~~~ 38 | The result for the example image should look like: 39 |

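If you would rather call the detector from Python than through the CLI, the sketch below shows roughly what that could look like. It is an illustration only, not code taken from `tools/demo.py`: the `update_config` helper, the `detector_factory` lookup, the `'multi_pose'` task key, and `detector.run()` are assumptions modelled on the original CenterNet layout (`lib/config/default.py`, `lib/detectors/detector_factory.py`, `lib/detectors/base_detector.py`), so check `tools/demo.py` for the actual entry point.

```
# Hypothetical sketch -- update_config, detector_factory, 'multi_pose' and
# detector.run() are assumed names, not the verified API of this repo.
import cv2

from config import cfg, update_config                     # lib/config
from detectors.detector_factory import detector_factory   # lib/detectors

update_config(cfg, '../experiments/dla_34_512x512.yaml')  # assumed config helper
detector = detector_factory['multi_pose'](cfg)            # assumed task key

image = cv2.imread('../images/image1.jpeg')
ret = detector.run(image)                                  # assumed to return detections/keypoints
print(ret)
```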
40 | 41 | ## Evaluation 42 | ~~~ 43 | cd tools; python evaluate.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE --DEBUG 0 44 | ~~~ 45 | 46 | ## Training 47 | 48 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to set up the datasets. 49 | 50 | We provide config files for all the experiments in the [experiments](experiments) folder. 51 | 52 | ``` 53 | cd ./tools; python -m torch.distributed.launch --nproc_per_node 4 train.py --cfg ../experiments/<config>.yaml 54 | ``` 55 | 56 | ## Demo 57 | 58 | The demo code lives in the `demo` directory; combined, it forms a fairly robust human detection + tracking + face re-identification (reid) system. A sketch of how the pieces fit together is shown below. 59 | 60 |

61 | 62 |

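To make that concrete, here is a minimal, hedged sketch of how the pieces under `demo/` could be wired together: a person detector (the `detect_people` stub below is a hypothetical stand-in for the CenterNet pose detector), the `DeepSort` tracker from `demo/tracking/deep_sort.py`, and `CenterFace` from `demo/face/centerface.py`. The `DeepSort` and `CenterFace` interfaces match the code in this repo, but the glue loop itself is an illustrative assumption rather than the actual `demo/demo_main.py`.

```
import cv2
import numpy as np

from face.centerface import CenterFace      # assumed import path from demo/
from tracking.deep_sort import DeepSort


def detect_people(frame):
    # Hypothetical stub: return (N, 4) person boxes as (x1, y1, w, h) and (N,) scores.
    return np.zeros((0, 4)), np.zeros((0,))


deepsort = DeepSort('/path/to/deepsort_ckpt.t7')          # re-id weights for the tracker
centerface = CenterFace('/path/to/centerface.onnx')       # face detector (ONNX)

cap = cv2.VideoCapture(0)                                 # webcam, or a video file path
while True:
    ok, frame = cap.read()
    if not ok:
        break

    boxes_xywh, scores = detect_people(frame)
    tracks = deepsort.update(boxes_xywh, scores, frame)   # rows are (x1, y1, x2, y2, track_id)

    centerface.transform(*frame.shape[:2])                # fit the network input to this frame size
    faces, landmarks = centerface(frame, threshold=0.35)  # (M, 5) boxes+score, (M, 10) landmarks

    for x1, y1, x2, y2, track_id in tracks:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, str(track_id), (int(x1), int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    for x1, y1, x2, y2, _ in faces:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (2, 255, 0), 1)

    cv2.imshow('demo', frame)
    if cv2.waitKey(1) == 27:                              # Esc quits
        break
```

In the full demo, the face crops would additionally be passed through the re-identification tables under `demo/face/reid/` so that identities stay stable when people leave and re-enter the scene.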
63 | 64 | ## License 65 | 66 | MIT License (refer to the LICENSE file for details). 67 | 68 | ## Citation 69 | 70 | If you find this project useful for your research, please use the following BibTeX entry. 71 | 72 | @inproceedings{zhou2019objects, 73 | title={Objects as Points}, 74 | author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp}, 75 | booktitle={arXiv preprint arXiv:1904.07850}, 76 | year={2019} 77 | } 78 | -------------------------------------------------------------------------------- /demo/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /demo/face/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/__init__.py -------------------------------------------------------------------------------- /demo/face/centerface.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | class CenterFace(object): 8 | def __init__(self, model_path, landmarks=True): 9 | self.landmarks = landmarks 10 | if self.landmarks: 11 | self.net = cv2.dnn.readNetFromONNX(model_path) 12 | else: 13 | self.net = cv2.dnn.readNetFromONNX('cface.1k.onnx') 14 | 15 | def __call__(self, img, threshold=0.5): 16 | blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False) 17 | self.net.setInput(blob) 18 | begin = datetime.datetime.now() 19 | if self.landmarks: 20 | heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540']) 21 | else: 22 | heatmap, scale, offset = self.net.forward(["535", "536", "537"]) 23 | 24 | end = datetime.datetime.now() 25 | print("cpu times = ", end - begin) 26 | if self.landmarks: 27 | dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold) 28 | else: 29 | dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold) 30 | 31 | if len(dets) > 0: 32 | dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h 33 | if self.landmarks: 34 | lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h 35 | else: 36 | dets = np.empty(shape=[0, 5], dtype=np.float32) 37 | if self.landmarks: 38 | lms = np.empty(shape=[0, 10], dtype=np.float32) 39 | if self.landmarks: 40 | return dets, lms 41 | else: 42 | return dets 43 | 44 | def transform(self, h, w): 45 | img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32) 46 | scale_h, scale_w = img_h_new / h, img_w_new / w 47 | self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = img_h_new, img_w_new, scale_h, scale_w 48 | 49 | def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1): 50 | heatmap = np.squeeze(heatmap) 51 | scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :] 52 | offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :] 53 | c0, c1 = np.where(heatmap > threshold) 54 | if self.landmarks: 55 | 
boxes, lms = [], [] 56 | else: 57 | boxes = [] 58 | if len(c0) > 0: 59 | for i in range(len(c0)): 60 | s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4 61 | o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]] 62 | s = heatmap[c0[i], c1[i]] 63 | x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2) 64 | x1, y1 = min(x1, size[1]), min(y1, size[0]) 65 | boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s]) 66 | if self.landmarks: 67 | lm = [] 68 | for j in range(5): 69 | lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1) 70 | lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1) 71 | lms.append(lm) 72 | boxes = np.asarray(boxes, dtype=np.float32) 73 | keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3) 74 | boxes = boxes[keep, :] 75 | if self.landmarks: 76 | lms = np.asarray(lms, dtype=np.float32) 77 | lms = lms[keep, :] 78 | if self.landmarks: 79 | return boxes, lms 80 | else: 81 | return boxes 82 | 83 | def nms(self, boxes, scores, nms_thresh): 84 | x1 = boxes[:, 0] 85 | y1 = boxes[:, 1] 86 | x2 = boxes[:, 2] 87 | y2 = boxes[:, 3] 88 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 89 | order = np.argsort(scores)[::-1] 90 | num_detections = boxes.shape[0] 91 | suppressed = np.zeros((num_detections,), dtype=np.bool) 92 | 93 | keep = [] 94 | for _i in range(num_detections): 95 | i = order[_i] 96 | if suppressed[i]: 97 | continue 98 | keep.append(i) 99 | 100 | ix1 = x1[i] 101 | iy1 = y1[i] 102 | ix2 = x2[i] 103 | iy2 = y2[i] 104 | iarea = areas[i] 105 | 106 | for _j in range(_i + 1, num_detections): 107 | j = order[_j] 108 | if suppressed[j]: 109 | continue 110 | 111 | xx1 = max(ix1, x1[j]) 112 | yy1 = max(iy1, y1[j]) 113 | xx2 = min(ix2, x2[j]) 114 | yy2 = min(iy2, y2[j]) 115 | w = max(0, xx2 - xx1 + 1) 116 | h = max(0, yy2 - yy1 + 1) 117 | 118 | inter = w * h 119 | ovr = inter / (iarea + areas[j] - inter) 120 | if ovr >= nms_thresh: 121 | suppressed[j] = True 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /demo/face/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import scipy.io as sio 5 | 6 | from centerface import CenterFace 7 | 8 | 9 | def test_image(image_path, model_path): 10 | frame = cv2.imread(image_path) 11 | h, w = frame.shape[:2] 12 | landmarks = True 13 | centerface = CenterFace(model_path=model_path, landmarks=landmarks) 14 | centerface.transform(h, w) 15 | if landmarks: 16 | dets, lms = centerface(frame, threshold=0.35) 17 | else: 18 | dets = centerface(frame, threshold=0.35) 19 | 20 | for det in dets: 21 | boxes, score = det[:4], det[4] 22 | cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1) 23 | if landmarks: 24 | for lm in lms: 25 | cv2.circle(frame, (int(lm[0]), int(lm[1])), 2, (0, 0, 255), -1) 26 | cv2.circle(frame, (int(lm[2]), int(lm[3])), 2, (0, 0, 255), -1) 27 | cv2.circle(frame, (int(lm[4]), int(lm[5])), 2, (0, 0, 255), -1) 28 | cv2.circle(frame, (int(lm[6]), int(lm[7])), 2, (0, 0, 255), -1) 29 | cv2.circle(frame, (int(lm[8]), int(lm[9])), 2, (0, 0, 255), -1) 30 | cv2.imshow('out', frame) 31 | cv2.waitKey(0) 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | image_path = '/home/tensorboy/centerpose/images/image1.jpg' 37 | model_path = '/home/tensorboy/CenterFace/models/onnx/centerface.onnx' 38 | test_image(image_path, model_path) 39 | 
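# --- Added illustration (not part of the original demo.py) -------------------
# A minimal sketch of running CenterFace on a video stream or webcam, reusing
# the same CenterFace interface as test_image() above. The capture source,
# window handling, and Esc-to-quit behaviour are assumptions for illustration;
# only the detection boxes are drawn here.
def test_video(source, model_path):
    centerface = CenterFace(model_path=model_path, landmarks=True)
    cap = cv2.VideoCapture(source)  # 0 for webcam, or a path to a video file
    sized = False
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if not sized:
            # the network input size only needs recomputing when the frame size changes
            centerface.transform(*frame.shape[:2])
            sized = True
        dets, lms = centerface(frame, threshold=0.35)
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        cv2.imshow('out', frame)
        if cv2.waitKey(1) == 27:  # Esc to quit
            break
    cap.release()
    cv2.destroyAllWindows()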
-------------------------------------------------------------------------------- /demo/face/reid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/__init__.py -------------------------------------------------------------------------------- /demo/face/reid/reid_manager.py: -------------------------------------------------------------------------------- 1 | from .reid_table.head_pose_base import ReidDataBase 2 | 3 | 4 | class ReIDManager(object): 5 | def __init__(self, config): 6 | self.reid_table = ReidDataBase(config) 7 | 8 | def query_targets(self, reappear_targets, ignored_targets): 9 | if len(reappear_targets) == 0: 10 | return [], [] 11 | reappear_detections = [] 12 | for single_target in reappear_targets: 13 | best_detection = self._get_detection_with_highest_face(single_target) 14 | reappear_detections.append(best_detection) 15 | 16 | ignored_id = [t.id for t in ignored_targets] 17 | hash_ids, hash_status = self.reid_table.reid_query_detections(reappear_detections, ignored_id) 18 | return hash_ids, hash_status 19 | 20 | def update_targets(self, tracked_targets): 21 | all_detections = [self._get_latest_detection(target) for target in tracked_targets] 22 | # update reid features 23 | self.reid_table.update(all_detections) 24 | 25 | def remove_targets(self, removed_targets): 26 | will_remove_ids = [target.id for target in removed_targets] 27 | self.reid_table.remove(will_remove_ids) 28 | 29 | def query_certain_id(self, detection_list, target_id): 30 | if len(detection_list) == 0: 31 | return [] 32 | return self.reid_table.reid_query_certain_id(detection_list, target_id) 33 | 34 | def _get_detection_with_highest_face(self, target): 35 | detection_list = target.last_detections # list(target.get_detections()) 36 | detection_list = sorted(detection_list, key=lambda t: t.face_score)[::-1] 37 | return detection_list[0] 38 | 39 | def _get_latest_detection(self, target): 40 | # target.last_detections: a list of last detections sorted by time (currently contains 4 detections) 41 | return target.last_detections[0] 42 | -------------------------------------------------------------------------------- /demo/face/reid/reid_table/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/reid_table/__init__.py -------------------------------------------------------------------------------- /demo/face/reid/reid_table/base_idbase.py: -------------------------------------------------------------------------------- 1 | class BaseReidDatabase(object): 2 | """ 3 | Args: 4 | dataset: a collection of (hash_id, features) pairs in some format 5 | (maybe proto?) 
6 | """ 7 | 8 | def __init__(self): 9 | self.dataset = {} 10 | 11 | def update(self, features, cameras, hash_ids): 12 | """Update dataset with features for a specific hash_id 13 | Args: 14 | features: List[M_i x L-dimensional np.float32 array] 15 | cameras: List[M_i np.float32 vetor] 16 | hash_ids: List[ids] of length M_i 17 | """ 18 | raise NotImplementedError 19 | 20 | def get_all_ids(self): 21 | return ['{:04d}'.format(abs(k) % 10000) for k, v in self.dataset.items()] 22 | 23 | def get_current_table_size(self): 24 | return len(self.dataset) 25 | 26 | def check_if_in_table(self, new_id): 27 | return new_id in self.dataset 28 | 29 | # search all persons in one frame 30 | def retrieval(self, features, cameras, tracked_ids): 31 | """Computes and returns closest entity based on features 32 | Args: 33 | features: List[M_i x L-dimensional np.float32 array] 34 | cameras: List[M_i np.float32 vetor] 35 | tracked_ids: List of ids of unknown length, confirmed ids by tracker. ReID should ignore these ids. 36 | Returns: 37 | hash_ids(list): list of ids, id could be none 38 | """ 39 | raise NotImplementedError 40 | 41 | def remove(self, hash_id): 42 | """Deletes entity with hash_id and all of it's features from the dataset 43 | Args: 44 | hash_id(string): unique string identifying the specific person 45 | """ 46 | raise NotImplementedError 47 | -------------------------------------------------------------------------------- /demo/face/reid/reid_table/reid_utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MatchFlags(Enum): 5 | MATCHED = 0 6 | BADFEATURE = 1 7 | NOTCONVINCED = 2 8 | FIRSTTIME = 3 9 | EMPTYDICT = 4 10 | UNREGISTER = 5 11 | NODETECTION = 6 12 | 13 | 14 | class HEADFLAGS(Enum): 15 | S0 = 0 16 | S1 = 1 17 | S2 = 2 18 | S3 = 3 19 | S4 = 4 20 | S5 = 5 21 | S6 = 6 22 | 23 | 24 | # Data template 25 | class Detection: 26 | def __init__(self, ind, score, camera, feature, landmarks, bbox): 27 | self.target_id = ind 28 | self.face_score = score 29 | self.features = feature 30 | self.camera_id = camera 31 | self.landmarks = landmarks 32 | self.bbox = bbox 33 | 34 | def set_new_id(self, new_id): 35 | self.target_id = new_id 36 | 37 | def get_id(self): 38 | return self.target_id 39 | 40 | def get_face_score(self): 41 | return self.face_score 42 | 43 | 44 | def assign_head_status(yaw): 45 | # if abs(yaw) > 25: 46 | # head_status = HEADFLAGS.S2 47 | # elif abs(yaw) > 15: 48 | # head_status = HEADFLAGS.S1 49 | # else: 50 | # head_status = HEADFLAGS.S0 51 | if abs(yaw) > 30: 52 | head_status = HEADFLAGS.S6 53 | elif abs(yaw) > 25: 54 | head_status = HEADFLAGS.S5 55 | elif abs(yaw) > 20: 56 | head_status = HEADFLAGS.S4 57 | elif abs(yaw) > 15: 58 | head_status = HEADFLAGS.S3 59 | elif abs(yaw) > 10: 60 | head_status = HEADFLAGS.S2 61 | elif abs(yaw) > 5: 62 | head_status = HEADFLAGS.S1 63 | else: 64 | head_status = HEADFLAGS.S0 65 | return head_status 66 | -------------------------------------------------------------------------------- /demo/face/utils/BFM_UV.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/BFM_UV.mat -------------------------------------------------------------------------------- /demo/face/utils/cv_plot.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | """ 3 | @func: draw landmark & mesh on image. 
4 | @source: YadiraF/PRNet/utils/cv_plot.py 5 | """ 6 | import cv2 7 | import numpy as np 8 | 9 | end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1 10 | 11 | 12 | def plot_kpt(image, kpt): 13 | ''' Draw 68 key points 14 | Args: 15 | image: the input image 16 | kpt: (68, 3). 17 | ''' 18 | image = image.copy() 19 | kpt = np.round(kpt).astype(np.int32) 20 | for i in range(kpt.shape[0]): 21 | st = kpt[i, :2] 22 | image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2) 23 | if i in end_list: 24 | continue 25 | ed = kpt[i + 1, :2] 26 | image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1) 27 | return image 28 | 29 | 30 | def plot_vertices(image, vertices): 31 | image = image.copy() 32 | vertices = np.round(vertices).astype(np.int32) 33 | for i in range(0, vertices.shape[0], 2): 34 | st = vertices[i, :2] 35 | image = cv2.circle(image, (st[0], st[1]), 1, (255, 0, 0), -1) 36 | return image 37 | 38 | 39 | def plot_pose_box(image, P, kpt, color=(0, 255, 0), line_width=2): 40 | ''' Draw a 3D box as annotation of pose. Ref:https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py 41 | Args: 42 | image: the input image 43 | P: (3, 4). Affine Camera Matrix. 44 | kpt: (68, 3). 45 | ''' 46 | image = image.copy() 47 | 48 | point_3d = [] 49 | rear_size = 90 50 | rear_depth = 0 51 | point_3d.append((-rear_size, -rear_size, rear_depth)) 52 | point_3d.append((-rear_size, rear_size, rear_depth)) 53 | point_3d.append((rear_size, rear_size, rear_depth)) 54 | point_3d.append((rear_size, -rear_size, rear_depth)) 55 | point_3d.append((-rear_size, -rear_size, rear_depth)) 56 | 57 | front_size = 105 58 | front_depth = 110 59 | point_3d.append((-front_size, -front_size, front_depth)) 60 | point_3d.append((-front_size, front_size, front_depth)) 61 | point_3d.append((front_size, front_size, front_depth)) 62 | point_3d.append((front_size, -front_size, front_depth)) 63 | point_3d.append((-front_size, -front_size, front_depth)) 64 | point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3) 65 | 66 | # Map to 2d image points 67 | point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4 68 | point_2d = point_3d_homo.dot(P.T)[:, :2] 69 | point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(kpt[:27, :2], 0) 70 | point_2d = np.int32(point_2d.reshape(-1, 2)) 71 | 72 | # Draw all the lines 73 | cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA) 74 | cv2.line(image, tuple(point_2d[1]), tuple( 75 | point_2d[6]), color, line_width, cv2.LINE_AA) 76 | cv2.line(image, tuple(point_2d[2]), tuple( 77 | point_2d[7]), color, line_width, cv2.LINE_AA) 78 | cv2.line(image, tuple(point_2d[3]), tuple( 79 | point_2d[8]), color, line_width, cv2.LINE_AA) 80 | 81 | return image 82 | -------------------------------------------------------------------------------- /demo/face/utils/estimate_pose.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | 3 | from math import asin, atan2, cos, sin 4 | 5 | import numpy as np 6 | 7 | 8 | def isRotationMatrix(R): 9 | ''' checks if a matrix is a valid rotation matrix(whether orthogonal or not) 10 | ''' 11 | Rt = np.transpose(R) 12 | shouldBeIdentity = np.dot(Rt, R) 13 | I = np.identity(3, dtype=R.dtype) 14 | n = np.linalg.norm(I - shouldBeIdentity) 15 | return n < 1e-6 16 | 17 | 18 | def matrix2angle(R): 19 | ''' compute three Euler angles from a Rotation Matrix. 
Ref: http://www.gregslabaugh.net/publications/euler.pdf 20 | Args: 21 | R: (3,3). rotation matrix 22 | Returns: 23 | x: yaw 24 | y: pitch 25 | z: roll 26 | ''' 27 | # assert(isRotationMatrix(R)) 28 | 29 | if R[2, 0] != 1 or R[2, 0] != -1: 30 | x = asin(R[2, 0]) 31 | y = atan2(R[2, 1] / cos(x), R[2, 2] / cos(x)) 32 | z = atan2(R[1, 0] / cos(x), R[0, 0] / cos(x)) 33 | 34 | else: # Gimbal lock 35 | z = 0 # can be anything 36 | if R[2, 0] == -1: 37 | x = np.pi / 2 38 | y = z + atan2(R[0, 1], R[0, 2]) 39 | else: 40 | x = -np.pi / 2 41 | y = -z + atan2(-R[0, 1], -R[0, 2]) 42 | 43 | return x, y, z 44 | 45 | 46 | def P2sRt(P): 47 | ''' decompositing camera matrix P. 48 | Args: 49 | P: (3, 4). Affine Camera Matrix. 50 | Returns: 51 | s: scale factor. 52 | R: (3, 3). rotation matrix. 53 | t2d: (2,). 2d translation. 54 | ''' 55 | t2d = P[:2, 3] 56 | R1 = P[0:1, :3] 57 | R2 = P[1:2, :3] 58 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 59 | r1 = R1 / np.linalg.norm(R1) 60 | r2 = R2 / np.linalg.norm(R2) 61 | r3 = np.cross(r1, r2) 62 | 63 | R = np.concatenate((r1, r2, r3), 0) 64 | return s, R, t2d 65 | 66 | 67 | def compute_similarity_transform(points_static, points_to_transform): 68 | # http://nghiaho.com/?page_id=671 69 | p0 = np.copy(points_static).T 70 | p1 = np.copy(points_to_transform).T 71 | 72 | t0 = -np.mean(p0, axis=1).reshape(3, 1) 73 | t1 = -np.mean(p1, axis=1).reshape(3, 1) 74 | t_final = t1 - t0 75 | 76 | p0c = p0 + t0 77 | p1c = p1 + t1 78 | 79 | covariance_matrix = p0c.dot(p1c.T) 80 | U, S, V = np.linalg.svd(covariance_matrix) 81 | R = U.dot(V) 82 | if np.linalg.det(R) < 0: 83 | R[:, 2] *= -1 84 | 85 | rms_d0 = np.sqrt(np.mean(np.linalg.norm(p0c, axis=0) ** 2)) 86 | rms_d1 = np.sqrt(np.mean(np.linalg.norm(p1c, axis=0) ** 2)) 87 | 88 | s = (rms_d0 / rms_d1) 89 | P = np.c_[s * np.eye(3).dot(R), t_final] 90 | return P 91 | 92 | 93 | def estimate_pose(vertices): 94 | canonical_vertices = np.load('face/utils/uv_data/canonical_vertices.npy') 95 | P = compute_similarity_transform(vertices, canonical_vertices) 96 | _, R, _ = P2sRt(P) # decompose affine matrix to s, R, t 97 | pose = matrix2angle(R) 98 | 99 | return P, pose 100 | -------------------------------------------------------------------------------- /demo/face/utils/generate_posmap_300WLP.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | ''' 3 | Generate uv position map of 300W_LP. 4 | ''' 5 | import argparse 6 | import os 7 | import sys 8 | from time import time 9 | 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import scipy.io as sio 13 | import skimage.transform 14 | from skimage import io 15 | 16 | import face3d 17 | from face3d import mesh 18 | from face3d.morphable_model import MorphabelModel 19 | 20 | sys.path.append('..') 21 | 22 | 23 | def process_uv(uv_coords, uv_h=256, uv_w=256): 24 | uv_coords[:, 0] = uv_coords[:, 0] * (uv_w - 1) 25 | uv_coords[:, 1] = uv_coords[:, 1] * (uv_h - 1) 26 | uv_coords[:, 1] = uv_h - uv_coords[:, 1] - 1 27 | uv_coords = np.hstack((uv_coords, np.zeros((uv_coords.shape[0], 1)))) # add z 28 | return uv_coords 29 | 30 | 31 | def run_posmap_300W_LP(bfm, image_path, mat_path, save_folder, idx=0, uv_h=256, uv_w=256, image_h=256, image_w=256): 32 | # 1. load image and fitted parameters 33 | image_name = image_path.strip().split('/')[-1] 34 | image = io.imread(image_path) / 255. 
35 | [h, w, c] = image.shape 36 | 37 | info = sio.loadmat(mat_path) 38 | pose_para = info['Pose_Para'].T.astype(np.float32) 39 | shape_para = info['Shape_Para'].astype(np.float32) 40 | exp_para = info['Exp_Para'].astype(np.float32) 41 | 42 | # 2. generate mesh 43 | # generate shape 44 | vertices = bfm.generate_vertices(shape_para, exp_para) 45 | # transform mesh 46 | s = pose_para[-1, 0] 47 | angles = pose_para[:3, 0] 48 | t = pose_para[3:6, 0] 49 | transformed_vertices = bfm.transform_3ddfa(vertices, s, angles, t) 50 | projected_vertices = transformed_vertices.copy() # using stantard camera & orth projection as in 3DDFA 51 | image_vertices = projected_vertices.copy() 52 | image_vertices[:, 1] = h - image_vertices[:, 1] - 1 53 | 54 | # 3. crop image with key points 55 | kpt = image_vertices[bfm.kpt_ind, :].astype(np.int32) 56 | left = np.min(kpt[:, 0]) 57 | right = np.max(kpt[:, 0]) 58 | top = np.min(kpt[:, 1]) 59 | bottom = np.max(kpt[:, 1]) 60 | center = np.array([right - (right - left) / 2.0, 61 | bottom - (bottom - top) / 2.0]) 62 | old_size = (right - left + bottom - top) / 2 63 | size = int(old_size * 1.5) 64 | # random pertube. you can change the numbers 65 | marg = old_size * 0.1 66 | t_x = np.random.rand() * marg * 2 - marg 67 | t_y = np.random.rand() * marg * 2 - marg 68 | center[0] = center[0] + t_x 69 | center[1] = center[1] + t_y 70 | size = size * (np.random.rand() * 0.2 + 0.9) 71 | 72 | # crop and record the transform parameters 73 | src_pts = np.array([[center[0] - size / 2, center[1] - size / 2], [center[0] - size / 2, center[1] + size / 2], 74 | [center[0] + size / 2, center[1] - size / 2]]) 75 | DST_PTS = np.array([[0, 0], [0, image_h - 1], [image_w - 1, 0]]) 76 | tform = skimage.transform.estimate_transform('similarity', src_pts, DST_PTS) 77 | cropped_image = skimage.transform.warp(image, tform.inverse, output_shape=(image_h, image_w)) 78 | 79 | # transform face position(image vertices) along with 2d facial image 80 | position = image_vertices.copy() 81 | position[:, 2] = 1 82 | position = np.dot(position, tform.params.T) 83 | position[:, 2] = image_vertices[:, 2] * tform.params[0, 0] # scale z 84 | position[:, 2] = position[:, 2] - np.min(position[:, 2]) # translate z 85 | 86 | # 4. uv position map: render position in uv space 87 | uv_position_map = mesh.render.render_colors(uv_coords, bfm.full_triangles, position, uv_h, uv_w, c=3) 88 | 89 | # 5. 
save files 90 | if not os.path.exists(os.path.join(save_folder, str(idx) + '/')): 91 | os.mkdir(os.path.join(save_folder, str(idx) + '/')) 92 | 93 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'original.jpg'), np.squeeze(cropped_image)) 94 | np.save('{}/{}/{}'.format(save_folder, idx, image_name.replace('jpg', 'npy')), uv_position_map) 95 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'uv_posmap.jpg'), 96 | (uv_position_map) / max(image_h, image_w)) # only for show 97 | 98 | # --verify 99 | # import cv2 100 | # uv_texture_map_rec = cv2.remap(cropped_image, uv_position_map[:,:,:2].astype(np.float32), None, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT,borderValue=(0)) 101 | # io.imsave('{}/{}'.format(save_folder, image_name.replace('.jpg', '_tex.jpg')), np.squeeze(uv_texture_map_rec)) 102 | 103 | 104 | def generate_batch_sample(input_dir, save_folder='./300WLP'): 105 | if not os.path.exists(save_folder): 106 | os.mkdir(save_folder) 107 | # set para 108 | uv_h = uv_w = 256 109 | 110 | # load uv coords 111 | global uv_coords 112 | uv_coords = face3d.morphable_model.load.load_uv_coords('BFM/BFM_UV.mat') # 113 | uv_coords = process_uv(uv_coords, uv_h, uv_w) 114 | 115 | # load bfm 116 | bfm = MorphabelModel('BFM/BFM.mat') 117 | 118 | # Batch generating uv_map Dataset 119 | """ 120 | @date: 2019/07/19 121 | Train Dataset: 122 | AFW. 10413. 123 | HELEN. 75351. 124 | LFPW. 33111. 125 | Test Dataset: 126 | IBUG. 3571. 127 | 128 | """ 129 | base = 0 130 | 131 | for idx, item in enumerate(os.listdir(input_dir)): 132 | if 'jpg' in item: 133 | ab_path = os.path.join(input_dir, item) 134 | img_path = ab_path 135 | mat_path = ab_path.replace('jpg', 'mat') 136 | 137 | run_posmap_300W_LP(bfm, img_path, mat_path, save_folder, idx + base) 138 | print("Number {} uv_pos_map was generated!".format(idx)) 139 | 140 | 141 | if __name__ == '__main__': 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument("--save_dir", help="specify output uv_map directory.") 144 | parser.add_argument("--input_dir", help="specify input origin mat & image directory.") 145 | args = parser.parse_args() 146 | 147 | generate_batch_sample(save_folder=args.save_dir, 148 | input_dir=args.input_dir) 149 | -------------------------------------------------------------------------------- /demo/face/utils/render_app.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .render import vis_of_vertices, render_texture 3 | from scipy import ndimage 4 | 5 | def get_visibility(vertices, triangles, h, w): 6 | triangles = triangles.T 7 | vertices_vis = vis_of_vertices(vertices.T, triangles, h, w) 8 | vertices_vis = vertices_vis.astype(bool) 9 | for k in range(2): 10 | tri_vis = vertices_vis[triangles[0,:]] | vertices_vis[triangles[1,:]] | vertices_vis[triangles[2,:]] 11 | ind = triangles[:, tri_vis] 12 | vertices_vis[ind] = True 13 | # for k in range(2): 14 | # tri_vis = vertices_vis[triangles[0,:]] & vertices_vis[triangles[1,:]] & vertices_vis[triangles[2,:]] 15 | # ind = triangles[:, tri_vis] 16 | # vertices_vis[ind] = True 17 | vertices_vis = vertices_vis.astype(np.float32) #1 for visible and 0 for non-visible 18 | return vertices_vis 19 | 20 | def get_uv_mask(vertices_vis, triangles, uv_coords, h, w, resolution): 21 | triangles = triangles.T 22 | vertices_vis = vertices_vis.astype(np.float32) 23 | uv_mask = render_texture(uv_coords.T, vertices_vis[np.newaxis, :], triangles, resolution, resolution, 1) 24 | uv_mask = np.squeeze(uv_mask > 0) 25 | uv_mask = 
ndimage.binary_closing(uv_mask) 26 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 27 | uv_mask = ndimage.binary_closing(uv_mask) 28 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 29 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 30 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 31 | uv_mask = uv_mask.astype(np.float32) 32 | 33 | return np.squeeze(uv_mask) 34 | 35 | def get_depth_image(vertices, triangles, h, w, isShow = False): 36 | z = vertices[:, 2:] 37 | if isShow: 38 | z = z/max(z) 39 | depth_image = render_texture(vertices.T, z.T, triangles.T, h, w, 1) 40 | return np.squeeze(depth_image) 41 | -------------------------------------------------------------------------------- /demo/face/utils/rotate_vertices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def frontalize(vertices): 5 | canonical_vertices = np.load('utils/uv_data/canonical_vertices.npy') 6 | 7 | vertices_homo = np.hstack((vertices, np.ones([vertices.shape[0], 1]))) # n x 4 8 | P = np.linalg.lstsq(vertices_homo, canonical_vertices)[0].T # Affine matrix. 3 x 4 9 | front_vertices = vertices_homo.dot(P.T) 10 | 11 | return front_vertices 12 | -------------------------------------------------------------------------------- /demo/face/utils/uv_data/canonical_vertices.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/canonical_vertices.npy -------------------------------------------------------------------------------- /demo/face/utils/uv_data/uv_kpt_ind.txt: -------------------------------------------------------------------------------- 1 | 1.500000000000000000e+01 2.200000000000000000e+01 2.600000000000000000e+01 3.200000000000000000e+01 4.500000000000000000e+01 6.700000000000000000e+01 9.100000000000000000e+01 1.120000000000000000e+02 1.280000000000000000e+02 1.430000000000000000e+02 1.640000000000000000e+02 1.880000000000000000e+02 2.100000000000000000e+02 2.230000000000000000e+02 2.290000000000000000e+02 2.330000000000000000e+02 2.400000000000000000e+02 5.800000000000000000e+01 7.100000000000000000e+01 8.500000000000000000e+01 9.700000000000000000e+01 1.060000000000000000e+02 1.490000000000000000e+02 1.580000000000000000e+02 1.700000000000000000e+02 1.840000000000000000e+02 1.970000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.170000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.380000000000000000e+02 7.800000000000000000e+01 8.600000000000000000e+01 9.500000000000000000e+01 1.020000000000000000e+02 9.600000000000000000e+01 8.700000000000000000e+01 1.530000000000000000e+02 1.600000000000000000e+02 1.690000000000000000e+02 1.770000000000000000e+02 1.680000000000000000e+02 1.590000000000000000e+02 1.080000000000000000e+02 1.160000000000000000e+02 1.240000000000000000e+02 1.280000000000000000e+02 1.310000000000000000e+02 1.390000000000000000e+02 1.460000000000000000e+02 1.370000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02 1.180000000000000000e+02 1.100000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.450000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02 
2 | 9.600000000000000000e+01 1.180000000000000000e+02 1.410000000000000000e+02 1.650000000000000000e+02 1.830000000000000000e+02 1.900000000000000000e+02 1.880000000000000000e+02 1.870000000000000000e+02 1.930000000000000000e+02 1.870000000000000000e+02 1.880000000000000000e+02 1.900000000000000000e+02 1.830000000000000000e+02 1.650000000000000000e+02 1.410000000000000000e+02 1.180000000000000000e+02 9.600000000000000000e+01 4.900000000000000000e+01 4.200000000000000000e+01 3.900000000000000000e+01 4.000000000000000000e+01 4.200000000000000000e+01 4.200000000000000000e+01 4.000000000000000000e+01 3.900000000000000000e+01 4.200000000000000000e+01 4.900000000000000000e+01 5.900000000000000000e+01 7.300000000000000000e+01 8.600000000000000000e+01 9.600000000000000000e+01 1.110000000000000000e+02 1.130000000000000000e+02 1.150000000000000000e+02 1.130000000000000000e+02 1.110000000000000000e+02 6.700000000000000000e+01 6.000000000000000000e+01 6.100000000000000000e+01 6.500000000000000000e+01 6.800000000000000000e+01 6.900000000000000000e+01 6.500000000000000000e+01 6.100000000000000000e+01 6.000000000000000000e+01 6.700000000000000000e+01 6.900000000000000000e+01 6.800000000000000000e+01 1.420000000000000000e+02 1.310000000000000000e+02 1.270000000000000000e+02 1.280000000000000000e+02 1.270000000000000000e+02 1.310000000000000000e+02 1.420000000000000000e+02 1.480000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.480000000000000000e+02 1.410000000000000000e+02 1.350000000000000000e+02 1.340000000000000000e+02 1.350000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02 3 | -------------------------------------------------------------------------------- /demo/face/utils/uv_data/uv_weight_mask_gdh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/uv_weight_mask_gdh.png -------------------------------------------------------------------------------- /demo/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/result.png -------------------------------------------------------------------------------- /demo/tensorrt_model.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | 3 | import tensorrt as trt 4 | import torch 5 | 6 | 7 | def torch_dtype_to_trt(dtype): 8 | if dtype == torch.int8: 9 | return trt.int8 10 | elif dtype == torch.int32: 11 | return trt.int32 12 | elif dtype == torch.float16: 13 | return trt.float16 14 | elif dtype == torch.float32: 15 | return trt.float32 16 | else: 17 | raise TypeError('%s is not supported by tensorrt' % dtype) 18 | 19 | 20 | def torch_dtype_from_trt(dtype): 21 | if dtype == trt.int8: 22 | return torch.int8 23 | elif dtype == trt.int32: 24 | return torch.int32 25 | elif dtype == trt.float16: 26 | return torch.float16 27 | elif dtype == trt.float32: 28 | return torch.float32 29 | else: 30 | raise TypeError('%s is not supported by torch' % dtype) 31 | 32 | 33 | def torch_device_to_trt(device): 34 | if device.type == torch.device('cuda').type: 35 | return trt.TensorLocation.DEVICE 36 | elif device.type == torch.device('cpu').type: 37 | return trt.TensorLocation.HOST 38 | else: 39 | return TypeError('%s is not 
supported by tensorrt' % device) 40 | 41 | 42 | def torch_device_from_trt(device): 43 | if device == trt.TensorLocation.DEVICE: 44 | return torch.device('cuda') 45 | elif device == trt.TensorLocation.HOST: 46 | return torch.device('cpu') 47 | else: 48 | return TypeError('%s is not supported by torch' % device) 49 | 50 | 51 | class TRTModel(object): 52 | 53 | def __init__(self, engine_path, input_names=None, output_names=None, final_shapes=None): 54 | 55 | # load engine 56 | self.logger = trt.Logger() 57 | self.runtime = trt.Runtime(self.logger) 58 | with open(engine_path, 'rb') as f: 59 | self.engine = self.runtime.deserialize_cuda_engine(f.read()) 60 | self.context = self.engine.create_execution_context() 61 | 62 | if input_names is None: 63 | self.input_names = self._trt_input_names() 64 | else: 65 | self.input_names = input_names 66 | 67 | if output_names is None: 68 | self.output_names = self._trt_output_names() 69 | else: 70 | self.output_names = output_names 71 | 72 | self.final_shapes = final_shapes 73 | 74 | def _input_binding_indices(self): 75 | return [i for i in range(self.engine.num_bindings) if self.engine.binding_is_input(i)] 76 | 77 | def _output_binding_indices(self): 78 | return [i for i in range(self.engine.num_bindings) if not self.engine.binding_is_input(i)] 79 | 80 | def _trt_input_names(self): 81 | return [self.engine.get_binding_name(i) for i in self._input_binding_indices()] 82 | 83 | def _trt_output_names(self): 84 | return [self.engine.get_binding_name(i) for i in self._output_binding_indices()] 85 | 86 | def create_output_buffers(self, batch_size): 87 | outputs = [None] * len(self.output_names) 88 | for i, output_name in enumerate(self.output_names): 89 | idx = self.engine.get_binding_index(output_name) 90 | dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) 91 | if self.final_shapes is not None: 92 | shape = (batch_size, ) + self.final_shapes[i] 93 | else: 94 | shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) 95 | device = torch_device_from_trt(self.engine.get_location(idx)) 96 | output = torch.empty(size=shape, dtype=dtype, device=device) 97 | outputs[i] = output 98 | return outputs 99 | 100 | def execute(self, *inputs): 101 | batch_size = inputs[0].shape[0] 102 | 103 | bindings = [None] * (len(self.input_names) + len(self.output_names)) 104 | 105 | # map input bindings 106 | inputs_torch = [None] * len(self.input_names) 107 | for i, name in enumerate(self.input_names): 108 | idx = self.engine.get_binding_index(name) 109 | 110 | # convert to appropriate format 111 | inputs_torch[i] = torch.from_numpy(inputs[i]) 112 | inputs_torch[i] = inputs_torch[i].to(torch_device_from_trt(self.engine.get_location(idx))) 113 | inputs_torch[i] = inputs_torch[i].type(torch_dtype_from_trt(self.engine.get_binding_dtype(idx))) 114 | 115 | bindings[idx] = int(inputs_torch[i].data_ptr()) 116 | 117 | output_buffers = self.create_output_buffers(batch_size) 118 | 119 | # map output bindings 120 | for i, name in enumerate(self.output_names): 121 | idx = self.engine.get_binding_index(name) 122 | bindings[idx] = int(output_buffers[i].data_ptr()) 123 | 124 | self.context.execute(batch_size, bindings) 125 | 126 | outputs = [buffer for buffer in output_buffers] 127 | 128 | return outputs 129 | 130 | def __call__(self, *inputs): 131 | return self.execute(*inputs) 132 | -------------------------------------------------------------------------------- /demo/tracking/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/__init__.py -------------------------------------------------------------------------------- /demo/tracking/deep_sort.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from .feature_extractor import Extractor 6 | from .sort.detection import Detection 7 | from .sort.nn_matching import NearestNeighborDistanceMetric 8 | from .sort.preprocessing import non_max_suppression 9 | from .sort.tracker import Tracker 10 | 11 | 12 | class DeepSort(object): 13 | def __init__(self, model_path): 14 | self.min_confidence = 0.3 15 | self.nms_max_overlap = 1.0 16 | 17 | self.extractor = Extractor(model_path, use_cuda=True) 18 | 19 | max_cosine_distance = 0.2 20 | nn_budget = 100 21 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 22 | self.tracker = Tracker(metric) 23 | 24 | def update(self, bbox_xywh, confidences, ori_img): 25 | self.height, self.width = ori_img.shape[:2] 26 | 27 | 28 | # generate detections 29 | try : 30 | features = self._get_features(bbox_xywh, ori_img) 31 | except : 32 | print('a') 33 | detections = [Detection(bbox_xywh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence] 34 | 35 | 36 | # run on non-maximum supression 37 | boxes = np.array([d.tlwh for d in detections]) 38 | scores = np.array([d.confidence for d in detections]) 39 | indices = non_max_suppression( boxes, self.nms_max_overlap, scores) 40 | detections = [detections[i] for i in indices] 41 | 42 | 43 | # update tracker 44 | self.tracker.predict() 45 | self.tracker.update(detections) 46 | 47 | 48 | # output bbox identities 49 | outputs = [] 50 | for track in self.tracker.tracks: 51 | if not track.is_confirmed() or track.time_since_update > 1: 52 | continue 53 | box = track.to_tlwh() 54 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box) 55 | track_id = track.track_id 56 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 57 | if len(outputs) > 0: 58 | outputs = np.stack(outputs,axis=0) 59 | 60 | 61 | return outputs 62 | 63 | 64 | 65 | # for centernet (x1,x2 w,h -> x1,y1,x2,y2) 66 | def _xywh_to_xyxy_centernet(self, bbox_xywh): 67 | x1,y1,w,h = bbox_xywh 68 | x1 = max(x1,0) 69 | y1 = max(y1,0) 70 | x2 = min(int(x1+w),self.width-1) 71 | y2 = min(int(y1+h),self.height-1) 72 | return int(x1),int(y1),x2,y2 73 | 74 | # for yolo (centerx,centerx, w,h -> x1,y1,x2,y2) 75 | def _xywh_to_xyxy_yolo(self, bbox_xywh): 76 | x,y,w,h = bbox_xywh 77 | x1 = max(int(x-w/2),0) 78 | x2 = min(int(x+w/2),self.width-1) 79 | y1 = max(int(y-h/2),0) 80 | y2 = min(int(y+h/2),self.height-1) 81 | return x1,y1,x2,y2 82 | 83 | def _get_features(self, bbox_xywh, ori_img): 84 | features = [] 85 | for box in bbox_xywh: 86 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box) 87 | im = ori_img[y1:y2,x1:x2] 88 | feature = self.extractor(im)[0] 89 | features.append(feature) 90 | if len(features): 91 | features = np.stack(features, axis=0) 92 | else: 93 | features = np.array([]) 94 | return features 95 | 96 | if __name__ == '__main__': 97 | pass 98 | -------------------------------------------------------------------------------- /demo/tracking/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torchvision.transforms as transforms 5 | 6 | from .model import Net 7 | 8 | 9 | class 
Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | print("Loading weights from {}... Done!".format(model_path)) 16 | self.net.to(self.device) 17 | self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 18 | 19 | def __call__(self, img): 20 | assert isinstance(img, np.ndarray), "type error" 21 | img = img.astype(np.float)#/255. 22 | img = cv2.resize(img, (64,128)) 23 | img = torch.from_numpy(img).float().permute(2,0,1) 24 | img = self.norm(img).unsqueeze(0) 25 | with torch.no_grad(): 26 | img = img.to(self.device) 27 | feature = self.net(img) 28 | return feature.cpu().numpy() 29 | 30 | 31 | if __name__ == '__main__': 32 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 33 | extr = Extractor("checkpoint/ckpt.t7") 34 | feature = extr(img) 35 | print(feature.shape) 36 | -------------------------------------------------------------------------------- /demo/tracking/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out,is_downsample=False): 8 | super(BasicBlock,self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 12 | else: 13 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(c_out) 15 | self.relu = nn.ReLU(True) 16 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(c_out) 18 | if is_downsample: 19 | self.downsample = nn.Sequential( 20 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 21 | nn.BatchNorm2d(c_out) 22 | ) 23 | elif c_in != c_out: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 26 | nn.BatchNorm2d(c_out) 27 | ) 28 | self.is_downsample = True 29 | 30 | def forward(self,x): 31 | y = self.conv1(x) 32 | y = self.bn1(y) 33 | y = self.relu(y) 34 | y = self.conv2(y) 35 | y = self.bn2(y) 36 | if self.is_downsample: 37 | x = self.downsample(x) 38 | return F.relu(x.add(y),True) 39 | 40 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 41 | blocks = [] 42 | for i in range(repeat_times): 43 | if i ==0: 44 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 45 | else: 46 | blocks += [BasicBlock(c_out,c_out),] 47 | return nn.Sequential(*blocks) 48 | 49 | class Net(nn.Module): 50 | def __init__(self, num_classes=751 ,reid=False): 51 | super(Net,self).__init__() 52 | # 3 128 64 53 | self.conv = nn.Sequential( 54 | nn.Conv2d(3,64,3,stride=1,padding=1), 55 | nn.BatchNorm2d(64), 56 | nn.ReLU(inplace=True), 57 | # nn.Conv2d(32,32,3,stride=1,padding=1), 58 | # nn.BatchNorm2d(32), 59 | # nn.ReLU(inplace=True), 60 | nn.MaxPool2d(3,2,padding=1), 61 | ) 62 | # 32 64 32 63 | self.layer1 = make_layers(64,64,2,False) 64 | # 32 64 32 65 | self.layer2 = make_layers(64,128,2,True) 66 | # 64 32 16 67 | self.layer3 = make_layers(128,256,2,True) 68 | # 128 16 8 69 | self.layer4 = make_layers(256,512,2,True) 70 | # 256 8 4 71 | self.avgpool = nn.AvgPool2d((8,4),1) 72 | # 256 1 1 73 | self.reid = reid 74 | self.classifier = nn.Sequential( 75 | nn.Linear(512, 256), 76 | nn.BatchNorm1d(256), 77 | 
nn.ReLU(inplace=True), 78 | nn.Dropout(), 79 | nn.Linear(256, num_classes), 80 | ) 81 | 82 | def forward(self, x): 83 | x = self.conv(x) 84 | x = self.layer1(x) 85 | x = self.layer2(x) 86 | x = self.layer3(x) 87 | x = self.layer4(x) 88 | x = self.avgpool(x) 89 | x = x.view(x.size(0),-1) 90 | # B x 128 91 | if self.reid: 92 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 93 | return x 94 | # classifier 95 | x = self.classifier(x) 96 | return x 97 | 98 | 99 | if __name__ == '__main__': 100 | net = Net() 101 | x = torch.randn(4,3,128,64) 102 | y = net(x) 103 | import ipdb; ipdb.set_trace() 104 | -------------------------------------------------------------------------------- /demo/tracking/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/sort/__init__.py -------------------------------------------------------------------------------- /demo/tracking/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /demo/tracking/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | 6 | from . import linear_assignment 7 | 8 | 9 | def iou(bbox, candidates): 10 | """Computer intersection over union. 11 | 12 | Parameters 13 | ---------- 14 | bbox : ndarray 15 | A bounding box in format `(top left x, top left y, width, height)`. 16 | candidates : ndarray 17 | A matrix of candidate bounding boxes (one per row) in the same format 18 | as `bbox`. 19 | 20 | Returns 21 | ------- 22 | ndarray 23 | The intersection over union in [0, 1] between the `bbox` and each 24 | candidate. A higher score means a larger fraction of the `bbox` is 25 | occluded by the candidate. 
26 | 27 | """ 28 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 29 | candidates_tl = candidates[:, :2] 30 | candidates_br = candidates[:, :2] + candidates[:, 2:] 31 | 32 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 33 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 34 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 35 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 36 | wh = np.maximum(0., br - tl) 37 | 38 | area_intersection = wh.prod(axis=1) 39 | area_bbox = bbox[2:].prod() 40 | area_candidates = candidates[:, 2:].prod(axis=1) 41 | return area_intersection / (area_bbox + area_candidates - area_intersection) 42 | 43 | 44 | def iou_cost(tracks, detections, track_indices=None, 45 | detection_indices=None): 46 | """An intersection over union distance metric. 47 | 48 | Parameters 49 | ---------- 50 | tracks : List[deep_sort.track.Track] 51 | A list of tracks. 52 | detections : List[deep_sort.detection.Detection] 53 | A list of detections. 54 | track_indices : Optional[List[int]] 55 | A list of indices to tracks that should be matched. Defaults to 56 | all `tracks`. 57 | detection_indices : Optional[List[int]] 58 | A list of indices to detections that should be matched. Defaults 59 | to all `detections`. 60 | 61 | Returns 62 | ------- 63 | ndarray 64 | Returns a cost matrix of shape 65 | len(track_indices), len(detection_indices) where entry (i, j) is 66 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 67 | 68 | """ 69 | if track_indices is None: 70 | track_indices = np.arange(len(tracks)) 71 | if detection_indices is None: 72 | detection_indices = np.arange(len(detections)) 73 | 74 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 75 | for row, track_idx in enumerate(track_indices): 76 | if tracks[track_idx].time_since_update > 1: 77 | cost_matrix[row, :] = linear_assignment.INFTY_COST 78 | continue 79 | 80 | bbox = tracks[track_idx].to_tlwh() 81 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 82 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 83 | return cost_matrix 84 | -------------------------------------------------------------------------------- /demo/tracking/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /demo/tracking/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurrence. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list.
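
    Lifecycle summary (as implemented below): a new track starts in the
    `Tentative` state, is promoted to `Confirmed` once it has been matched
    `n_init` times in a row, and is marked `Deleted` either when it is missed
    while still tentative or when `time_since_update` exceeds `max_age`.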
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, miny, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def predict(self, kf): 113 | """Propagate the state distribution to the current time step using a 114 | Kalman filter prediction step. 115 | 116 | Parameters 117 | ---------- 118 | kf : kalman_filter.KalmanFilter 119 | The Kalman filter. 120 | 121 | """ 122 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 123 | self.age += 1 124 | self.time_since_update += 1 125 | 126 | def update(self, kf, detection): 127 | """Perform Kalman filter measurement update step and update the feature 128 | cache. 129 | 130 | Parameters 131 | ---------- 132 | kf : kalman_filter.KalmanFilter 133 | The Kalman filter. 134 | detection : Detection 135 | The associated detection. 136 | 137 | """ 138 | self.mean, self.covariance = kf.update( 139 | self.mean, self.covariance, detection.to_xyah()) 140 | self.features.append(detection.feature) 141 | 142 | self.hits += 1 143 | self.time_since_update = 0 144 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 145 | self.state = TrackState.Confirmed 146 | 147 | def mark_missed(self): 148 | """Mark this track as missed (no association at the current time step). 149 | """ 150 | if self.state == TrackState.Tentative: 151 | self.state = TrackState.Deleted 152 | elif self.time_since_update > self._max_age: 153 | self.state = TrackState.Deleted 154 | 155 | def is_tentative(self): 156 | """Returns True if this track is tentative (unconfirmed). 157 | """ 158 | return self.state == TrackState.Tentative 159 | 160 | def is_confirmed(self): 161 | """Returns True if this track is confirmed.""" 162 | return self.state == TrackState.Confirmed 163 | 164 | def is_deleted(self): 165 | """Returns True if this track is dead and should be deleted.""" 166 | return self.state == TrackState.Deleted 167 | -------------------------------------------------------------------------------- /demo/tracking/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | 6 | from . import iou_matching, kalman_filter, linear_assignment 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 
20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features. 
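        # (Sketch of the two-stage association: the matching cascade below pairs
        #  confirmed tracks with detections using the appearance metric, gated by
        #  the Kalman state via gate_cost_matrix; detections left unmatched, the
        #  unconfirmed tracks, and tracks missed for exactly one frame then go
        #  through the IoU-based matching that follows.)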
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | self.tracks.append(Track( 136 | mean, covariance, self._next_id, self.n_init, self.max_age, 137 | detection.feature)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /demo/tracking/util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 | (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | ''' 19 | draw box of an id 20 | ''' 21 | x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # set color and label text 23 | color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | label = '{} {}'.format(cls_name, identity) 25 | # box text and bar 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | return img 31 | 32 | 33 | def draw_bboxes(img, bbox, identities=None, offset=(0,0)): 34 | for i,box in enumerate(bbox): 35 | x1,y1,x2,y2 = [int(i) for i in box] 36 | x1 += offset[0] 37 | x2 += offset[0] 38 | y1 += offset[1] 39 | y2 += offset[1] 40 | # box text and bar 41 | id = int(identities[i]) if identities is not 
None else 0 42 | color = COLORS_10[id%len(COLORS_10)] 43 | label = '{} {}'.format("object", id) 44 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 45 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 46 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 47 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 48 | return img 49 | 50 | def softmax(x): 51 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 52 | x_exp = np.exp(x*5) 53 | return x_exp/x_exp.sum() 54 | 55 | def softmin(x): 56 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 57 | x_exp = np.exp(-x) 58 | return x_exp/x_exp.sum() 59 | 60 | 61 | 62 | if __name__ == '__main__': 63 | x = np.arange(10)/10. 64 | x = np.array([0.5,0.5,0.5,0.6,1.]) 65 | y = softmax(x) 66 | z = softmin(x) 67 | import ipdb; ipdb.set_trace() 68 | -------------------------------------------------------------------------------- /experiments/darknet53_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'darknet53' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/darknet53' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 4 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'darknet' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | INTERMEDIATE_CHANNEL: 256 29 | HEAD_CONV: 256 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | 60 | DATASET: 61 | DATASET: 'coco_hp' 62 | TRAIN_SET: 'train' 63 | TEST_SET: 'valid' 64 | TRAIN_IMAGE_DIR: 'images/train2017' 65 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 66 | VAL_IMAGE_DIR: 'images/val2017' 67 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 68 | 69 | # training data augmentation 70 | MEAN: [0.408, 0.447, 0.470] 71 | STD: [0.289, 0.274, 0.278] 72 | SHIFT: 0.1 73 | SCALE: 0.4 74 | ROTATE: 0. 75 | # for pose 76 | AUG_ROT: 0. 
77 | FLIP: 0.5 78 | NO_COLOR_AUG: false 79 | 80 | ROT_FACTOR: 30 81 | SCALE_MIN: 0.5 82 | SCALE_MAX: 1.1 83 | IMAGE_SIZE: 512 84 | RANDOM_CROP: true 85 | 86 | TRAIN: 87 | OPTIMIZER: 'adam' 88 | DISTRIBUTE: true 89 | LOCAL_RANK: 0 90 | HIDE_DATA_TIME: false 91 | SAVE_ALL_MODEL: false 92 | RESUME: false 93 | LR_FACTOR: 0.1 94 | LR_STEP: [270, 300] 95 | EPOCHS: 320 96 | NUM_ITERS: -1 97 | LR: 1.875e-4 98 | BATCH_SIZE: 48 99 | MASTER_BATCH_SIZE: 12 100 | 101 | MOMENTUM: 0.9 102 | WD: 0.0001 103 | NESTEROV: false 104 | GAMMA1: 0.99 105 | GAMMA2: 0.0 106 | 107 | # 'apply and reset gradients every n batches' 108 | STRIDE_APPLY: 1 109 | 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/darknet53/model_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: true 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 100 126 | NMS: false 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: true 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.5 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/dla_34_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'sgd_lr6e3' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/dla34_lr6e3' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'dla_34' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 64 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'adam' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.8125e-3 99 | BATCH_SIZE: 72 100 | MASTER_BATCH_SIZE: 18 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/dla34_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: true 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 100 126 | NMS: true 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: false 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.4 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/efficientdet_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_efficientdet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/efficientdet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | INIT_WEIGHTS: false 23 | PRETRAINED: '' 24 | CENTER_THRESH: 0.1 25 | NUM_CLASSES: 1 26 | NAME: 'efficientdet' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 64 30 | INTERMEDIATE_CHANNEL: 172 31 | DOWN_RATIO: 4 32 | NUM_STACKS: 1 33 | INPUT_RES: 512 34 | OUTPUT_RES: 128 35 | INPUT_H: 512 36 | INPUT_W: 512 37 | PAD: 31 38 | NUM_KEYPOINTS: 17 39 | TAG_PER_JOINT: true 40 | TARGET_TYPE: 'gaussian' 41 | SIGMA: 2 42 | 43 | LOSS: 44 | METRIC: 'loss' 45 | MSE_LOSS: false 46 | REG_LOSS: 'l1' 47 | USE_OHKM: false 48 | TOPK: 8 49 | USE_TARGET_WEIGHT: true 50 | USE_DIFFERENT_JOINTS_WEIGHT: false 51 | HP_WEIGHT: 1. 52 | HM_HP_WEIGHT: 1. 53 | DENSE_HP: false 54 | HM_HP: true 55 | REG_BBOX: true 56 | WH_WEIGHT: 0.1 57 | REG_OFFSET: true 58 | OFF_WEIGHT: 1. 59 | REG_HP_OFFSET: true 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'adam' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.1875e-4 99 | BATCH_SIZE: 56 100 | MASTER_BATCH_SIZE: 14 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/efficientdet/model_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: false 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 32 126 | NMS: false 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: true 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.5 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/ghost_net.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'ghostnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/ghostnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'ghostnet' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 160 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.5e-4 99 | BATCH_SIZE: 64 100 | MASTER_BATCH_SIZE: 16 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/hardnet_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'hardnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/hardnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: true 22 | PRETRAINED: '/data/pretrained_models/imagenet/hardnet_petite_base.pth' 23 | CENTER_THRESH: 0.1 24 | INTERMEDIATE_CHANNEL: 48 25 | NUM_CLASSES: 1 26 | NAME: 'hardnet' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 256 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 5.e-4 99 | BATCH_SIZE: 128 100 | MASTER_BATCH_SIZE: 32 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/data/centerpose/hardnet/model_best.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/hrnet_w32_512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_hrnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PIN_MEMORY: true 13 | RANK: 0 14 | PRINT_FREQ: 100 15 | CUDNN: 16 | BENCHMARK: true 17 | DETERMINISTIC: false 18 | ENABLED: true 19 | DATASET: 20 | DATASET: 'coco_hp' 21 | TRAIN_SET: 'train' 22 | TEST_SET: 'valid' 23 | TRAIN_IMAGE_DIR: 'images/train2017' 24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 25 | VAL_IMAGE_DIR: 'images/val2017' 26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 27 | 28 | # training data augmentation 29 | MEAN: [0.408, 0.447, 0.470] 30 | STD: [0.289, 0.274, 0.278] 31 | SHIFT: 0.1 32 | SCALE: 0.4 33 | ROTATE: 0. 34 | # for pose 35 | AUG_ROT: 0. 36 | FLIP: 0.5 37 | NO_COLOR_AUG: false 38 | 39 | ROT_FACTOR: 30 40 | SCALE_MIN: 0.5 41 | SCALE_MAX: 1.1 42 | IMAGE_SIZE: 512 43 | RANDOM_CROP: true 44 | 45 | LOSS: 46 | METRIC: 'loss' 47 | MSE_LOSS: false 48 | REG_LOSS: 'l1' 49 | USE_OHKM: false 50 | TOPK: 8 51 | USE_TARGET_WEIGHT: true 52 | USE_DIFFERENT_JOINTS_WEIGHT: false 53 | HP_WEIGHT: 1. 54 | HM_HP_WEIGHT: 1. 55 | DENSE_HP: false 56 | HM_HP: true 57 | REG_BBOX: true 58 | WH_WEIGHT: 0.1 59 | REG_OFFSET: true 60 | OFF_WEIGHT: 1. 61 | REG_HP_OFFSET: true 62 | HM_HP_WEIGHT: 1. 
63 | MODEL: 64 | HEADS_NAME: 'keypoint' 65 | INTERMEDIATE_CHANNEL: 32 66 | CENTER_THRESH: 0.1 67 | NUM_CLASSES: 1 68 | NAME: 'hrnet' 69 | DOWN_RATIO: 4 70 | NUM_STACKS: 1 71 | INPUT_RES: 512 72 | OUTPUT_RES: 128 73 | INPUT_H: 512 74 | INPUT_W: 512 75 | PAD: 31 76 | NUM_KEYPOINTS: 17 77 | SIGMA: 2 78 | HEAD_CONV: 64 79 | EXTRA: 80 | FINAL_CONV_KERNEL: 1 81 | PRETRAINED_LAYERS: ['*'] 82 | STEM_INPLANES: 64 83 | STAGE2: 84 | NUM_MODULES: 1 85 | NUM_BRANCHES: 2 86 | BLOCK: BASIC 87 | NUM_BLOCKS: 88 | - 4 89 | - 4 90 | NUM_CHANNELS: 91 | - 32 92 | - 64 93 | FUSE_METHOD: SUM 94 | STAGE3: 95 | NUM_MODULES: 4 96 | NUM_BRANCHES: 3 97 | BLOCK: BASIC 98 | NUM_BLOCKS: 99 | - 4 100 | - 4 101 | - 4 102 | NUM_CHANNELS: 103 | - 32 104 | - 64 105 | - 128 106 | FUSE_METHOD: SUM 107 | STAGE4: 108 | NUM_MODULES: 3 109 | NUM_BRANCHES: 4 110 | BLOCK: BASIC 111 | NUM_BLOCKS: 112 | - 4 113 | - 4 114 | - 4 115 | - 4 116 | NUM_CHANNELS: 117 | - 32 118 | - 64 119 | - 128 120 | - 256 121 | FUSE_METHOD: SUM 122 | DECONV: 123 | NUM_DECONVS: 0 124 | NUM_CHANNELS: 125 | - 32 126 | KERNEL_SIZE: 127 | - 4 128 | NUM_BASIC_BLOCKS: 4 129 | CAT_OUTPUT: 130 | - True 131 | INIT_WEIGHTS: true 132 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 133 | TAG_PER_JOINT: true 134 | TEST: 135 | # Test Model Epoch 136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth' 137 | TASK: 'multi_pose' 138 | FLIP_TEST: true 139 | FIX_RES: false 140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 141 | MODEL_FILE: '' 142 | TEST_SCALES: [1,2] 143 | IMAGE_THRE: 0.1 144 | TOPK: 100 145 | NMS: false 146 | NMS_THRE: 0.5 147 | NOT_PREFETCH_TEST: false 148 | 149 | KEEP_RES: false 150 | 151 | SOFT_NMS: false 152 | OKS_THRE: 0.5 153 | VIS_THRESH: 0.3 154 | KEYPOINT_THRESH: 0.2 155 | NUM_MIN_KPT: 4 156 | THRESH_HUMAN: 0.4 157 | 158 | EVAL_ORACLE_HM: false 159 | EVAL_ORACLE_WH: false 160 | EVAL_ORACLE_OFFSET: false 161 | EVAL_ORACLE_KPS: false 162 | EVAL_ORACLE_HMHP: false 163 | EVAL_ORACLE_HP_OFFSET: false 164 | EVAL_ORACLE_DEP: false 165 | TRAIN: 166 | DISTRIBUTE: true 167 | OPTIMIZER: 'sgd' 168 | LOCAL_RANK: 0 169 | HIDE_DATA_TIME: false 170 | SAVE_ALL_MODEL: false 171 | RESUME: false 172 | LR_FACTOR: 0.1 173 | LR_STEP: [270, 300] 174 | EPOCHS: 320 175 | NUM_ITERS: -1 176 | LR: 1.71875e-4 177 | BATCH_SIZE: 44 178 | MASTER_BATCH_SIZE: 11 179 | 180 | MOMENTUM: 0.9 181 | WD: 0.0001 182 | NESTEROV: false 183 | GAMMA1: 0.99 184 | GAMMA2: 0.0 185 | 186 | # 'apply and reset gradients every n batches' 187 | STRIDE_APPLY: 1 188 | 189 | CHECKPOINT: '' 190 | SHUFFLE: true 191 | VAL_INTERVALS: 1 192 | TRAINVAL: false 193 | -------------------------------------------------------------------------------- /experiments/hrnet_w48_512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_hrnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PIN_MEMORY: true 13 | RANK: 0 14 | PRINT_FREQ: 100 15 | CUDNN: 16 | BENCHMARK: true 17 | DETERMINISTIC: false 18 | ENABLED: true 19 | DATASET: 20 | DATASET: 'coco_hp' 21 | TRAIN_SET: 'train' 22 | TEST_SET: 'valid' 23 | TRAIN_IMAGE_DIR: 'images/train2017' 24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 25 | VAL_IMAGE_DIR: 'images/val2017' 26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 27 | 28 | # training data 
augmentation 29 | MEAN: [0.408, 0.447, 0.470] 30 | STD: [0.289, 0.274, 0.278] 31 | SHIFT: 0.1 32 | SCALE: 0.4 33 | ROTATE: 0. 34 | # for pose 35 | AUG_ROT: 0. 36 | FLIP: 0.5 37 | NO_COLOR_AUG: false 38 | 39 | ROT_FACTOR: 30 40 | SCALE_MIN: 0.5 41 | SCALE_MAX: 1.1 42 | IMAGE_SIZE: 512 43 | RANDOM_CROP: true 44 | 45 | LOSS: 46 | METRIC: 'loss' 47 | MSE_LOSS: false 48 | REG_LOSS: 'l1' 49 | USE_OHKM: false 50 | TOPK: 8 51 | USE_TARGET_WEIGHT: true 52 | USE_DIFFERENT_JOINTS_WEIGHT: false 53 | HP_WEIGHT: 1. 54 | HM_HP_WEIGHT: 1. 55 | DENSE_HP: false 56 | HM_HP: true 57 | REG_BBOX: true 58 | WH_WEIGHT: 0.1 59 | REG_OFFSET: true 60 | OFF_WEIGHT: 1. 61 | REG_HP_OFFSET: true 62 | HM_HP_WEIGHT: 1. 63 | MODEL: 64 | HEADS_NAME: 'keypoint' 65 | INTERMEDIATE_CHANNEL: 48 66 | CENTER_THRESH: 0.1 67 | NUM_CLASSES: 1 68 | NAME: 'hrnet' 69 | DOWN_RATIO: 4 70 | NUM_STACKS: 1 71 | INPUT_RES: 512 72 | OUTPUT_RES: 128 73 | INPUT_H: 512 74 | INPUT_W: 512 75 | PAD: 31 76 | NUM_KEYPOINTS: 17 77 | SIGMA: 2 78 | HEAD_CONV: 64 79 | EXTRA: 80 | FINAL_CONV_KERNEL: 1 81 | PRETRAINED_LAYERS: ['*'] 82 | STEM_INPLANES: 64 83 | STAGE2: 84 | NUM_MODULES: 1 85 | NUM_BRANCHES: 2 86 | BLOCK: BASIC 87 | NUM_BLOCKS: 88 | - 4 89 | - 4 90 | NUM_CHANNELS: 91 | - 48 92 | - 96 93 | FUSE_METHOD: SUM 94 | STAGE3: 95 | NUM_MODULES: 4 96 | NUM_BRANCHES: 3 97 | BLOCK: BASIC 98 | NUM_BLOCKS: 99 | - 4 100 | - 4 101 | - 4 102 | NUM_CHANNELS: 103 | - 48 104 | - 96 105 | - 192 106 | FUSE_METHOD: SUM 107 | STAGE4: 108 | NUM_MODULES: 3 109 | NUM_BRANCHES: 4 110 | BLOCK: BASIC 111 | NUM_BLOCKS: 112 | - 4 113 | - 4 114 | - 4 115 | - 4 116 | NUM_CHANNELS: 117 | - 48 118 | - 96 119 | - 192 120 | - 384 121 | FUSE_METHOD: SUM 122 | DECONV: 123 | NUM_DECONVS: 1 124 | NUM_CHANNELS: 125 | - 48 126 | KERNEL_SIZE: 127 | - 4 128 | NUM_BASIC_BLOCKS: 4 129 | CAT_OUTPUT: 130 | - True 131 | INIT_WEIGHTS: true 132 | PRETRAINED: '/home/tensorboy/data/pretrained_models/imagenet/hrnet_w48-8ef0771d.pth' 133 | TAG_PER_JOINT: true 134 | TEST: 135 | # Test Model Epoch 136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth' 137 | TASK: 'multi_pose' 138 | FLIP_TEST: true 139 | FIX_RES: false 140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 141 | MODEL_FILE: '' 142 | TEST_SCALES: [1,2] 143 | IMAGE_THRE: 0.1 144 | TOPK: 100 145 | NMS: false 146 | NMS_THRE: 0.5 147 | NOT_PREFETCH_TEST: false 148 | 149 | KEEP_RES: false 150 | 151 | SOFT_NMS: false 152 | OKS_THRE: 0.5 153 | VIS_THRESH: 0.3 154 | KEYPOINT_THRESH: 0.2 155 | NUM_MIN_KPT: 4 156 | THRESH_HUMAN: 0.4 157 | 158 | EVAL_ORACLE_HM: false 159 | EVAL_ORACLE_WH: false 160 | EVAL_ORACLE_OFFSET: false 161 | EVAL_ORACLE_KPS: false 162 | EVAL_ORACLE_HMHP: false 163 | EVAL_ORACLE_HP_OFFSET: false 164 | EVAL_ORACLE_DEP: false 165 | TRAIN: 166 | DISTRIBUTE: true 167 | OPTIMIZER: 'adam' 168 | LOCAL_RANK: 0 169 | HIDE_DATA_TIME: false 170 | SAVE_ALL_MODEL: false 171 | RESUME: false 172 | LR_FACTOR: 0.1 173 | LR_STEP: [270, 300] 174 | EPOCHS: 320 175 | NUM_ITERS: -1 176 | LR: 1.25e-4 177 | BATCH_SIZE: 32 178 | MASTER_BATCH_SIZE: 8 179 | 180 | MOMENTUM: 0.9 181 | WD: 0.0001 182 | NESTEROV: false 183 | GAMMA1: 0.99 184 | GAMMA2: 0.0 185 | 186 | # 'apply and reset gradients every n batches' 187 | STRIDE_APPLY: 1 188 | 189 | CHECKPOINT: '' 190 | SHUFFLE: true 191 | VAL_INTERVALS: 1 192 | TRAINVAL: false 193 | -------------------------------------------------------------------------------- /experiments/mobilenetv2_512x512.yaml: -------------------------------------------------------------------------------- 1 | 
SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'coco_pose_mobilenetv2' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/mobilenetv2' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'mobilenetv2' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 24 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 5.e-4 99 | BATCH_SIZE: 128 100 | MASTER_BATCH_SIZE: 32 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/mobilenetv3_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 
'coco_pose_mobilenet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/mobilenetv3' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'mobilenetv3' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 24 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 3.359375e-4 99 | BATCH_SIZE: 86 100 | MASTER_BATCH_SIZE: 20 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/res_50_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'coco_pose_res_50' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 
317 7 | OUTPUT_DIR: '/data/centerpose/res50_lre2' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | INIT_WEIGHTS: false 23 | PRETRAINED: '' 24 | CENTER_THRESH: 0.1 25 | NUM_CLASSES: 1 26 | NAME: 'res_50' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 64 30 | INTERMEDIATE_CHANNEL: 256 31 | DOWN_RATIO: 4 32 | NUM_STACKS: 1 33 | INPUT_RES: 512 34 | OUTPUT_RES: 128 35 | INPUT_H: 512 36 | INPUT_W: 512 37 | PAD: 31 38 | NUM_KEYPOINTS: 17 39 | TAG_PER_JOINT: true 40 | TARGET_TYPE: 'gaussian' 41 | SIGMA: 2 42 | 43 | LOSS: 44 | METRIC: 'loss' 45 | MSE_LOSS: false 46 | REG_LOSS: 'l1' 47 | USE_OHKM: false 48 | TOPK: 8 49 | USE_TARGET_WEIGHT: true 50 | USE_DIFFERENT_JOINTS_WEIGHT: false 51 | HP_WEIGHT: 1. 52 | HM_HP_WEIGHT: 1. 53 | DENSE_HP: false 54 | HM_HP: true 55 | REG_BBOX: true 56 | WH_WEIGHT: 0.1 57 | REG_OFFSET: true 58 | OFF_WEIGHT: 1. 59 | REG_HP_OFFSET: true 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'sgd' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [800, 900] 96 | EPOCHS: 1000 97 | NUM_ITERS: -1 98 | LR: 7.e-3 99 | BATCH_SIZE: 56 100 | MASTER_BATCH_SIZE: 14 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/res50/model_best.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: true 131 | 132 | SOFT_NMS: false 133 | OKS_THRE: 0.5 134 | VIS_THRESH: 0.3 135 | KEYPOINT_THRESH: 0.2 136 | NUM_MIN_KPT: 4 137 | THRESH_HUMAN: 0.5 138 | 139 | EVAL_ORACLE_HM: false 140 | EVAL_ORACLE_WH: false 141 | EVAL_ORACLE_OFFSET: false 142 | EVAL_ORACLE_KPS: false 143 | EVAL_ORACLE_HMHP: false 144 | EVAL_ORACLE_HP_OFFSET: false 145 | EVAL_ORACLE_DEP: false 146 | -------------------------------------------------------------------------------- /experiments/shufflenetV2_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_shufflenetv2' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 
| PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | CENTER_THRESH: 0.1 22 | NUM_CLASSES: 1 23 | NAME: 'shufflenetV2' 24 | HEADS_NAME: 'keypoint' 25 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 26 | HEAD_CONV: 256 27 | INTERMEDIATE_CHANNEL: 256 28 | DOWN_RATIO: 4 29 | NUM_STACKS: 1 30 | INPUT_RES: 512 31 | OUTPUT_RES: 128 32 | INPUT_H: 512 33 | INPUT_W: 512 34 | PAD: 31 35 | NUM_KEYPOINTS: 17 36 | TAG_PER_JOINT: true 37 | TARGET_TYPE: 'gaussian' 38 | SIGMA: 2 39 | 40 | LOSS: 41 | METRIC: 'loss' 42 | MSE_LOSS: false 43 | REG_LOSS: 'l1' 44 | USE_OHKM: false 45 | TOPK: 8 46 | USE_TARGET_WEIGHT: true 47 | USE_DIFFERENT_JOINTS_WEIGHT: false 48 | HP_WEIGHT: 1. 49 | HM_HP_WEIGHT: 1. 50 | DENSE_HP: false 51 | HM_HP: true 52 | REG_BBOX: true 53 | WH_WEIGHT: 0.1 54 | REG_OFFSET: true 55 | OFF_WEIGHT: 1. 56 | REG_HP_OFFSET: true 57 | HM_HP_WEIGHT: 1. 58 | 59 | DATASET: 60 | DATASET: 'coco_hp' 61 | TRAIN_SET: 'train' 62 | TEST_SET: 'valid' 63 | TRAIN_IMAGE_DIR: 'images/train2017' 64 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 65 | VAL_IMAGE_DIR: 'images/val2017' 66 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 67 | 68 | # training data augmentation 69 | MEAN: [0.408, 0.447, 0.470] 70 | STD: [0.289, 0.274, 0.278] 71 | SHIFT: 0.1 72 | SCALE: 0.4 73 | ROTATE: 0. 74 | # for pose 75 | AUG_ROT: 0. 76 | FLIP: 0.5 77 | NO_COLOR_AUG: false 78 | 79 | ROT_FACTOR: 30 80 | SCALE_MIN: 0.5 81 | SCALE_MAX: 1.1 82 | IMAGE_SIZE: 512 83 | RANDOM_CROP: true 84 | 85 | TRAIN: 86 | OPTIMIZER: 'adam' 87 | DISTRIBUTE: true 88 | LOCAL_RANK: 0 89 | HIDE_DATA_TIME: false 90 | SAVE_ALL_MODEL: false 91 | RESUME: false 92 | LR_FACTOR: 0.1 93 | LR_STEP: [270, 300] 94 | EPOCHS: 320 95 | NUM_ITERS: -1 96 | LR: 4.6875e-4 97 | BATCH_SIZE: 120 98 | MASTER_BATCH_SIZE: 30 99 | 100 | MOMENTUM: 0.9 101 | WD: 0.0001 102 | NESTEROV: false 103 | GAMMA1: 0.99 104 | GAMMA2: 0.0 105 | 106 | # 'apply and reset gradients every n batches' 107 | STRIDE_APPLY: 1 108 | 109 | CHECKPOINT: '' 110 | SHUFFLE: true 111 | VAL_INTERVALS: 1 112 | TRAINVAL: false 113 | 114 | TEST: 115 | # Test Model Epoch 116 | MODEL_PATH: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd/model_best.pth' 117 | TASK: 'multi_pose' 118 | FLIP_TEST: true 119 | 120 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 121 | MODEL_FILE: '' 122 | TEST_SCALES: [1] 123 | IMAGE_THRE: 0.1 124 | TOPK: 100 125 | NMS: false 126 | NMS_THRE: 0.5 127 | NOT_PREFETCH_TEST: false 128 | FIX_RES: false 129 | KEEP_RES: false 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.4 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /images/image1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/images/image1.jpeg -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import _C as cfg 2 | from .default import update_config 3 | -------------------------------------------------------------------------------- 
/lib/config/default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from yacs.config import CfgNode as CN 4 | 5 | _C = CN() 6 | 7 | _C.TASK = 'multi_pose' 8 | _C.SAMPLE_METHOD = 'coco_hp' 9 | _C.DATA_DIR = '/data' 10 | _C.EXP_ID = 'default' 11 | _C.DEBUG = 0 12 | _C.DEBUG_THEME = 'white' 13 | _C.TEST = False 14 | _C.SEED = 317 15 | _C.SAVE_RESULTS = False 16 | 17 | _C.OUTPUT_DIR = '' 18 | _C.LOG_DIR = '' 19 | _C.EXPERIMENT_NAME = '' 20 | _C.GPUS = [0, 1, 2, 3] 21 | _C.WORKERS = 4 22 | _C.PRINT_FREQ = 20 23 | _C.PIN_MEMORY = True 24 | _C.RANK = 0 25 | 26 | # Cudnn related params 27 | _C.CUDNN = CN() 28 | _C.CUDNN.ENABLED = True 29 | _C.CUDNN.BENCHMARK = True 30 | _C.CUDNN.DETERMINISTIC = False 31 | 32 | # common params for NETWORK 33 | _C.MODEL = CN() 34 | _C.MODEL.PRETRAINED = '' 35 | _C.MODEL.INIT_WEIGHTS = False 36 | _C.MODEL.NAME = 'res_50' 37 | # 0 for no conv layer, -1 for defaults setting, 64 for resnets and 256 for dla 38 | _C.MODEL.HEAD_CONV = 64 39 | _C.MODEL.INTERMEDIATE_CHANNEL = 64 40 | _C.MODEL.NUM_STACKS = 1 41 | _C.MODEL.HEADS_NAME = 'keypoint' 42 | _C.MODEL.HEADS_NUM = [1, 2, 34, 2, 17, 2] 43 | _C.MODEL.DOWN_RATIO = 4 44 | _C.MODEL.INPUT_RES = 512 45 | _C.MODEL.OUTPUT_RES = 128 46 | _C.MODEL.INPUT_H = 512 47 | _C.MODEL.INPUT_W = 512 48 | _C.MODEL.PAD = 31 49 | _C.MODEL.NUM_CLASSES = 1 50 | _C.MODEL.NUM_KEYPOINTS = 17 51 | _C.MODEL.TAG_PER_JOINT = True 52 | _C.MODEL.TARGET_TYPE = 'gaussian' 53 | _C.MODEL.SIGMA = 2 54 | _C.MODEL.CENTER_THRESH = 0.1 55 | _C.MODEL.EXTRA = CN(new_allowed=True) 56 | 57 | _C.LOSS = CN() 58 | _C.LOSS.METRIC = 'loss' 59 | _C.LOSS.MSE_LOSS = False 60 | _C.LOSS.USE_OHKM = False 61 | _C.LOSS.TOPK = 8 62 | _C.LOSS.USE_TARGET_WEIGHT = True 63 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 64 | 65 | # multi pose 66 | _C.LOSS.HP_WEIGHT = 1. 67 | _C.LOSS.HM_WEIGHT = 1. 68 | _C.LOSS.REG_LOSS = 'l1' 69 | _C.LOSS.HM_HP_WEIGHT = 1. 70 | _C.LOSS.DENSE_HP = False 71 | _C.LOSS.HM_HP = True 72 | _C.LOSS.REG_HP_OFFSET = True 73 | _C.LOSS.REG_BBOX = True 74 | _C.LOSS.WH_WEIGHT = 0.1 75 | _C.LOSS.REG_OFFSET = True 76 | _C.LOSS.OFF_WEIGHT = 1. 77 | 78 | 79 | # DATASET related params 80 | _C.DATASET = CN() 81 | _C.DATASET.DATASET = 'coco_hp' 82 | _C.DATASET.TRAIN_SET = 'train' 83 | _C.DATASET.TEST_SET = 'valid' 84 | _C.DATASET.TRAIN_IMAGE_DIR = 'images/train2017' 85 | _C.DATASET.TRAIN_ANNOTATIONS = ['person_keypoints_train2017.json'] 86 | _C.DATASET.VAL_IMAGE_DIR = 'images/val2017' 87 | _C.DATASET.VAL_ANNOTATIONS = 'person_keypoints_val2017.json' 88 | # training data augmentation 89 | _C.DATASET.MEAN = [0.408, 0.447, 0.470] 90 | _C.DATASET.STD = [0.289, 0.274, 0.278] 91 | _C.DATASET.RANDOM_CROP = True 92 | _C.DATASET.SHIFT = 0.1 93 | _C.DATASET.SCALE = 0.4 94 | _C.DATASET.ROTATE = 0. 95 | # for pose 96 | _C.DATASET.AUG_ROT = 0. 
97 | _C.DATASET.FLIP = 0.5 98 | _C.DATASET.NO_COLOR_AUG = False 99 | _C.DATASET.ROT_FACTOR = 30 100 | _C.DATASET.SCALE_MIN = 0.5 101 | _C.DATASET.SCALE_MAX = 1.1 102 | _C.DATASET.IMAGE_SIZE = 512 103 | 104 | # train 105 | _C.TRAIN = CN() 106 | 107 | _C.TRAIN.DISTRIBUTE = True 108 | _C.TRAIN.LOCAL_RANK = 0 109 | _C.TRAIN.HIDE_DATA_TIME = False 110 | _C.TRAIN.SAVE_ALL_MODEL = False 111 | _C.TRAIN.RESUME = False 112 | _C.TRAIN.LR_FACTOR = 0.1 113 | _C.TRAIN.LR_STEP = [90, 120] 114 | _C.TRAIN.EPOCHS = 140 115 | _C.TRAIN.NUM_ITERS = -1 116 | _C.TRAIN.LR = 1.25e-4 117 | _C.TRAIN.BATCH_SIZE = 32 118 | _C.TRAIN.MASTER_BATCH_SIZE = -1 119 | 120 | 121 | _C.TRAIN.OPTIMIZER = 'adam' 122 | _C.TRAIN.MOMENTUM = 0.9 123 | _C.TRAIN.WD = 0.0001 124 | _C.TRAIN.NESTEROV = False 125 | _C.TRAIN.GAMMA1 = 0.99 126 | _C.TRAIN.GAMMA2 = 0.0 127 | 128 | 129 | # 'apply and reset gradients every n batches' 130 | _C.TRAIN.STRIDE_APPLY = 1 131 | 132 | _C.TRAIN.RESUME = False 133 | _C.TRAIN.CHECKPOINT = '' 134 | _C.TRAIN.SHUFFLE = True 135 | _C.TRAIN.VAL_INTERVALS = 5 136 | _C.TRAIN.TRAINVAL = False 137 | 138 | # testing 139 | _C.TEST = CN() 140 | # size of images for each device 141 | _C.TEST.BATCH_SIZE_PER_GPU = 32 142 | # Test Model Epoch 143 | _C.TEST.FLIP_TEST = False 144 | _C.TEST.TASK = 'multi_pose' 145 | _C.TEST.MODEL_PATH = '' 146 | _C.TEST.DEMO_FILE = '' 147 | _C.TEST.MODEL_FILE = '' 148 | _C.TEST.TEST_SCALES = [1] 149 | _C.TEST.IMAGE_THRE = 0.1 150 | _C.TEST.TOPK = 100 151 | _C.TEST.NMS = False 152 | _C.TEST.NMS_THRE = 0.5 153 | _C.TEST.NOT_PREFETCH_TEST = False 154 | _C.TEST.FIX_RES = True 155 | _C.TEST.KEEP_RES = False 156 | 157 | _C.TEST.SOFT_NMS = False 158 | _C.TEST.OKS_THRE = 0.5 159 | _C.TEST.VIS_THRESH = 0.3 160 | _C.TEST.KEYPOINT_THRESH = 0.2 161 | _C.TEST.NUM_MIN_KPT = 4 162 | _C.TEST.THRESH_HUMAN = 0.4 163 | 164 | _C.TEST.EVAL_ORACLE_HM = False 165 | _C.TEST.EVAL_ORACLE_WH = False 166 | _C.TEST.EVAL_ORACLE_OFFSET = False 167 | _C.TEST.EVAL_ORACLE_KPS = False 168 | _C.TEST.EVAL_ORACLE_HMHP = False 169 | _C.TEST.EVAL_ORACLE_HP_OFFSET = False 170 | _C.TEST.EVAL_ORACLE_DEP = False 171 | 172 | 173 | def update_config(cfg, args_cfg): 174 | 175 | cfg.defrost() 176 | cfg.merge_from_file(args_cfg) 177 | cfg.freeze() 178 | 179 | 180 | if __name__ == '__main__': 181 | import sys 182 | 183 | with open(sys.argv[1], 'w') as f: 184 | print(_C, file=f) 185 | -------------------------------------------------------------------------------- /lib/datasets/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import json 4 | import os 5 | import time 6 | 7 | import numpy as np 8 | import pycocotools.coco as coco 9 | import torch.utils.data as data 10 | from pycocotools.cocoeval import COCOeval 11 | 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 1 15 | num_joints = 17 16 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 17 | [11, 12], [13, 14], [15, 16]] 18 | 19 | def __init__(self, cfg, split): 20 | super(COCOHP, self).__init__() 21 | 22 | self.data_dir = os.path.join(cfg.DATA_DIR, 'coco') 23 | self.img_dir = os.path.join(self.data_dir, 'images', '{}2017'.format(split)) 24 | if split == 'test': 25 | self.annot_path = os.path.join( 26 | self.data_dir, 'annotations', 27 | 'image_info_test-dev2017.json').format(split) 28 | else: 29 | self.annot_path = os.path.join( 30 | self.data_dir, 'annotations', 31 | 'person_keypoints_{}2017.json').format(split) 32 | self.max_objs = 32 33 | self._valid_ids = 
[1] 34 | self.class_name = ['__background__', 'person'] 35 | self._data_rng = np.random.RandomState(123) 36 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 37 | dtype=np.float32) 38 | self._eig_vec = np.array([ 39 | [-0.58752847, -0.69563484, 0.41340352], 40 | [-0.5832747, 0.00994535, -0.81221408], 41 | [-0.56089297, 0.71832671, 0.41158938] 42 | ], dtype=np.float32) 43 | self.split = split 44 | self.cfg = cfg 45 | 46 | print('==> initializing coco 2017 {} data.'.format(split)) 47 | self.coco = coco.COCO(self.annot_path) 48 | images = self.coco.getImgIds() 49 | catIds = self.coco.getCatIds(self.class_name[-1]) 50 | assert catIds == self._valid_ids 51 | self.images = self.coco.getImgIds(images,catIds) 52 | self.num_samples = len(self.images) 53 | 54 | print('Loaded {} {} samples'.format(split, self.num_samples)) 55 | 56 | def _to_float(self, x): 57 | return float("{:.2f}".format(x)) 58 | 59 | def convert_eval_format(self, all_bboxes): 60 | detections = [] 61 | for image_id in all_bboxes: 62 | category_id = 1 63 | for dets in all_bboxes[image_id][category_id]: 64 | bbox = dets[:4] 65 | bbox[2] -= bbox[0] 66 | bbox[3] -= bbox[1] 67 | score = dets[4] 68 | keypoint_prob = np.array(np.array(dets[39:56])>0.1).astype(np.int32).reshape(17,1) 69 | keypoints = np.array(dets[5:39], dtype=np.float32).reshape(-1, 2) 70 | bbox_out = list(map(self._to_float, bbox)) 71 | keypoints_pred = np.concatenate([ 72 | keypoints, keypoint_prob], axis=1).reshape(51).tolist() 73 | keypoints_pred = list(map(self._to_float, keypoints_pred)) 74 | 75 | detection = { 76 | "image_id": int(image_id), 77 | "category_id": int(category_id), 78 | "bbox": bbox_out, 79 | "score": float("{:.2f}".format(score)), 80 | "keypoints": keypoints_pred 81 | } 82 | detections.append(detection) 83 | return detections 84 | 85 | def __len__(self): 86 | return self.num_samples 87 | 88 | def save_results(self, results, save_dir): 89 | json.dump(self.convert_eval_format(results), 90 | open('{}/results.json'.format(save_dir), 'w')) 91 | 92 | 93 | def run_eval(self, results, save_dir): 94 | #self.save_results(results, save_dir) 95 | #seconds = time.time() 96 | #local_time = time.ctime(seconds).replace(' ', '_').replace(':','_') 97 | #coco_dets = self.coco.loadRes('{}/{}_results.json'.format(save_dir, local_time)) 98 | coco_dets = self.coco.loadRes(self.convert_eval_format(results)) 99 | #coco_eval = COCOeval(self.coco, coco_dets, "bbox") 100 | #coco_eval.evaluate() 101 | #coco_eval.accumulate() 102 | 103 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 104 | coco_eval.evaluate() 105 | coco_eval.accumulate() 106 | coco_eval.summarize() 107 | return coco_eval.stats[0] 108 | -------------------------------------------------------------------------------- /lib/datasets/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | COCO_PERSON_SKELETON = [ 4 | (16, 14), (14, 12), (17, 15), (15, 13), (12, 13), (6, 12), (7, 13), 5 | (6, 7), (6, 8), (7, 9), (8, 10), (9, 11), (2, 3), (1, 2), (1, 3), 6 | (2, 4), (3, 5), (4, 6), (5, 7), 7 | ] 8 | 9 | COCO_KEYPOINTS = [ 10 | 'nose', # 1 11 | 'left_eye', # 2 12 | 'right_eye', # 3 13 | 'left_ear', # 4 14 | 'right_ear', # 5 15 | 'left_shoulder', # 6 16 | 'right_shoulder', # 7 17 | 'left_elbow', # 8 18 | 'right_elbow', # 9 19 | 'left_wrist', # 10 20 | 'right_wrist', # 11 21 | 'left_hip', # 12 22 | 'right_hip', # 13 23 | 'left_knee', # 14 24 | 'right_knee', # 15 25 | 'left_ankle', # 16 26 | 'right_ankle', # 17 27 | ] 28 | 29 | 30 | 
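# NOTE: the pairs in COCO_PERSON_SKELETON above are 1-based indices into
# COCO_KEYPOINTS, following the COCO annotation convention. For example,
# (16, 14) is the left_ankle/left_knee limb:
#
#     limbs = [(COCO_KEYPOINTS[a - 1], COCO_KEYPOINTS[b - 1])
#              for a, b in COCO_PERSON_SKELETON]
#     limbs[0]  # ('left_ankle', 'left_knee')
#
# The 0-based flip_idx pairs hard-coded in datasets/coco_hp.py and in the
# multi_pose detector mirror the left/right joints that HFLIP below lists by name.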
HFLIP = { 31 | 'left_eye': 'right_eye', 32 | 'right_eye': 'left_eye', 33 | 'left_ear': 'right_ear', 34 | 'right_ear': 'left_ear', 35 | 'left_shoulder': 'right_shoulder', 36 | 'right_shoulder': 'left_shoulder', 37 | 'left_elbow': 'right_elbow', 38 | 'right_elbow': 'left_elbow', 39 | 'left_wrist': 'right_wrist', 40 | 'right_wrist': 'left_wrist', 41 | 'left_hip': 'right_hip', 42 | 'right_hip': 'left_hip', 43 | 'left_knee': 'right_knee', 44 | 'right_knee': 'left_knee', 45 | 'left_ankle': 'right_ankle', 46 | 'right_ankle': 'left_ankle', 47 | } 48 | -------------------------------------------------------------------------------- /lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from .coco_hp import COCOHP 4 | from .multi_pose import MultiPoseDataset 5 | 6 | dataset_factory = { 7 | 'coco_hp': COCOHP, 8 | } 9 | 10 | _sample_factory = { 11 | 'multi_pose': MultiPoseDataset, 12 | } 13 | 14 | 15 | def get_dataset(dataset, task): 16 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 17 | pass 18 | return Dataset 19 | -------------------------------------------------------------------------------- /lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from progress.bar import Bar 9 | 10 | from models.model import create_model, load_model 11 | from utils.debugger import Debugger 12 | from utils.image import get_affine_transform 13 | 14 | 15 | class BaseDetector(object): 16 | def __init__(self, cfg): 17 | 18 | print('Creating model...') 19 | self.model = create_model(cfg.MODEL.NAME, cfg.MODEL.HEAD_CONV, cfg) 20 | self.model = load_model(self.model, cfg.TEST.MODEL_PATH) 21 | self.model = self.model.to(torch.device('cuda')) 22 | self.model.eval() 23 | 24 | self.mean = np.array(cfg.DATASET.MEAN, dtype=np.float32).reshape(1, 1, 3) 25 | self.std = np.array(cfg.DATASET.STD, dtype=np.float32).reshape(1, 1, 3) 26 | self.max_per_image = 100 27 | self.num_classes = cfg.MODEL.NUM_CLASSES 28 | self.scales = cfg.TEST.TEST_SCALES 29 | self.cfg = cfg 30 | self.pause = True 31 | 32 | def pre_process(self, image, scale, meta=None): 33 | height, width = image.shape[0:2] 34 | 35 | new_height = int(height * scale) 36 | new_width = int(width * scale) 37 | if self.cfg.TEST.FIX_RES: 38 | inp_height, inp_width = self.cfg.MODEL.INPUT_H, self.cfg.MODEL.INPUT_W 39 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 40 | s = max(height, width) * 1.0 41 | else: 42 | inp_height = (new_height | self.cfg.MODEL.PAD) + 1 43 | inp_width = (new_width | self.cfg.MODEL.PAD) + 1 44 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 45 | s = np.array([inp_width, inp_height], dtype=np.float32) 46 | 47 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 48 | resized_image = cv2.resize(image, (new_width, new_height)) 49 | inp_image = cv2.warpAffine( 50 | resized_image, trans_input, (inp_width, inp_height), 51 | flags=cv2.INTER_LINEAR) 52 | 53 | inp_image = ((inp_image / 255. 
- self.mean) / self.std).astype(np.float32) 54 | 55 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 56 | if self.cfg.TEST.FLIP_TEST: 57 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 58 | images = torch.from_numpy(images) 59 | meta = {'c': c, 's': s, 60 | 'out_height': inp_height // self.cfg.MODEL.DOWN_RATIO, 61 | 'out_width': inp_width // self.cfg.MODEL.DOWN_RATIO} 62 | return images, meta 63 | 64 | def process(self, images, return_time=False): 65 | raise NotImplementedError 66 | 67 | def post_process(self, dets, meta, scale=1): 68 | raise NotImplementedError 69 | 70 | def merge_outputs(self, detections): 71 | raise NotImplementedError 72 | 73 | def debug(self, debugger, images, dets, output, scale=1): 74 | raise NotImplementedError 75 | 76 | def show_results(self, debugger, image, results): 77 | raise NotImplementedError 78 | 79 | def run(self, image_or_path_or_tensor, meta=None): 80 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 81 | merge_time, tot_time = 0, 0 82 | debugger = Debugger((self.cfg.DEBUG==3), theme=self.cfg.DEBUG_THEME, 83 | num_classes=self.cfg.MODEL.NUM_CLASSES, dataset=self.cfg.SAMPLE_METHOD, down_ratio=self.cfg.MODEL.DOWN_RATIO) 84 | start_time = time.time() 85 | pre_processed = False 86 | if isinstance(image_or_path_or_tensor, np.ndarray): 87 | image = image_or_path_or_tensor 88 | elif type(image_or_path_or_tensor) == type (''): 89 | image = cv2.imread(image_or_path_or_tensor) 90 | else: 91 | image = image_or_path_or_tensor['image'][0].numpy() 92 | pre_processed_images = image_or_path_or_tensor 93 | pre_processed = True 94 | 95 | loaded_time = time.time() 96 | load_time += (loaded_time - start_time) 97 | 98 | detections = [] 99 | for scale in self.scales: 100 | scale_start_time = time.time() 101 | if not pre_processed: 102 | images, meta = self.pre_process(image, scale, meta) 103 | else: 104 | images = pre_processed_images['images'][scale][0] 105 | meta = pre_processed_images['meta'][scale] 106 | meta = {k: v.numpy()[0] for k, v in meta.items()} 107 | images = images.to(torch.device('cuda')) 108 | torch.cuda.synchronize() 109 | pre_process_time = time.time() 110 | pre_time += pre_process_time - scale_start_time 111 | 112 | output, dets, forward_time = self.process(images, return_time=True) 113 | 114 | torch.cuda.synchronize() 115 | net_time += forward_time - pre_process_time 116 | decode_time = time.time() 117 | dec_time += decode_time - forward_time 118 | 119 | if self.cfg.DEBUG >= 2: 120 | self.debug(debugger, images, dets, output, scale) 121 | 122 | dets= self.post_process(dets, meta, scale) 123 | torch.cuda.synchronize() 124 | post_process_time = time.time() 125 | post_time += post_process_time - decode_time 126 | 127 | detections.append(dets) 128 | 129 | results = self.merge_outputs(detections) 130 | torch.cuda.synchronize() 131 | end_time = time.time() 132 | merge_time += end_time - post_process_time 133 | tot_time += end_time - start_time 134 | 135 | if self.cfg.DEBUG >= 1: 136 | self.show_results(debugger, image, results) 137 | 138 | return {'results': {1:results}, 'tot': tot_time, 'load': load_time, 139 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 140 | 'post': post_time, 'merge': merge_time} 141 | -------------------------------------------------------------------------------- /lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from 
.multi_pose import MultiPoseDetector 4 | 5 | detector_factory = { 6 | 'multi_pose': MultiPoseDetector, 7 | } 8 | -------------------------------------------------------------------------------- /lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from progress.bar import Bar 9 | 10 | from models.decode import multi_pose_decode 11 | from models.utils import flip_lr, flip_lr_off, flip_tensor 12 | from utils.debugger import Debugger 13 | from utils.image import get_affine_transform 14 | from utils.post_process import multi_pose_post_process 15 | from .base_detector import BaseDetector 16 | 17 | try: 18 | from external.nms import soft_nms_39 19 | except: 20 | print('NMS not imported! If you need it,' 21 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 22 | 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, cfg): 26 | super(MultiPoseDetector, self).__init__(cfg) 27 | self.flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | outputs = self.model(images) 33 | hm, wh, hps, reg, hm_hp, hp_offset = outputs 34 | 35 | hm = hm.sigmoid_() 36 | if self.cfg.LOSS.HM_HP and not self.cfg.LOSS.MSE_LOSS: 37 | hm_hp = hm_hp.sigmoid_() 38 | 39 | reg = reg if self.cfg.LOSS.REG_OFFSET else None 40 | hm_hp = hm_hp if self.cfg.LOSS.HM_HP else None 41 | hp_offset = hp_offset if self.cfg.LOSS.REG_HP_OFFSET else None 42 | torch.cuda.synchronize() 43 | forward_time = time.time() 44 | 45 | if self.cfg.TEST.FLIP_TEST: 46 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 47 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 48 | hps = (hps[0:1] + 49 | flip_lr_off(hps[1:2], self.flip_idx)) / 2 50 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 51 | if hm_hp is not None else None 52 | reg = reg[0:1] if reg is not None else None 53 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 54 | 55 | dets = multi_pose_decode(hm, wh, hps, reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.cfg.TEST.TOPK) 56 | 57 | if return_time: 58 | return outputs, dets, forward_time 59 | else: 60 | return outputs, dets 61 | 62 | def post_process(self, dets, meta, scale=1): 63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 64 | dets= multi_pose_post_process( 65 | dets.copy(), [meta['c']], [meta['s']], 66 | meta['out_height'], meta['out_width']) 67 | for j in range(1, self.num_classes + 1): 68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 56) 69 | dets[0][j][:, :4] /= scale 70 | dets[0][j][:, 5:39] /= scale 71 | return dets[0] 72 | 73 | def merge_outputs(self, detections): 74 | results = np.concatenate( 75 | [detection[1] for detection in detections], axis=0).astype(np.float32) 76 | if self.cfg.TEST.NMS or len(self.cfg.TEST.TEST_SCALES) > 1: 77 | soft_nms_39(results, Nt=0.5, method=2) 78 | results = results.tolist() 79 | return results 80 | 81 | 82 | def debug(self, debugger, images, dets, output, scale=1): 83 | dets = dets.detach().cpu().numpy().copy() 84 | dets[:, :, :4] *= self.cfg.MODEL.DOWN_RATIO 85 | dets[:, :, 5:39] *= self.cfg.MODEL.DOWN_RATIO 86 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 87 | img = np.clip((( 88 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 89 | pred = 
debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 90 | debugger.add_blend_img(img, pred, 'pred_hm') 91 | if self.LOSS.HM_HP: 92 | pred = debugger.gen_colormap_hp( 93 | output['hm_hp'][0].detach().cpu().numpy()) 94 | debugger.add_blend_img(img, pred, 'pred_hmhp') 95 | 96 | def show_results(self, debugger, image, results): 97 | debugger.add_img(image, img_id='multi_pose') 98 | for b_id, detection in enumerate(results): 99 | bbox = detection[:4] 100 | bbox_prob = detection[4] 101 | keypoints = detection[5:39] 102 | keypoints_prob = detection[39:] 103 | if bbox_prob > self.cfg.TEST.VIS_THRESH: 104 | debugger.add_coco_bbox(bbox, 0, bbox_prob, img_id='multi_pose') 105 | debugger.add_coco_hp(keypoints, keypoints_prob, img_id='multi_pose') 106 | 107 | debugger.show_all_imgs(pause=self.pause) 108 | -------------------------------------------------------------------------------- /lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/__init__.py -------------------------------------------------------------------------------- /lib/external/build/temp.linux-x86_64-3.6/nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/build/temp.linux-x86_64-3.6/nms.o -------------------------------------------------------------------------------- /lib/external/make.sh: -------------------------------------------------------------------------------- 1 | python setup.py build_ext --inplace 2 | -------------------------------------------------------------------------------- /lib/external/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | 4 | import numpy 5 | from Cython.Build import cythonize 6 | 7 | extensions = [ 8 | Extension( 9 | "nms", 10 | ["nms.pyx"], 11 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 12 | ) 13 | ] 14 | 15 | setup( 16 | name="coco", 17 | ext_modules=cythonize(extensions), 18 | include_dirs=[numpy.get_include()] 19 | ) 20 | -------------------------------------------------------------------------------- /lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 4 | import os 5 | import sys 6 | import time 7 | 8 | import torch 9 | 10 | USE_TENSORBOARD = False 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, cfg): 19 
| """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(cfg.OUTPUT_DIR): 21 | try: 22 | os.makedirs(cfg.OUTPUT_DIR) 23 | except: 24 | pass 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | file_name = os.path.join(cfg.OUTPUT_DIR, 'opt.txt') 28 | with open(file_name, 'wt') as opt_file: 29 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 30 | opt_file.write('==> cudnn version: {}\n'.format( 31 | torch.backends.cudnn.version())) 32 | opt_file.write('==> Cmd:\n') 33 | opt_file.write(str(sys.argv)) 34 | opt_file.write('\n==> Opt:\n') 35 | 36 | log_dir = cfg.OUTPUT_DIR + '/logs_{}'.format(time_str) 37 | if USE_TENSORBOARD: 38 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 39 | else: 40 | try: 41 | os.makedirs(os.path.dirname(log_dir)) 42 | except: 43 | pass 44 | try: 45 | os.makedirs(log_dir) 46 | except: 47 | pass 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(cfg.OUTPUT_DIR, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with PyTorch 1.0 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # modulated deformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | ### Note 42 | The master branch now targets PyTorch 1.0 (the new ATen API); you can switch back to PyTorch 0.4 with: 43 | ```bash 44 | git checkout pytorch_0.4 45 | ``` 46 | 47 | ### Known Issues 48 | 49 | - [x] Gradient check w.r.t. offset (solved) 50 | - [ ] Backward is not reentrant (minor) 51 | 52 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 53 | 54 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes. 55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some 56 | non-differentiable points? 57 | 58 | Update: all gradient checks pass with double precision. 59 | 60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 61 | float, `<1e-15` for double), 62 | so it may not be a serious problem (?) 63 | 64 | Please post an issue or PR if you have any comments.
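For context on how this repository consumes the op: `DCN` keeps the same input/output contract as `nn.Conv2d` for matching hyper-parameters, which is what lets the DCN-based backbones (e.g. `resnet_dcn.py`, `pose_dla_dcn.py`, `shufflenetv2_dcn.py`) swap it in for a regular convolution. A minimal shape check, illustrative only and assuming the extension has been built and a GPU is available:

```python
import torch
import torch.nn as nn
from dcn_v2 import DCN

x = torch.randn(2, 64, 128, 128).cuda()
plain = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1).cuda()
deform = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda()
assert plain(x).shape == deform(x).shape == torch.Size([2, 64, 128, 128])
```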
65 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/DCNv2/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import find_packages, setup 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | requirements = ["torch", "torchvision"] 11 | 12 | def get_extensions(): 13 | this_dir = os.path.dirname(os.path.abspath(__file__)) 14 | extensions_dir = os.path.join(this_dir, "src") 15 | 16 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 17 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 18 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 19 | 20 | sources = main_file + source_cpu 21 | extension = CppExtension 22 | extra_compile_args = {"cxx": []} 23 | define_macros = [] 24 | 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | extension = CUDAExtension 27 | sources += source_cuda 28 | define_macros += [("WITH_CUDA", None)] 29 | extra_compile_args["nvcc"] = [ 30 | "-DCUDA_HAS_FP16=1", 31 | "-D__CUDA_NO_HALF_OPERATORS__", 32 | "-D__CUDA_NO_HALF_CONVERSIONS__", 33 | "-D__CUDA_NO_HALF2_OPERATORS__", 34 | ] 35 | else: 36 | raise NotImplementedError('Cuda is not availabel') 37 | 38 | sources = [os.path.join(extensions_dir, s) for s in sources] 39 | include_dirs = [extensions_dir] 40 | ext_modules = [ 41 | extension( 42 | "_ext", 43 | sources, 44 | include_dirs=include_dirs, 45 | define_macros=define_macros, 46 | extra_compile_args=extra_compile_args, 47 | ) 48 | ] 49 | return ext_modules 50 | 51 | setup( 52 | name="DCNv2", 53 | version="0.1", 54 | author="charlesshang", 55 | url="https://github.com/charlesshang/DCNv2", 56 | description="deformable convolutional networks", 57 | packages=find_packages(exclude=("configs", "tests",)), 58 | # install_requires=requirements, 59 | ext_modules=get_extensions(), 60 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 61 | ) 62 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | 7 | at::Tensor 8 | dcn_v2_cpu_forward(const at::Tensor &input, 9 | const at::Tensor &weight, 10 | const at::Tensor &bias, 11 | const at::Tensor &offset, 12 | const at::Tensor &mask, 13 | const int kernel_h, 14 | const int kernel_w, 15 | const int stride_h, 16 | const int stride_w, 17 | const int pad_h, 18 | const int pad_w, 19 | const int dilation_h, 20 | const int dilation_w, 21 | const int deformable_group) 22 | { 23 | AT_ERROR("Not implement on cpu"); 24 | } 25 | 26 | std::vector 27 | dcn_v2_cpu_backward(const at::Tensor &input, 28 | const at::Tensor &weight, 29 | const 
at::Tensor &bias, 30 | const at::Tensor &offset, 31 | const at::Tensor &mask, 32 | const at::Tensor &grad_output, 33 | int kernel_h, int kernel_w, 34 | int stride_h, int stride_w, 35 | int pad_h, int pad_w, 36 | int dilation_h, int dilation_w, 37 | int deformable_group) 38 | { 39 | AT_ERROR("Not implement on cpu"); 40 | } 41 | 42 | std::tuple 43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 44 | const at::Tensor &bbox, 45 | const at::Tensor &trans, 46 | const int no_trans, 47 | const float spatial_scale, 48 | const int output_dim, 49 | const int group_size, 50 | const int pooled_size, 51 | const int part_size, 52 | const int sample_per_part, 53 | const float trans_std) 54 | { 55 | AT_ERROR("Not implement on cpu"); 56 | } 57 | 58 | std::tuple 59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 60 | const at::Tensor &input, 61 | const at::Tensor &bbox, 62 | const at::Tensor &trans, 63 | const at::Tensor &top_count, 64 | const int no_trans, 65 | const float spatial_scale, 66 | const int output_dim, 67 | const int group_size, 68 | const int pooled_size, 69 | const int part_size, 70 | const int sample_per_part, 71 | const float trans_std) 72 | { 73 | AT_ERROR("Not implement on cpu"); 74 | } -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 
58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 
52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector 42 | dcn_v2_backward(const at::Tensor &input, 43 | const at::Tensor &weight, 44 | const at::Tensor &bias, 45 | const at::Tensor &offset, 46 | const at::Tensor &mask, 47 | const at::Tensor &grad_output, 48 | int kernel_h, int kernel_w, 49 | int stride_h, int stride_w, 50 | int pad_h, int pad_w, 51 | int dilation_h, int dilation_w, 52 | int deformable_group) 53 | { 54 | if (input.type().is_cuda()) 55 | { 56 | #ifdef WITH_CUDA 57 | return dcn_v2_cuda_backward(input, 58 | weight, 59 | bias, 60 | offset, 61 | mask, 62 | grad_output, 63 | kernel_h, kernel_w, 64 | stride_h, stride_w, 65 | pad_h, pad_w, 66 | dilation_h, dilation_w, 67 | deformable_group); 68 | #else 69 | AT_ERROR("Not compiled with GPU support"); 70 | #endif 71 | } 72 | AT_ERROR("Not implemented on the CPU"); 73 | } 74 | 75 | std::tuple 76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 77 | const at::Tensor &bbox, 78 | const at::Tensor &trans, 79 | const int no_trans, 80 | const float spatial_scale, 81 | const int output_dim, 82 | const int group_size, 83 | const int pooled_size, 84 | const int part_size, 85 | const int sample_per_part, 86 | const float trans_std) 87 | { 88 | if (input.type().is_cuda()) 89 | { 90 | #ifdef WITH_CUDA 91 | return dcn_v2_psroi_pooling_cuda_forward(input, 92 | bbox, 93 | trans, 94 | no_trans, 95 | spatial_scale, 96 | output_dim, 97 | group_size, 98 | pooled_size, 99 | part_size, 100 | sample_per_part, 101 | trans_std); 102 | #else 103 | AT_ERROR("Not compiled with GPU support"); 104 | #endif 105 | } 106 | AT_ERROR("Not implemented on the CPU"); 107 | } 108 | 109 | std::tuple 110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 111 | const at::Tensor &input, 112 | const at::Tensor &bbox, 113 | const at::Tensor &trans, 114 | const at::Tensor &top_count, 115 | const int no_trans, 116 | const float spatial_scale, 117 | const int output_dim, 118 | const int group_size, 119 | const int pooled_size, 120 | const int part_size, 121 | const int sample_per_part, 122 | const float trans_std) 123 | { 124 | if (input.type().is_cuda()) 125 | { 126 | #ifdef WITH_CUDA 127 | return 
dcn_v2_psroi_pooling_cuda_backward(out_grad, 128 | input, 129 | bbox, 130 | trans, 131 | top_count, 132 | no_trans, 133 | spatial_scale, 134 | output_dim, 135 | group_size, 136 | pooled_size, 137 | part_size, 138 | sample_per_part, 139 | trans_std); 140 | #else 141 | AT_ERROR("Not compiled with GPU support"); 142 | #endif 143 | } 144 | AT_ERROR("Not implemented on the CPU"); 145 | } -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /lib/models/backbones/Utitled Document: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/Utitled Document -------------------------------------------------------------------------------- /lib/models/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/darknet.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class BasicBlock(nn.Module): 10 | def __init__(self, inplanes, planes): 11 | super(BasicBlock, self).__init__() 12 | self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1, 13 | stride=1, padding=0, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes[0]) 15 | self.relu1 = nn.LeakyReLU(0.1) 16 | self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3, 17 | stride=1, padding=1, bias=False) 18 | self.bn2 = nn.BatchNorm2d(planes[1]) 19 | self.relu2 = nn.LeakyReLU(0.1) 20 | 21 | def forward(self, x): 22 | residual = x 23 | 24 | out = self.conv1(x) 25 | out = self.bn1(out) 26 | out = self.relu1(out) 27 | 28 | out = self.conv2(out) 29 | out = self.bn2(out) 30 | out = self.relu2(out) 31 | 32 | out += residual 33 | return out 34 | 35 | 36 | class DarkNet(nn.Module): 37 | def __init__(self, layers): 38 | super(DarkNet, self).__init__() 39 | self.inplanes = 32 40 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) 41 | self.bn1 = nn.BatchNorm2d(self.inplanes) 42 | self.relu1 = nn.LeakyReLU(0.1) 43 | 44 | self.layer1 = self._make_layer([32, 64], layers[0]) 45 | self.layer2 = self._make_layer([64, 128], layers[1]) 46 | self.layer3 = self._make_layer([128, 256], layers[2]) 47 | #self.layer4 = self._make_layer([256, 512], layers[3]) 48 | #self.layer5 = self._make_layer([512, 1024], layers[4]) 49 | 50 | self.layers_out_filters = [64, 128, 256] 51 | 52 | for m in self.modules(): 53 | if isinstance(m, nn.BatchNorm2d): 54 | m.weight.data.fill_(1) 55 | 
m.bias.data.zero_() 56 | 57 | def _make_layer(self, planes, blocks): 58 | layers = [] 59 | # downsample 60 | layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3, 61 | stride=2, padding=1, bias=False))) 62 | layers.append(("ds_bn", nn.BatchNorm2d(planes[1]))) 63 | layers.append(("ds_relu", nn.LeakyReLU(0.1))) 64 | # blocks 65 | self.inplanes = planes[1] 66 | for i in range(0, blocks): 67 | layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes))) 68 | return nn.Sequential(OrderedDict(layers)) 69 | 70 | def forward(self, x): 71 | x = self.conv1(x) 72 | x = self.bn1(x) 73 | x = self.relu1(x) 74 | 75 | x = self.layer1(x) 76 | x = self.layer2(x) 77 | x = self.layer3(x) 78 | x = F.interpolate(x, size=(128, 128), 79 | mode="bilinear", align_corners=True) 80 | 81 | return x 82 | 83 | 84 | def darknet21(cfg,is_train=True, **kwargs): 85 | model = DarkNet([1, 1, 2, 2, 1]) 86 | if is_train and cfg.BACKBONE.INIT_WEIGHTS: 87 | if isinstance(cfg.BACKBONE.PRETRAINED, str): 88 | model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED)) 89 | else: 90 | raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED)) 91 | return model 92 | 93 | def darknet53(num_layers, cfg): 94 | model = DarkNet([1, 2, 8]) 95 | #if is_train and cfg.BACKBONE.INIT_WEIGHTS: 96 | # if isinstance(cfg.BACKBONE.PRETRAINED, str): 97 | # model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED)) 98 | # else: 99 | # raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED)) 100 | return model 101 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | 3 | 4 | def get_efficientdet(num_layers, cfg): 5 | model = EfficientDet(intermediate_channels=cfg.MODEL.INTERMEDIATE_CHANNEL) 6 | return model 7 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/efficientdet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from .efficientnet import EfficientNet 5 | from .bifpn import BIFPN 6 | from .retinahead import RetinaHead 7 | from torchvision.ops import nms 8 | import torch.nn.functional as F 9 | 10 | MODEL_MAP = { 11 | 'efficientdet-d0': 'efficientnet-b0', 12 | 'efficientdet-d1': 'efficientnet-b1', 13 | 'efficientdet-d2': 'efficientnet-b2', 14 | 'efficientdet-d3': 'efficientnet-b3', 15 | 'efficientdet-d4': 'efficientnet-b4', 16 | 'efficientdet-d5': 'efficientnet-b5', 17 | 'efficientdet-d6': 'efficientnet-b6', 18 | 'efficientdet-d7': 'efficientnet-b6', 19 | } 20 | class EfficientDet(nn.Module): 21 | def __init__(self, 22 | intermediate_channels, 23 | network = 'efficientdet-d0', 24 | D_bifpn=3, 25 | W_bifpn=32, 26 | D_class=3, 27 | scale_ratios = [0.5, 1, 2, 4, 8,16,32], 28 | ): 29 | super(EfficientDet, self).__init__() 30 | self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network]) 31 | self.neck = BIFPN(in_channels=self.backbone.get_list_features(), 32 | out_channels=W_bifpn, 33 | stack=D_bifpn, 34 | num_outs=7) 35 | self.bbox_head = RetinaHead(num_classes = intermediate_channels, 36 | in_channels = W_bifpn) 37 | 38 | self.scale_ratios = scale_ratios 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | n = m.kernel_size[0] * m.kernel_size[1] * 
m.out_channels 42 | m.weight.data.normal_(0, math.sqrt(2. / n)) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | m.weight.data.fill_(1) 45 | m.bias.data.zero_() 46 | self.freeze_bn() 47 | 48 | def forward(self, inputs): 49 | x = self.extract_feat(inputs) 50 | outs = self.bbox_head(x) 51 | 52 | return outs[0][1] 53 | 54 | def freeze_bn(self): 55 | '''Freeze BatchNorm layers.''' 56 | for layer in self.modules(): 57 | if isinstance(layer, nn.BatchNorm2d): 58 | layer.eval() 59 | def extract_feat(self, img): 60 | """ 61 | Directly extract features from the backbone+neck 62 | """ 63 | x = self.backbone(img) 64 | x = self.neck(x) 65 | return x 66 | 67 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/retinahead.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | from .conv_module import ConvModule, bias_init_with_prob, normal_init 5 | from six.moves import map, zip 6 | 7 | def multi_apply(func, *args, **kwargs): 8 | pfunc = partial(func, **kwargs) if kwargs else func 9 | map_results = map(pfunc, *args) 10 | return tuple(map(list, zip(*map_results))) 11 | 12 | class RetinaHead(nn.Module): 13 | """ 14 | An anchor-based head used in [1]_. 15 | The head contains two subnetworks. The first classifies anchor boxes and 16 | the second regresses deltas for the anchors. 17 | References: 18 | .. [1] https://arxiv.org/pdf/1708.02002.pdf 19 | Example: 20 | >>> import torch 21 | >>> self = RetinaHead(11, 7) 22 | >>> x = torch.rand(1, 7, 32, 32) 23 | >>> cls_score, bbox_pred = self.forward_single(x) 24 | >>> # Each anchor predicts a score for each class except background 25 | >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors 26 | >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors 27 | >>> assert cls_per_anchor == (self.num_classes - 1) 28 | >>> assert box_per_anchor == 4 29 | """ 30 | 31 | def __init__(self, 32 | num_classes, 33 | in_channels, 34 | feat_channels=64, 35 | stacked_convs=4, 36 | octave_base_scale=4, 37 | scales_per_octave=3, 38 | conv_cfg=None, 39 | norm_cfg=None, 40 | **kwargs): 41 | super(RetinaHead, self).__init__() 42 | self.in_channels = in_channels 43 | self.num_classes = num_classes 44 | self.feat_channels = feat_channels 45 | self.stacked_convs = stacked_convs 46 | self.octave_base_scale = octave_base_scale 47 | self.scales_per_octave = scales_per_octave 48 | self.conv_cfg = conv_cfg 49 | self.norm_cfg = norm_cfg 50 | octave_scales = np.array( 51 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 52 | self.cls_out_channels = num_classes 53 | self._init_layers() 54 | def _init_layers(self): 55 | self.relu = nn.ReLU(inplace=True) 56 | self.cls_convs = nn.ModuleList() 57 | #self.reg_convs = nn.ModuleList() 58 | for i in range(self.stacked_convs): 59 | chn = self.in_channels if i == 0 else self.feat_channels 60 | self.cls_convs.append( 61 | ConvModule( 62 | chn, 63 | self.feat_channels, 64 | 3, 65 | stride=1, 66 | padding=1, 67 | conv_cfg=self.conv_cfg, 68 | norm_cfg=self.norm_cfg)) 69 | self.retina_cls = nn.Conv2d( 70 | self.feat_channels, 71 | self.cls_out_channels, 72 | 3, 73 | padding=1) 74 | #self.output_act = nn.Sigmoid() 75 | 76 | def init_weights(self): 77 | for m in self.cls_convs: 78 | normal_init(m.conv, std=0.01) 79 | for m in self.reg_convs: 80 | normal_init(m.conv, std=0.01) 81 | bias_cls = bias_init_with_prob(0.01) 82 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 83 | 
#normal_init(self.retina_reg, std=0.01) 84 | 85 | def forward_single(self, x): 86 | cls_feat = x 87 | #reg_feat = x 88 | for cls_conv in self.cls_convs: 89 | cls_feat = cls_conv(cls_feat) 90 | #for reg_conv in self.reg_convs: 91 | # reg_feat = reg_conv(reg_feat) 92 | 93 | cls_score = self.retina_cls(cls_feat) 94 | # out is B x C x W x H, with C = n_classes + n_anchors 95 | #cls_score = cls_score.permute(0, 2, 3, 1) 96 | #batch_size, width, height, channels = cls_score.shape 97 | #cls_score = cls_score.view(batch_size, width, height, self.num_anchors, self.num_classes) 98 | #cls_score = cls_score.contiguous().view(x.size(0), -1, self.num_classes) 99 | 100 | 101 | #bbox_pred = self.retina_reg(reg_feat) 102 | #bbox_pred = bbox_pred.permute(0, 2, 3, 1) 103 | #bbox_pred = bbox_pred.contiguous().view(bbox_pred.size(0), -1, 4) 104 | return [cls_score] 105 | def forward(self, feats): 106 | return multi_apply(self.forward_single, feats) 107 | -------------------------------------------------------------------------------- /lib/models/backbones/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/mobilenet/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/test_mode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from darknet import darknet53 4 | from hardnet import hardnet 5 | 6 | model = hardnet(19).cuda() 7 | inputs = torch.randn((1,3,512,512)).cuda() 8 | 9 | outs = model(inputs) 10 | 11 | print(outs.shape) 12 | 13 | 14 | 15 | model = darknet53(0,1,2).cuda() 16 | 17 | inputs = torch.randn((1,3,512,512)).cuda() 18 | 19 | outs = model(inputs) 20 | 21 | print(outs.shape) 22 | -------------------------------------------------------------------------------- /lib/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/__init__.py -------------------------------------------------------------------------------- /lib/models/heads/keypoint.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class KeypointHead(nn.Module): 10 | 11 | def __init__(self, intermediate_channel, head_conv): 12 | super(KeypointHead, self).__init__() 13 | 14 | self.hm = nn.Sequential( 15 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(head_conv, 1, kernel_size=1, stride=1, padding=0)) 18 | self.wh = nn.Sequential( 19 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 22 | self.hps = nn.Sequential( 23 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(head_conv, 34, kernel_size=1, stride=1, padding=0)) 26 | self.reg = nn.Sequential( 27 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 28 | nn.ReLU(inplace=True), 29 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 30 | self.hm_hp = 
nn.Sequential( 31 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 32 | nn.ReLU(inplace=True), 33 | nn.Conv2d(head_conv, 17, kernel_size=1, stride=1, padding=0)) 34 | self.hp_offset = nn.Sequential( 35 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 38 | self.init_weights() 39 | 40 | def forward(self, x): 41 | 42 | return [self.hm(x), self.wh(x), self.hps(x), self.reg(x), self.hm_hp(x), self.hp_offset(x)] 43 | 44 | def init_weights(self): 45 | self.hm[-1].bias.data.fill_(-2.19) 46 | self.hm_hp[-1].bias.data.fill_(-2.19) 47 | self.fill_fc_weights(self.wh) 48 | self.fill_fc_weights(self.hps) 49 | self.fill_fc_weights(self.reg) 50 | self.fill_fc_weights(self.hp_offset) 51 | 52 | def fill_fc_weights(self, layers): 53 | for m in layers.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | nn.init.normal_(m.weight, std=0.001) 56 | if m.bias is not None: 57 | nn.init.constant_(m.bias, 0) 58 | -------------------------------------------------------------------------------- /lib/models/heads/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/mask.py -------------------------------------------------------------------------------- /lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torchvision.models as models 8 | 9 | from .backbones.darknet import darknet53 10 | from .backbones.dlav0 import get_pose_net as get_dlav0 11 | from .backbones.hardnet import get_hard_net 12 | from .backbones.large_hourglass import get_large_hourglass_net 13 | from .backbones.mobilenet.mobilenetv3 import get_mobile_pose_netv3 14 | from .backbones.mobilenet.mobilenetv2 import get_mobile_pose_netv2 15 | from .backbones.msra_resnet import get_resnet 16 | from .backbones.pose_dla_dcn import get_pose_net as get_dla_dcn 17 | from .backbones.pose_higher_hrnet import get_hrpose_net 18 | from .backbones.resnet_dcn import get_pose_net as get_pose_net_dcn 19 | from .backbones.shufflenetv2_dcn import get_shufflev2_net 20 | from .backbones.ghost_net import get_ghost_net 21 | from .backbones.efficientdet import get_efficientdet 22 | from .heads.keypoint import KeypointHead 23 | 24 | _backbone_factory = { 25 | 'res': get_resnet, # default Resnet with deconv 26 | 'dlav0': get_dlav0, # default DLAup 27 | 'dla': get_dla_dcn, 28 | 'resdcn': get_pose_net_dcn, 29 | 'hourglass': get_large_hourglass_net, 30 | 'mobilenetv3': get_mobile_pose_netv3, 31 | 'mobilenetv2': get_mobile_pose_netv2, 32 | 'shufflenetV2': get_shufflev2_net, 33 | 'hrnet': get_hrpose_net, 34 | 'hardnet': get_hard_net, 35 | 'darknet': darknet53, 36 | 'ghostnet': get_ghost_net, 37 | 'efficientdet':get_efficientdet, 38 | } 39 | 40 | _head_factory = { 41 | 'keypoint': KeypointHead 42 | } 43 | 44 | class BackBoneWithHead(nn.Module): 45 | 46 | def __init__(self, arch, head_conv, cfg): 47 | super(BackBoneWithHead, self).__init__() 48 | 49 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 50 | arch = arch[:arch.find('_')] if '_' in arch else arch 51 | backbone = _backbone_factory[arch] 52 | self.backbone_model = backbone(num_layers=num_layers, cfg = cfg) 53 | 54 | head = 
_head_factory[cfg.MODEL.HEADS_NAME] 55 | self.head_model = head(cfg.MODEL.INTERMEDIATE_CHANNEL, cfg.MODEL.HEAD_CONV) 56 | 57 | def forward(self, x): 58 | x = self.backbone_model(x) 59 | return self.head_model(x) 60 | 61 | 62 | 63 | def create_model(arch, head_conv, cfg): 64 | 65 | return BackBoneWithHead(arch, head_conv, cfg) 66 | 67 | def load_model(model, model_path, optimizer=None, resume=False, 68 | lr=None, lr_step=None): 69 | start_epoch = 0 70 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 71 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 72 | state_dict_ = checkpoint['state_dict'] 73 | state_dict = {} 74 | 75 | # convert data_parallal to model 76 | for k in state_dict_: 77 | if k.startswith('module') and not k.startswith('module_list'): 78 | state_dict[k[7:]] = state_dict_[k] 79 | else: 80 | state_dict[k] = state_dict_[k] 81 | model_state_dict = model.state_dict() 82 | 83 | # check loaded parameters and created model parameters 84 | msg = 'If you see this, your model does not fully load the ' + \ 85 | 'pre-trained weight. Please make sure ' + \ 86 | 'you have correctly specified --arch xxx ' + \ 87 | 'or set the correct --num_classes for your own dataset.' 88 | for k in state_dict: 89 | if k in model_state_dict: 90 | if state_dict[k].shape != model_state_dict[k].shape: 91 | print('Skip loading parameter {}, required shape{}, '\ 92 | 'loaded shape{}. {}'.format( 93 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 94 | state_dict[k] = model_state_dict[k] 95 | else: 96 | print('Drop parameter {}.'.format(k) + msg) 97 | for k in model_state_dict: 98 | if not (k in state_dict): 99 | print('No param {}.'.format(k) + msg) 100 | state_dict[k] = model_state_dict[k] 101 | model.load_state_dict(state_dict, strict=False) 102 | 103 | # resume optimizer parameters 104 | if optimizer is not None and resume: 105 | if 'optimizer' in checkpoint: 106 | optimizer.load_state_dict(checkpoint['optimizer']) 107 | start_epoch = checkpoint['epoch'] 108 | start_lr = lr 109 | for step in lr_step: 110 | if start_epoch >= step: 111 | start_lr *= 0.1 112 | for param_group in optimizer.param_groups: 113 | param_group['lr'] = start_lr 114 | print('Resumed optimizer with start lr', start_lr) 115 | else: 116 | print('No optimizer parameters in checkpoint.') 117 | if optimizer is not None: 118 | return model, optimizer, start_epoch 119 | else: 120 | return model 121 | 122 | def save_model(path, epoch, model, optimizer=None): 123 | if isinstance(model, torch.nn.DataParallel): 124 | state_dict = model.module.state_dict() 125 | else: 126 | state_dict = model.state_dict() 127 | data = {'epoch': epoch, 128 | 'state_dict': state_dict} 129 | if not (optimizer is None): 130 | data['optimizer'] = optimizer.state_dict() 131 | torch.save(data, path) 132 | -------------------------------------------------------------------------------- /lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | def _sigmoid(x): 8 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 9 | return y 10 | 11 | def _gather_feat(feat, ind, mask=None): 12 | dim = feat.size(2) 13 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 14 | feat = feat.gather(1, ind) 15 | if mask is not None: 16 | mask = mask.unsqueeze(2).expand_as(feat) 17 | feat = feat[mask] 18 | feat = feat.view(-1, dim) 19 | return feat 20 | 
21 | def _transpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def flip_tensor(x): 28 | return torch.flip(x, [3]) 29 | 30 | def flip_lr(x, flip_idx): 31 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 32 | shape = tmp.shape 33 | for e in flip_idx: 34 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 35 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 36 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 37 | 38 | def flip_lr_off(x, flip_idx): 39 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 40 | shape = tmp.shape 41 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 42 | tmp.shape[2], tmp.shape[3]) 43 | tmp[:, :, 0, :, :] *= -1 44 | for e in flip_idx: 45 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 46 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 47 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 48 | -------------------------------------------------------------------------------- /lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import torch 6 | import torch.nn as nn 7 | from progress.bar import Bar 8 | 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class BaseTrainer(object): 13 | def __init__( 14 | self, cfg, local_rank, model, optimizer=None): 15 | self.cfg = cfg 16 | self.optimizer = optimizer 17 | self.loss_stats, self.loss = self._get_losses(cfg, local_rank) 18 | self.model = model 19 | self.local_rank = local_rank 20 | 21 | def set_device(self, gpus, chunk_sizes, device): 22 | 23 | if self.cfg.TRAIN.DISTRIBUTE: 24 | self.model = self.model.to(device) 25 | self.model = nn.parallel.DistributedDataParallel(self.model, find_unused_parameters=True, 26 | device_ids=[self.local_rank, ], 27 | output_device=self.local_rank) 28 | else: 29 | self.model = nn.DataParallel(self.model).to(device) 30 | self.loss.to(device) 31 | for state in self.optimizer.state.values(): 32 | for k, v in state.items(): 33 | if isinstance(v, torch.Tensor): 34 | state[k] = v.to(device=device, non_blocking=True) 35 | 36 | def run_epoch(self, phase, epoch, data_loader): 37 | 38 | model = self.model 39 | if phase == 'train': 40 | self.model.train() 41 | else: 42 | if len(self.cfg.GPUS) > 1: 43 | model = model.module 44 | model.eval() 45 | torch.cuda.empty_cache() 46 | 47 | cfg = self.cfg 48 | results = {} 49 | data_time, batch_time = AverageMeter(), AverageMeter() 50 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 51 | num_iters = len(data_loader) 52 | bar = Bar('{}/{}'.format(cfg.TASK, cfg.EXP_ID), max=num_iters) 53 | end = time.time() 54 | for iter_id, batch in enumerate(data_loader): 55 | if iter_id >= num_iters: 56 | break 57 | data_time.update(time.time() - end) 58 | 59 | for k in batch: 60 | if k != 'meta': 61 | batch[k] = batch[k].to(device=torch.device('cuda:%d'%self.local_rank), non_blocking=True) 62 | 63 | outputs = model(batch['input']) 64 | loss, loss_stats = self.loss(outputs, batch) 65 | 66 | loss = loss.mean() 67 | if phase == 'train': 68 | self.optimizer.zero_grad() 69 | loss.backward() 70 | self.optimizer.step() 71 | batch_time.update(time.time() - end) 72 | end = time.time() 73 | 74 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 75 | epoch, iter_id, num_iters, phase=phase, 76 | total=bar.elapsed_td, eta=bar.eta_td) 77 | for l 
in avg_loss_stats: 78 | avg_loss_stats[l].update( 79 | loss_stats[l].mean().item(), batch['input'].size(0)) 80 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 81 | if not cfg.TRAIN.HIDE_DATA_TIME: 82 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 83 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 84 | if cfg.PRINT_FREQ > 0: 85 | if iter_id % cfg.PRINT_FREQ == 0: 86 | print('{}/{}| {}'.format(cfg.TASK, cfg.EXP_ID, Bar.suffix)) 87 | else: 88 | bar.next() 89 | 90 | if cfg.DEBUG > 0: 91 | self.debug(batch, outputs, iter_id) 92 | 93 | if phase == 'val': 94 | self.save_result(outputs, batch, results) 95 | del outputs, loss, loss_stats 96 | 97 | bar.finish() 98 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 99 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 100 | 101 | return ret, results 102 | 103 | def debug(self, batch, output, iter_id): 104 | raise NotImplementedError 105 | 106 | def save_result(self, output, batch, results): 107 | raise NotImplementedError 108 | 109 | def _get_losses(self, cfg): 110 | raise NotImplementedError 111 | 112 | def val(self, epoch, data_loader): 113 | return self.run_epoch('val', epoch, data_loader) 114 | 115 | def train(self, epoch, data_loader): 116 | return self.run_epoch('train', epoch, data_loader) 117 | -------------------------------------------------------------------------------- /lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from .multi_pose import MultiPoseTrainer 4 | 5 | 6 | train_factory = { 7 | 'multi_pose': MultiPoseTrainer, 8 | } 9 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numba 4 | import numpy as np 5 | 6 | 7 | @numba.jit(nopython=True, nogil=True) 8 | def gen_oracle_map(feat, ind, w, h): 9 | # feat: B x maxN x featDim 10 | # ind: B x maxN 11 | batch_size = feat.shape[0] 12 | max_objs = feat.shape[1] 13 | feat_dim = feat.shape[2] 14 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 15 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 16 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 17 | for i in range(batch_size): 18 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 19 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 20 | head, tail = 0, 0 21 | for j in range(max_objs): 22 | if ind[i][j] > 0: 23 | x, y = ind[i][j] % w, ind[i][j] // w 24 | out[i, :, y, x] = feat[i][j] 25 | vis[i, y, x] = 1 26 | queue_ind[tail] = x, y 27 | queue_feat[tail] = feat[i][j] 28 | tail += 1 29 | while tail - head > 0: 30 | x, y = queue_ind[head] 31 | f = queue_feat[head] 32 | head += 1 33 | for (dx, dy) in ds: 34 | xx, yy = x + dx, y + dy 35 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 36 | out[i, :, yy, xx] = f 37 | vis[i, yy, xx] = 1 38 | queue_ind[tail] = xx, yy 39 | queue_feat[tail] = f 40 | tail += 1 41 | return out 42 | 
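A minimal usage sketch for `gen_oracle_map`, with dummy NumPy inputs shaped as the comments above describe (`feat`: B x maxN x featDim, `ind`: B x maxN of flattened `y * w + x` positions). The shapes and the import path are illustrative assumptions, taking `lib/` to be on `PYTHONPATH` as `_init_paths.py` arranges.

~~~
import numpy as np
from utils.oracle_utils import gen_oracle_map  # assumes lib/ is on PYTHONPATH

B, max_objs, feat_dim, h, w = 1, 2, 3, 8, 8
feat = np.random.randn(B, max_objs, feat_dim).astype(np.float32)
# flattened y * w + x positions of two annotated objects (entries <= 0 are skipped)
ind = np.array([[1 * w + 2, 5 * w + 4]], dtype=np.int64)

out = gen_oracle_map(feat, ind, w, h)  # B x featDim x h x w
print(out.shape)  # (1, 3, 8, 8)
~~~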
-------------------------------------------------------------------------------- /lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | from .image import transform_preds 6 | 7 | 8 | def multi_pose_post_process(dets, c, s, h, w): 9 | # dets: batch x max_dets x 40 10 | # return list of 39 in image coord 11 | ret = [] 12 | for i in range(dets.shape[0]): 13 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 14 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 15 | top_preds = np.concatenate( 16 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 17 | pts.reshape(-1, 34), dets[i, :, 39:56]], axis=1).astype(np.float32).tolist() 18 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 19 | return ret 20 | 21 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value""" 8 | def __init__(self): 9 | self.reset() 10 | 11 | def reset(self): 12 | self.val = 0 13 | self.avg = 0 14 | self.sum = 0 15 | self.count = 0 16 | 17 | def update(self, val, n=1): 18 | self.val = val 19 | self.sum += val * n 20 | self.count += n 21 | if self.count > 0: 22 | self.avg = self.sum / self.count 23 | -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials to develop CenterNet. `lib/src/opts` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example to convert kitti format to coco format. 9 | - Create a dataset intilization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information, and annotation path. 
10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/datasets/trains/`, and `src/lib/datasets/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list (one entry per stage; a single-stage model should return a list with a single element). Each element of the list is a dict containing the same keys as `heads`. 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA, 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution; without it, the images are resized to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command, for flip test and multi-scale test, respectively. The expected APs are `39.2` and `41.7`, respectively. 19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for dla 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can change to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. 
For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable for 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size. 75 | For example, to train COCO object detection with dla on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch splits the total batch size evenly across the GPUs. 83 | `--master_batch` allows using a different batch size on the master GPU, which usually consumes more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1omiOUjWCrFbTJREypuZaODu0bOlF_7Fg/view?usp=sharing) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)). 90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 18.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 1.1: 19 | 20 | ~~~ 21 | pip install torch==1.1 22 | ~~~ 23 | 24 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 25 | 26 | ~~~ 27 | # COCOAPI=/path/to/clone/cocoapi 28 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 29 | cd $COCOAPI/PythonAPI 30 | make 31 | python setup.py install --user 32 | ~~~ 33 | 34 | 3. Clone this repo: 35 | 36 | ~~~ 37 | CenterNet_ROOT=/path/to/clone/CenterNet 38 | git clone https://github.com/tensorboy/centerpose $CenterNet_ROOT 39 | ~~~ 40 | 41 | 42 | 4. Install the requirements: 43 | 44 | ~~~ 45 | pip install -r requirements.txt 46 | ~~~ 47 | 48 | 49 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)): 50 | 51 | ~~~ 52 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 53 | ./make.sh 54 | ~~~ 55 | 6. [Optional, only required if you are using ExtremeNet or multi-scale testing] Compile NMS if you want to use multi-scale testing or test ExtremeNet. 
56 | 57 | ~~~ 58 | cd $CenterNet_ROOT/src/lib/external 59 | make 60 | ~~~ 61 | -------------------------------------------------------------------------------- /readme/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/demo.gif -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/fig2.png -------------------------------------------------------------------------------- /readme/multi_pose_screenshot_27.11.2019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/multi_pose_screenshot_27.11.2019.png -------------------------------------------------------------------------------- /readme/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/performance.png -------------------------------------------------------------------------------- /readme/plot_speed_accuracy.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import pandas as pd 4 | import plotly.express as px 5 | import plotly.graph_objects as go 6 | 7 | sizeref = 2000 8 | 9 | # Dictionary with dataframes for each continent 10 | continent_names = ['DLA', 'Resnet', 'MobileNet', 'ShuffleNet', 'HigherResolution', 'HardNet'] 11 | continent_data = {} 12 | 13 | continent_data['DLA-34'] = {'map':[62.3], 'speed':[23], 'size':82.7/1.5} 14 | continent_data['Resnet50'] = {'map':[54.5], 'speed':[28], 'size':139.8/1.5} 15 | continent_data['MobileNetV3'] = {'map':[46.0], 'speed':[30], 'size':9.7/1.5} 16 | continent_data['ShuffleNetV2'] = {'map':[43.9], 'speed':[25], 'size':40./1.5} 17 | continent_data['HigherResolution'] = {'map':[63.8], 'speed':[16], 'size':115.2/1.5} 18 | continent_data['HardNet'] = {'map':[46.0], 'speed':[30], 'size':19.3/1.5} 19 | continent_data['Darknet53'] = {'map':[38.2], 'speed':[30], 'size':27.1/1.5} 20 | 21 | # Create figure 22 | fig = go.Figure() 23 | 24 | for continent_name, continent in continent_data.items(): 25 | fig.add_trace(go.Scatter( 26 | x=continent['speed'], y=continent['map'], 27 | name=continent_name, text='model performance', 28 | marker_size=continent['size'], 29 | )) 30 | # Tune marker appearance and layout 31 | fig.update_traces(mode='markers', marker=dict(sizemode='area', 32 | sizeref=sizeref, line_width=2)) 33 | 34 | fig.update_layout( 35 | title='mAP v.s. 
FPS', 36 | xaxis=dict( 37 | title='FPS (frames per second)', 38 | gridcolor='white', 39 | type='log', 40 | gridwidth=2, 41 | ), 42 | yaxis=dict( 43 | title='Mean Average Precision (mAP)', 44 | gridcolor='white', 45 | gridwidth=2, 46 | ), 47 | paper_bgcolor='rgb(243, 243, 243)', 48 | plot_bgcolor='rgb(243, 243, 243)', 49 | ) 50 | fig.show() 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | pycocotools 9 | yacs 10 | pthflops 11 | -------------------------------------------------------------------------------- /samples/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import os 5 | 6 | import cv2 7 | 8 | import _init_paths 9 | from config import cfg, update_config 10 | from detectors.detector_factory import detector_factory 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Train keypoints network') 15 | # general 16 | parser.add_argument('--cfg', 17 | help='experiment configure file name', 18 | required=True, 19 | type=str) 20 | parser.add_argument('--TESTMODEL', 21 | help='model directory', 22 | type=str, 23 | default='') 24 | parser.add_argument('--DEMOFILE', 25 | help='source images or video', 26 | type=str, 27 | default='') 28 | parser.add_argument('--DEBUG', type=int, default=0, 29 | help='level of visualization.' 
30 | '1: only show the final detection results' 31 | '2: show the network output features' 32 | '3: use matplot to display' # useful when lunching training with ipython notebook 33 | '4: save all visualizations to disk') 34 | parser.add_argument('--NMS', 35 | help='whether to do NMS', 36 | type=bool, 37 | default=True) 38 | args = parser.parse_args() 39 | 40 | return args 41 | 42 | 43 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 44 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 45 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 46 | 47 | def demo(cfg): 48 | Detector = detector_factory[cfg.TEST.TASK] 49 | detector = Detector(cfg) 50 | 51 | if cfg.TEST.DEMO_FILE == 'webcam' or \ 52 | cfg.TEST.DEMO_FILE[cfg.TEST.DEMO_FILE.rfind('.') + 1:].lower() in video_ext: 53 | cam = cv2.VideoCapture(0 if cfg.TEST.DEMO_FILE == 'webcam' else cfg.TEST.DEMO_FILE) 54 | detector.pause = False 55 | while True: 56 | _, img = cam.read() 57 | cv2.imshow('input', img) 58 | ret = detector.run(img) 59 | time_str = '' 60 | for stat in time_stats: 61 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 62 | print(time_str) 63 | if cv2.waitKey(1) == 27: 64 | return # esc to quit 65 | else: 66 | if os.path.isdir(cfg.TEST.DEMO_FILE): 67 | image_names = [] 68 | ls = os.listdir(cfg.TEST.DEMO_FILE) 69 | for file_name in sorted(ls): 70 | ext = file_name[file_name.rfind('.') + 1:].lower() 71 | if ext in image_ext: 72 | image_names.append(os.path.join(cfg.TEST.DEMO_FILE, file_name)) 73 | else: 74 | image_names = [cfg.TEST.DEMO_FILE] 75 | 76 | for (image_name) in image_names: 77 | ret = detector.run(image_name) 78 | 79 | time_str = '' 80 | for stat in time_stats: 81 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 82 | print(time_str) 83 | if __name__ == '__main__': 84 | args = parse_args() 85 | update_config(cfg, args.cfg) 86 | cfg.defrost() 87 | cfg.TEST.MODEL_PATH = args.TESTMODEL 88 | cfg.TEST.DEMO_FILE = args.DEMOFILE 89 | cfg.TEST.NMS = args.NMS 90 | cfg.DEBUG = args.DEBUG 91 | cfg.freeze() 92 | demo(cfg) 93 | -------------------------------------------------------------------------------- /tools/evaluate.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import json 5 | import os 6 | import time 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | from progress.bar import Bar 12 | 13 | import _init_paths 14 | from config import cfg, update_config 15 | from datasets.dataset_factory import dataset_factory 16 | from detectors.detector_factory import detector_factory 17 | from external.nms import soft_nms 18 | from logger import Logger 19 | from utils.utils import AverageMeter 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Train keypoints network') 24 | # general 25 | parser.add_argument('--cfg', 26 | help='experiment configure file name', 27 | required=True, 28 | type=str) 29 | parser.add_argument('--NMS', 30 | help='whether to do NMS', 31 | type=bool, 32 | default=True) 33 | parser.add_argument('--TESTMODEL', 34 | help='model directory', 35 | type=str, 36 | default='') 37 | parser.add_argument('--DEBUG', type=int, default=0, 38 | help='level of visualization.' 
39 | '1: only show the final detection results' 40 | '2: show the network output features' 41 | '3: use matplot to display' # useful when lunching training with ipython notebook 42 | '4: save all visualizations to disk') 43 | args = parser.parse_args() 44 | 45 | return args 46 | 47 | 48 | def test(cfg): 49 | 50 | Dataset = dataset_factory[cfg.SAMPLE_METHOD] 51 | Logger(cfg) 52 | Detector = detector_factory[cfg.TEST.TASK] 53 | 54 | dataset = Dataset(cfg, 'val') 55 | detector = Detector(cfg) 56 | 57 | results = {} 58 | num_iters = len(dataset) 59 | bar = Bar('{}'.format(cfg.EXP_ID), max=num_iters) 60 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 61 | avg_time_stats = {t: AverageMeter() for t in time_stats} 62 | for ind in range(num_iters): 63 | img_id = dataset.images[ind] 64 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 65 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 66 | #img_path = '/home/tensorboy/data/coco/images/val2017/000000004134.jpg' 67 | ret = detector.run(img_path) 68 | 69 | results[img_id] = ret['results'] 70 | 71 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 72 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 73 | for t in avg_time_stats: 74 | avg_time_stats[t].update(ret[t]) 75 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 76 | bar.next() 77 | bar.finish() 78 | dataset.run_eval(results, cfg.OUTPUT_DIR) 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | update_config(cfg, args.cfg) 83 | cfg.defrost() 84 | cfg.DEBUG = args.DEBUG 85 | cfg.TEST.MODEL_PATH = args.TESTMODEL 86 | cfg.TEST.NMS = args.NMS 87 | cfg.freeze() 88 | test(cfg) 89 | --------------------------------------------------------------------------------
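For quick reference, a minimal sketch of driving the detector directly from Python, mirroring the flow of `tools/demo.py` above; the config and checkpoint paths are placeholders, and it assumes the snippet is run from `tools/` so that `_init_paths` can add `lib/` to `sys.path`.

~~~
# run from tools/ so that _init_paths puts ../lib on sys.path
import _init_paths  # noqa: F401
from config import cfg, update_config
from detectors.detector_factory import detector_factory

update_config(cfg, '/path/to/experiment_config.yaml')  # placeholder config file
cfg.defrost()
cfg.TEST.MODEL_PATH = '/path/to/trained_model.pth'     # placeholder checkpoint
cfg.freeze()

detector = detector_factory[cfg.TEST.TASK](cfg)
ret = detector.run('/path/to/image.jpg')  # same entry point demo.py and evaluate.py use
print(ret['results'])                     # detections; per-stage timings are also in ret
~~~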