├── .gitignore
├── LICENSE
├── NOTICE
├── README.md
├── demo
│   ├── _init_paths.py
│   ├── centernet_tensorrt_engine.py
│   ├── convert2onnx.py
│   ├── demo_main.py
│   ├── face
│   │   ├── __init__.py
│   │   ├── centerface.py
│   │   ├── demo.py
│   │   ├── prnet.py
│   │   ├── reid
│   │   │   ├── __init__.py
│   │   │   ├── reid_manager.py
│   │   │   └── reid_table
│   │   │       ├── __init__.py
│   │   │       ├── base_idbase.py
│   │   │       ├── head_pose_base.py
│   │   │       └── reid_utils.py
│   │   ├── resfcn256.py
│   │   └── utils
│   │       ├── BFM_UV.mat
│   │       ├── cv_plot.py
│   │       ├── estimate_pose.py
│   │       ├── generate_posmap_300WLP.py
│   │       ├── losses.py
│   │       ├── render.py
│   │       ├── render_app.py
│   │       ├── rotate_vertices.py
│   │       ├── utils.py
│   │       └── uv_data
│   │           ├── canonical_vertices.npy
│   │           ├── face_ind.txt
│   │           ├── triangles.txt
│   │           ├── uv_kpt_ind.txt
│   │           └── uv_weight_mask_gdh.png
│   ├── result.png
│   ├── tensorrt_model.py
│   └── tracking
│       ├── __init__.py
│       ├── deep_sort.py
│       ├── feature_extractor.py
│       ├── model.py
│       ├── sort
│       │   ├── __init__.py
│       │   ├── detection.py
│       │   ├── iou_matching.py
│       │   ├── kalman_filter.py
│       │   ├── linear_assignment.py
│       │   ├── nn_matching.py
│       │   ├── preprocessing.py
│       │   ├── track.py
│       │   └── tracker.py
│       └── util.py
├── experiments
│   ├── darknet53_512x512.yaml
│   ├── dla_34_512x512.yaml
│   ├── efficientdet_512x512.yaml
│   ├── ghost_net.yaml
│   ├── hardnet_512x512.yaml
│   ├── hrnet_w32_512.yaml
│   ├── hrnet_w48_512.yaml
│   ├── mobilenetv2_512x512.yaml
│   ├── mobilenetv3_512x512.yaml
│   ├── res_50_512x512.yaml
│   └── shufflenetV2_512x512.yaml
├── images
│   └── image1.jpeg
├── lib
│   ├── config
│   │   ├── __init__.py
│   │   └── default.py
│   ├── datasets
│   │   ├── coco_hp.py
│   │   ├── data.py
│   │   ├── dataset_factory.py
│   │   └── multi_pose.py
│   ├── detectors
│   │   ├── base_detector.py
│   │   ├── detector_factory.py
│   │   └── multi_pose.py
│   ├── external
│   │   ├── .gitignore
│   │   ├── Makefile
│   │   ├── __init__.py
│   │   ├── build
│   │   │   └── temp.linux-x86_64-3.6
│   │   │       └── nms.o
│   │   ├── make.sh
│   │   ├── nms.pyx
│   │   └── setup.py
│   ├── logger.py
│   ├── models
│   │   ├── backbones
│   │   │   ├── DCNv2
│   │   │   │   ├── LICENSE
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dcn_v2.py
│   │   │   │   ├── make.sh
│   │   │   │   ├── setup.py
│   │   │   │   ├── src
│   │   │   │   │   ├── cpu
│   │   │   │   │   │   ├── dcn_v2_cpu.cpp
│   │   │   │   │   │   └── vision.h
│   │   │   │   │   ├── cuda
│   │   │   │   │   │   ├── dcn_v2_cuda.cu
│   │   │   │   │   │   ├── dcn_v2_im2col_cuda.cu
│   │   │   │   │   │   ├── dcn_v2_im2col_cuda.h
│   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
│   │   │   │   │   │   └── vision.h
│   │   │   │   │   ├── dcn_v2.h
│   │   │   │   │   └── vision.cpp
│   │   │   │   └── test.py
│   │   │   ├── Utitled Document
│   │   │   ├── __init__.py
│   │   │   ├── darknet.py
│   │   │   ├── dlav0.py
│   │   │   ├── efficientdet
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bifpn.py
│   │   │   │   ├── conv_module.py
│   │   │   │   ├── efficientdet.py
│   │   │   │   ├── efficientnet.py
│   │   │   │   ├── module.py
│   │   │   │   ├── retinahead.py
│   │   │   │   └── utils.py
│   │   │   ├── ghost_net.py
│   │   │   ├── hardnet.py
│   │   │   ├── large_hourglass.py
│   │   │   ├── mobilenet
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mobilenetv2.py
│   │   │   │   └── mobilenetv3.py
│   │   │   ├── msra_resnet.py
│   │   │   ├── pose_dla_dcn.py
│   │   │   ├── pose_higher_hrnet.py
│   │   │   ├── resnet_dcn.py
│   │   │   ├── shufflenetv2_dcn.py
│   │   │   └── test_mode.py
│   │   ├── decode.py
│   │   ├── heads
│   │   │   ├── __init__.py
│   │   │   ├── keypoint.py
│   │   │   └── mask.py
│   │   ├── losses.py
│   │   ├── model.py
│   │   └── utils.py
│   ├── trains
│   │   ├── base_trainer.py
│   │   ├── multi_pose.py
│   │   └── train_factory.py
│   └── utils
│       ├── __init__.py
│       ├── debugger.py
│       ├── image.py
│       ├── oracle_utils.py
│       ├── post_process.py
│       └── utils.py
├── readme
│   ├── DATA.md
│   ├── DEVELOP.md
│   ├── GETTING_STARTED.md
│   ├── INSTALL.md
│   ├── demo.gif
│   ├── fig2.png
│   ├── multi_pose_screenshot_27.11.2019.png
│   ├── performance.png
│   └── plot_speed_accuracy.py
├── requirements.txt
├── samples
│   ├── _init_paths.py
│   ├── data_inspect_utils.py
│   └── inspect_data.ipynb
└── tools
    ├── _init_paths.py
    ├── demo.py
    ├── evaluate.py
    └── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *pyc
2 | *~
3 | *so
4 | lib/models/backbones/DCNv2/build/*
5 | lib/models/backbones/DCNv2/DCNv2.egg-info/*
6 | *build/*
7 | */.vscode/*
8 | *__pycache__*
9 | *.ipynb_checkpoints*
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Xingyi Zhou
4 | All rights reserved.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # This repo is based on [CenterNet](https://arxiv.org/abs/1904.07850) and aims to push the boundary of human pose estimation
2 | Multi-person pose estimation using center point detection:
3 | 
4 |
5 | ## Main results
6 |
7 | ### Keypoint detection on COCO validation 2017
8 |

9 |
10 | | Backbone | AP | FPS | TensorRT Speed | GFLOPs |Download |
11 | |--------------|-----------|--------------|----------|----------|----------|
12 | |DLA-34 | 62.7 | 23 | - | - |[model](https://drive.google.com/open?id=1IahJ3vpjTVu1p-Okf6lcn-bM7fVKNg6N) |
13 | |Resnet-50 | 54.5 | 28 | 33 | - |[model](https://drive.google.com/open?id=1oBgWrfigo2fGtpQJXQ0stADTgVFxPWGq) |
14 | |MobilenetV3 | 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1snJnADAD1NUzyO1QXCftuZu1rsr8095G) |
15 | |ShuffleNetV2 | 43.9 | 25 | - | - |[model](https://drive.google.com/open?id=1FK7YQzCB6mLcb0v4SOmlqtRJfA-PQSvN) |
16 | |[HRNet_W32](https://drive.google.com/open?id=1mJoK7KEx35Wgf6uAZ-Ez5IwAeOk1RYw0)| 63.8 | 16 | - | - |[model](https://drive.google.com/open?id=1X0yxGeeNsD4VwU2caDo-BaH_MoCAnU_J) |
17 | |[HardNet](https://github.com/PingoLH/FCHarDNet)| 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1CFc_qAAT4NFfrAG8JOxRVG8CAw9ySuYp) |
18 | |[Darknet53]()| 34.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) |
19 | |[EfficientDet]()| 38.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) |
20 |
21 | ## Installation
22 |
23 | git submodule init && git submodule update
24 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions.
25 |
26 | ## Use CenterNet
27 |
28 | We support demos for a single image, an image folder, video, and webcam.
29 |
30 | First, download the model [DLA-34](https://drive.google.com/open?id=1OkHjjViB0dzbuicdtIam-YcoT0sYpmjP)
31 | from the [Model zoo](https://drive.google.com/open?id=1UG2l8XtjOfBtG_GLpSdxlWS2wxFR8hQF) and put it anywhere.
32 |
33 | Run:
34 |
35 | ~~~
36 | cd tools; python demo.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE ../images/33823288584_1d21cf0a26_k.jpg --DEBUG 1
37 | ~~~
38 | The result for the example images should look like:
39 | 
40 |
41 | ## Evaluation
42 | ~~~
43 | cd tools; python evaluate.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE --DEBUG 0
44 | ~~~
45 |
46 | ## Training
47 |
48 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to setup the datasets.
49 |
50 | We provide config files for all the experiments in the [experiments](experiments) folder.
51 |
52 | ```
53 | cd ./tools; python -m torch.distributed.launch --nproc_per_node 4 train.py --cfg ../experiments/*.yaml
54 | ```
55 |
56 | ## Demo
57 |
58 | The demo files are located in the `demo` directory, which implements a robust human detection + tracking + face ReID pipeline.
59 |
60 |
61 |
62 |
63 |
64 | ## License
65 |
66 | MIT License (refer to the LICENSE file for details).
67 |
68 | ## Citation
69 |
70 | If you find this project useful for your research, please use the following BibTeX entry.
71 |
72 | @inproceedings{zhou2019objects,
73 | title={Objects as Points},
74 | author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
75 | booktitle={arXiv preprint arXiv:1904.07850},
76 | year={2019}
77 | }
78 |
--------------------------------------------------------------------------------
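The demo described in the README above ties together the CenterNet detector, the DeepSort tracker (`demo/tracking/deep_sort.py`) and the CenterFace face detector (`demo/face/centerface.py`); the real entry point is `demo/demo_main.py`. The sketch below only illustrates how those two classes are driven: the checkpoint paths, the webcam source, and the hard-coded person box are assumptions, and in the actual demo the person detections come from the CenterNet model rather than being faked.

```python
import cv2
import numpy as np

from tracking.deep_sort import DeepSort        # run from inside demo/
from face.centerface import CenterFace

tracker = DeepSort('checkpoint/deep_sort_ckpt.t7')             # placeholder re-id checkpoint
face_detector = CenterFace('centerface.onnx', landmarks=True)  # placeholder ONNX model

cap = cv2.VideoCapture(0)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    h, w = frame.shape[:2]

    # Person detections normally come from the CenterNet detector; a fixed
    # (x1, y1, w, h) box with a confidence stands in for them here.
    bbox_xywh = np.array([[50.0, 50.0, 120.0, 300.0]])
    confidences = np.array([0.9])
    tracks = tracker.update(bbox_xywh, confidences, frame)  # rows: x1, y1, x2, y2, track_id

    face_detector.transform(h, w)             # must be called before inference
    dets, lms = face_detector(frame, threshold=0.35)

    for x1, y1, x2, y2, track_id in tracks:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.imshow('demo', frame)
    if cv2.waitKey(1) == 27:                  # Esc to quit
        break
cap.release()
```
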
/demo/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 |
5 | def add_path(path):
6 | if path not in sys.path:
7 | sys.path.insert(0, path)
8 |
9 | this_dir = osp.dirname(__file__)
10 |
11 | # Add lib to PYTHONPATH
12 | lib_path = osp.join(this_dir, '..', 'lib')
13 | add_path(lib_path)
14 |
--------------------------------------------------------------------------------
/demo/face/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/__init__.py
--------------------------------------------------------------------------------
/demo/face/centerface.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import cv2
4 | import numpy as np
5 |
6 |
7 | class CenterFace(object):
8 | def __init__(self, model_path, landmarks=True):
9 | self.landmarks = landmarks
10 | if self.landmarks:
11 | self.net = cv2.dnn.readNetFromONNX(model_path)
12 | else:
13 | self.net = cv2.dnn.readNetFromONNX('cface.1k.onnx')
14 |
15 | def __call__(self, img, threshold=0.5):
16 | blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
17 | self.net.setInput(blob)
18 | begin = datetime.datetime.now()
19 | if self.landmarks:
20 | heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540'])
21 | else:
22 | heatmap, scale, offset = self.net.forward(["535", "536", "537"])
23 |
24 | end = datetime.datetime.now()
25 | print("cpu times = ", end - begin)
26 | if self.landmarks:
27 | dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
28 | else:
29 | dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
30 |
31 | if len(dets) > 0:
32 | dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
33 | if self.landmarks:
34 | lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
35 | else:
36 | dets = np.empty(shape=[0, 5], dtype=np.float32)
37 | if self.landmarks:
38 | lms = np.empty(shape=[0, 10], dtype=np.float32)
39 | if self.landmarks:
40 | return dets, lms
41 | else:
42 | return dets
43 |
44 | def transform(self, h, w):
45 | img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
46 | scale_h, scale_w = img_h_new / h, img_w_new / w
47 | self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = img_h_new, img_w_new, scale_h, scale_w
48 |
49 | def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
50 | heatmap = np.squeeze(heatmap)
51 | scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
52 | offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
53 | c0, c1 = np.where(heatmap > threshold)
54 | if self.landmarks:
55 | boxes, lms = [], []
56 | else:
57 | boxes = []
58 | if len(c0) > 0:
59 | for i in range(len(c0)):
60 | s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
61 | o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
62 | s = heatmap[c0[i], c1[i]]
63 | x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
64 | x1, y1 = min(x1, size[1]), min(y1, size[0])
65 | boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
66 | if self.landmarks:
67 | lm = []
68 | for j in range(5):
69 | lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
70 | lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
71 | lms.append(lm)
72 | boxes = np.asarray(boxes, dtype=np.float32)
73 | keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
74 | boxes = boxes[keep, :]
75 | if self.landmarks:
76 | lms = np.asarray(lms, dtype=np.float32)
77 | lms = lms[keep, :]
78 | if self.landmarks:
79 | return boxes, lms
80 | else:
81 | return boxes
82 |
83 | def nms(self, boxes, scores, nms_thresh):
84 | x1 = boxes[:, 0]
85 | y1 = boxes[:, 1]
86 | x2 = boxes[:, 2]
87 | y2 = boxes[:, 3]
88 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
89 | order = np.argsort(scores)[::-1]
90 | num_detections = boxes.shape[0]
91 | suppressed = np.zeros((num_detections,), dtype=np.bool)
92 |
93 | keep = []
94 | for _i in range(num_detections):
95 | i = order[_i]
96 | if suppressed[i]:
97 | continue
98 | keep.append(i)
99 |
100 | ix1 = x1[i]
101 | iy1 = y1[i]
102 | ix2 = x2[i]
103 | iy2 = y2[i]
104 | iarea = areas[i]
105 |
106 | for _j in range(_i + 1, num_detections):
107 | j = order[_j]
108 | if suppressed[j]:
109 | continue
110 |
111 | xx1 = max(ix1, x1[j])
112 | yy1 = max(iy1, y1[j])
113 | xx2 = min(ix2, x2[j])
114 | yy2 = min(iy2, y2[j])
115 | w = max(0, xx2 - xx1 + 1)
116 | h = max(0, yy2 - yy1 + 1)
117 |
118 | inter = w * h
119 | ovr = inter / (iarea + areas[j] - inter)
120 | if ovr >= nms_thresh:
121 | suppressed[j] = True
122 |
123 | return keep
124 |
--------------------------------------------------------------------------------
/demo/face/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 | import scipy.io as sio
5 |
6 | from centerface import CenterFace
7 |
8 |
9 | def test_image(image_path, model_path):
10 | frame = cv2.imread(image_path)
11 | h, w = frame.shape[:2]
12 | landmarks = True
13 | centerface = CenterFace(model_path=model_path, landmarks=landmarks)
14 | centerface.transform(h, w)
15 | if landmarks:
16 | dets, lms = centerface(frame, threshold=0.35)
17 | else:
18 | dets = centerface(frame, threshold=0.35)
19 |
20 | for det in dets:
21 | boxes, score = det[:4], det[4]
22 | cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
23 | if landmarks:
24 | for lm in lms:
25 | cv2.circle(frame, (int(lm[0]), int(lm[1])), 2, (0, 0, 255), -1)
26 | cv2.circle(frame, (int(lm[2]), int(lm[3])), 2, (0, 0, 255), -1)
27 | cv2.circle(frame, (int(lm[4]), int(lm[5])), 2, (0, 0, 255), -1)
28 | cv2.circle(frame, (int(lm[6]), int(lm[7])), 2, (0, 0, 255), -1)
29 | cv2.circle(frame, (int(lm[8]), int(lm[9])), 2, (0, 0, 255), -1)
30 | cv2.imshow('out', frame)
31 | cv2.waitKey(0)
32 |
33 |
34 |
35 | if __name__ == '__main__':
36 | image_path = '/home/tensorboy/centerpose/images/image1.jpg'
37 | model_path = '/home/tensorboy/CenterFace/models/onnx/centerface.onnx'
38 | test_image(image_path, model_path)
39 |
--------------------------------------------------------------------------------
/demo/face/reid/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/__init__.py
--------------------------------------------------------------------------------
/demo/face/reid/reid_manager.py:
--------------------------------------------------------------------------------
1 | from .reid_table.head_pose_base import ReidDataBase
2 |
3 |
4 | class ReIDManager(object):
5 | def __init__(self, config):
6 | self.reid_table = ReidDataBase(config)
7 |
8 | def query_targets(self, reappear_targets, ignored_targets):
9 | if len(reappear_targets) == 0:
10 | return [], []
11 | reappear_detections = []
12 | for single_target in reappear_targets:
13 | best_detection = self._get_detection_with_highest_face(single_target)
14 | reappear_detections.append(best_detection)
15 |
16 | ignored_id = [t.id for t in ignored_targets]
17 | hash_ids, hash_status = self.reid_table.reid_query_detections(reappear_detections, ignored_id)
18 | return hash_ids, hash_status
19 |
20 | def update_targets(self, tracked_targets):
21 | all_detections = [self._get_latest_detection(target) for target in tracked_targets]
22 | # update reid features
23 | self.reid_table.update(all_detections)
24 |
25 | def remove_targets(self, removed_targets):
26 | will_remove_ids = [target.id for target in removed_targets]
27 | self.reid_table.remove(will_remove_ids)
28 |
29 | def query_certain_id(self, detection_list, target_id):
30 | if len(detection_list) == 0:
31 | return []
32 | return self.reid_table.reid_query_certain_id(detection_list, target_id)
33 |
34 | def _get_detection_with_highest_face(self, target):
35 | detection_list = target.last_detections # list(target.get_detections())
36 | detection_list = sorted(detection_list, key=lambda t: t.face_score)[::-1]
37 | return detection_list[0]
38 |
39 | def _get_latest_detection(self, target):
40 | # target.last_detections: a list of last detections sorted by time (currently contains 4 detections)
41 | return target.last_detections[0]
42 |
--------------------------------------------------------------------------------
/demo/face/reid/reid_table/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/reid_table/__init__.py
--------------------------------------------------------------------------------
/demo/face/reid/reid_table/base_idbase.py:
--------------------------------------------------------------------------------
1 | class BaseReidDatabase(object):
2 | """
3 | Args:
4 | dataset: a collection of (hash_id, features) pairs in some format
5 | (maybe proto?)
6 | """
7 |
8 | def __init__(self):
9 | self.dataset = {}
10 |
11 | def update(self, features, cameras, hash_ids):
12 | """Update dataset with features for a specific hash_id
13 | Args:
14 | features: List[M_i x L-dimensional np.float32 array]
15 | cameras: List[M_i np.float32 vector]
16 | hash_ids: List[ids] of length M_i
17 | """
18 | raise NotImplementedError
19 |
20 | def get_all_ids(self):
21 | return ['{:04d}'.format(abs(k) % 10000) for k, v in self.dataset.items()]
22 |
23 | def get_current_table_size(self):
24 | return len(self.dataset)
25 |
26 | def check_if_in_table(self, new_id):
27 | return new_id in self.dataset
28 |
29 | # search all persons in one frame
30 | def retrieval(self, features, cameras, tracked_ids):
31 | """Computes and returns closest entity based on features
32 | Args:
33 | features: List[M_i x L-dimensional np.float32 array]
34 | cameras: List[M_i np.float32 vector]
35 | tracked_ids: List of ids of unknown length, confirmed ids by tracker. ReID should ignore these ids.
36 | Returns:
37 | hash_ids(list): list of ids, id could be none
38 | """
39 | raise NotImplementedError
40 |
41 | def remove(self, hash_id):
42 | """Deletes entity with hash_id and all of it's features from the dataset
43 | Args:
44 | hash_id(string): unique string identifying the specific person
45 | """
46 | raise NotImplementedError
47 |
--------------------------------------------------------------------------------
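`BaseReidDatabase` above only fixes the interface; the table actually used by the demo lives in `head_pose_base.py` (not shown here). Purely as an illustration of that interface, and not of the repository's real logic, a minimal in-memory subclass that matches by cosine distance could look like this (`NaiveReidDatabase` and `max_dist` are made-up names, and cameras are ignored):

```python
import numpy as np

from base_idbase import BaseReidDatabase  # assuming this directory is on sys.path


class NaiveReidDatabase(BaseReidDatabase):
    """Toy table: one list of feature vectors per hash_id, matched by cosine distance."""

    def update(self, features, cameras, hash_ids):
        # cameras are ignored in this toy version
        for feature, hash_id in zip(features, hash_ids):
            self.dataset.setdefault(hash_id, []).append(np.asarray(feature, dtype=np.float32))

    def retrieval(self, features, cameras, tracked_ids, max_dist=0.3):
        hash_ids = []
        for feature in features:
            best_id, best_dist = None, max_dist
            for hash_id, stored in self.dataset.items():
                if hash_id in tracked_ids:
                    continue  # the tracker already owns this identity
                for ref in stored:
                    cos = np.dot(feature, ref) / (np.linalg.norm(feature) * np.linalg.norm(ref) + 1e-12)
                    if 1.0 - cos < best_dist:
                        best_id, best_dist = hash_id, 1.0 - cos
            hash_ids.append(best_id)  # None means "no convincing match"
        return hash_ids

    def remove(self, hash_id):
        self.dataset.pop(hash_id, None)
```
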
/demo/face/reid/reid_table/reid_utils.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class MatchFlags(Enum):
5 | MATCHED = 0
6 | BADFEATURE = 1
7 | NOTCONVINCED = 2
8 | FIRSTTIME = 3
9 | EMPTYDICT = 4
10 | UNREGISTER = 5
11 | NODETECTION = 6
12 |
13 |
14 | class HEADFLAGS(Enum):
15 | S0 = 0
16 | S1 = 1
17 | S2 = 2
18 | S3 = 3
19 | S4 = 4
20 | S5 = 5
21 | S6 = 6
22 |
23 |
24 | # Data template
25 | class Detection:
26 | def __init__(self, ind, score, camera, feature, landmarks, bbox):
27 | self.target_id = ind
28 | self.face_score = score
29 | self.features = feature
30 | self.camera_id = camera
31 | self.landmarks = landmarks
32 | self.bbox = bbox
33 |
34 | def set_new_id(self, new_id):
35 | self.target_id = new_id
36 |
37 | def get_id(self):
38 | return self.target_id
39 |
40 | def get_face_score(self):
41 | return self.face_score
42 |
43 |
44 | def assign_head_status(yaw):
45 | # if abs(yaw) > 25:
46 | # head_status = HEADFLAGS.S2
47 | # elif abs(yaw) > 15:
48 | # head_status = HEADFLAGS.S1
49 | # else:
50 | # head_status = HEADFLAGS.S0
51 | if abs(yaw) > 30:
52 | head_status = HEADFLAGS.S6
53 | elif abs(yaw) > 25:
54 | head_status = HEADFLAGS.S5
55 | elif abs(yaw) > 20:
56 | head_status = HEADFLAGS.S4
57 | elif abs(yaw) > 15:
58 | head_status = HEADFLAGS.S3
59 | elif abs(yaw) > 10:
60 | head_status = HEADFLAGS.S2
61 | elif abs(yaw) > 5:
62 | head_status = HEADFLAGS.S1
63 | else:
64 | head_status = HEADFLAGS.S0
65 | return head_status
66 |
--------------------------------------------------------------------------------
/demo/face/utils/BFM_UV.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/BFM_UV.mat
--------------------------------------------------------------------------------
/demo/face/utils/cv_plot.py:
--------------------------------------------------------------------------------
1 | # coding: UTF-8
2 | """
3 | @func: draw landmark & mesh on image.
4 | @source: YadiraF/PRNet/utils/cv_plot.py
5 | """
6 | import cv2
7 | import numpy as np
8 |
9 | end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
10 |
11 |
12 | def plot_kpt(image, kpt):
13 | ''' Draw 68 key points
14 | Args:
15 | image: the input image
16 | kpt: (68, 3).
17 | '''
18 | image = image.copy()
19 | kpt = np.round(kpt).astype(np.int32)
20 | for i in range(kpt.shape[0]):
21 | st = kpt[i, :2]
22 | image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2)
23 | if i in end_list:
24 | continue
25 | ed = kpt[i + 1, :2]
26 | image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1)
27 | return image
28 |
29 |
30 | def plot_vertices(image, vertices):
31 | image = image.copy()
32 | vertices = np.round(vertices).astype(np.int32)
33 | for i in range(0, vertices.shape[0], 2):
34 | st = vertices[i, :2]
35 | image = cv2.circle(image, (st[0], st[1]), 1, (255, 0, 0), -1)
36 | return image
37 |
38 |
39 | def plot_pose_box(image, P, kpt, color=(0, 255, 0), line_width=2):
40 | ''' Draw a 3D box as annotation of pose. Ref:https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py
41 | Args:
42 | image: the input image
43 | P: (3, 4). Affine Camera Matrix.
44 | kpt: (68, 3).
45 | '''
46 | image = image.copy()
47 |
48 | point_3d = []
49 | rear_size = 90
50 | rear_depth = 0
51 | point_3d.append((-rear_size, -rear_size, rear_depth))
52 | point_3d.append((-rear_size, rear_size, rear_depth))
53 | point_3d.append((rear_size, rear_size, rear_depth))
54 | point_3d.append((rear_size, -rear_size, rear_depth))
55 | point_3d.append((-rear_size, -rear_size, rear_depth))
56 |
57 | front_size = 105
58 | front_depth = 110
59 | point_3d.append((-front_size, -front_size, front_depth))
60 | point_3d.append((-front_size, front_size, front_depth))
61 | point_3d.append((front_size, front_size, front_depth))
62 | point_3d.append((front_size, -front_size, front_depth))
63 | point_3d.append((-front_size, -front_size, front_depth))
64 | point_3d = np.array(point_3d, dtype=float).reshape(-1, 3)
65 |
66 | # Map to 2d image points
67 | point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4
68 | point_2d = point_3d_homo.dot(P.T)[:, :2]
69 | point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(kpt[:27, :2], 0)
70 | point_2d = np.int32(point_2d.reshape(-1, 2))
71 |
72 | # Draw all the lines
73 | cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
74 | cv2.line(image, tuple(point_2d[1]), tuple(
75 | point_2d[6]), color, line_width, cv2.LINE_AA)
76 | cv2.line(image, tuple(point_2d[2]), tuple(
77 | point_2d[7]), color, line_width, cv2.LINE_AA)
78 | cv2.line(image, tuple(point_2d[3]), tuple(
79 | point_2d[8]), color, line_width, cv2.LINE_AA)
80 |
81 | return image
82 |
--------------------------------------------------------------------------------
/demo/face/utils/estimate_pose.py:
--------------------------------------------------------------------------------
1 | # coding: UTF-8
2 |
3 | from math import asin, atan2, cos, sin
4 |
5 | import numpy as np
6 |
7 |
8 | def isRotationMatrix(R):
9 | ''' checks whether a matrix is a valid rotation matrix (i.e., R^T R is the identity)
10 | '''
11 | Rt = np.transpose(R)
12 | shouldBeIdentity = np.dot(Rt, R)
13 | I = np.identity(3, dtype=R.dtype)
14 | n = np.linalg.norm(I - shouldBeIdentity)
15 | return n < 1e-6
16 |
17 |
18 | def matrix2angle(R):
19 | ''' compute three Euler angles from a Rotation Matrix. Ref: http://www.gregslabaugh.net/publications/euler.pdf
20 | Args:
21 | R: (3,3). rotation matrix
22 | Returns:
23 | x: yaw
24 | y: pitch
25 | z: roll
26 | '''
27 | # assert(isRotationMatrix(R))
28 |
29 | if R[2, 0] != 1 and R[2, 0] != -1:  # not in gimbal lock
30 | x = asin(R[2, 0])
31 | y = atan2(R[2, 1] / cos(x), R[2, 2] / cos(x))
32 | z = atan2(R[1, 0] / cos(x), R[0, 0] / cos(x))
33 |
34 | else: # Gimbal lock
35 | z = 0 # can be anything
36 | if R[2, 0] == -1:
37 | x = np.pi / 2
38 | y = z + atan2(R[0, 1], R[0, 2])
39 | else:
40 | x = -np.pi / 2
41 | y = -z + atan2(-R[0, 1], -R[0, 2])
42 |
43 | return x, y, z
44 |
45 |
46 | def P2sRt(P):
47 | ''' decomposing camera matrix P.
48 | Args:
49 | P: (3, 4). Affine Camera Matrix.
50 | Returns:
51 | s: scale factor.
52 | R: (3, 3). rotation matrix.
53 | t2d: (2,). 2d translation.
54 | '''
55 | t2d = P[:2, 3]
56 | R1 = P[0:1, :3]
57 | R2 = P[1:2, :3]
58 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0
59 | r1 = R1 / np.linalg.norm(R1)
60 | r2 = R2 / np.linalg.norm(R2)
61 | r3 = np.cross(r1, r2)
62 |
63 | R = np.concatenate((r1, r2, r3), 0)
64 | return s, R, t2d
65 |
66 |
67 | def compute_similarity_transform(points_static, points_to_transform):
68 | # http://nghiaho.com/?page_id=671
69 | p0 = np.copy(points_static).T
70 | p1 = np.copy(points_to_transform).T
71 |
72 | t0 = -np.mean(p0, axis=1).reshape(3, 1)
73 | t1 = -np.mean(p1, axis=1).reshape(3, 1)
74 | t_final = t1 - t0
75 |
76 | p0c = p0 + t0
77 | p1c = p1 + t1
78 |
79 | covariance_matrix = p0c.dot(p1c.T)
80 | U, S, V = np.linalg.svd(covariance_matrix)
81 | R = U.dot(V)
82 | if np.linalg.det(R) < 0:
83 | R[:, 2] *= -1
84 |
85 | rms_d0 = np.sqrt(np.mean(np.linalg.norm(p0c, axis=0) ** 2))
86 | rms_d1 = np.sqrt(np.mean(np.linalg.norm(p1c, axis=0) ** 2))
87 |
88 | s = (rms_d0 / rms_d1)
89 | P = np.c_[s * np.eye(3).dot(R), t_final]
90 | return P
91 |
92 |
93 | def estimate_pose(vertices):
94 | canonical_vertices = np.load('face/utils/uv_data/canonical_vertices.npy')
95 | P = compute_similarity_transform(vertices, canonical_vertices)
96 | _, R, _ = P2sRt(P) # decompose affine matrix to s, R, t
97 | pose = matrix2angle(R)
98 |
99 | return P, pose
100 |
--------------------------------------------------------------------------------
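The decomposition above can be sanity-checked end to end: build a 3x4 affine camera matrix from a known scale and a rotation about the z-axis, then recover them with `P2sRt` and `matrix2angle`. The import path is an assumption (run from `demo/face/utils/`).

```python
import numpy as np

from estimate_pose import P2sRt, matrix2angle  # assumed import path

theta = np.deg2rad(30.0)
R_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                   [np.sin(theta),  np.cos(theta), 0.0],
                   [0.0,            0.0,           1.0]])
s_true = 2.0
t = np.array([10.0, 20.0, 0.0])

P = np.hstack([s_true * R_true, t.reshape(3, 1)])  # 3 x 4 affine camera matrix

s, R, t2d = P2sRt(P)
angles = matrix2angle(R)
print(s)                      # ~2.0, the scale is recovered
print(np.rad2deg(angles[2]))  # ~30.0, the rotation about z shows up in the third angle
print(t2d)                    # [10. 20.], the 2D translation
```
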
/demo/face/utils/generate_posmap_300WLP.py:
--------------------------------------------------------------------------------
1 | # coding: UTF-8
2 | '''
3 | Generate uv position map of 300W_LP.
4 | '''
5 | import argparse
6 | import os
7 | import sys
8 | from time import time
9 |
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import scipy.io as sio
13 | import skimage.transform
14 | from skimage import io
15 |
16 | import face3d
17 | from face3d import mesh
18 | from face3d.morphable_model import MorphabelModel
19 |
20 | sys.path.append('..')
21 |
22 |
23 | def process_uv(uv_coords, uv_h=256, uv_w=256):
24 | uv_coords[:, 0] = uv_coords[:, 0] * (uv_w - 1)
25 | uv_coords[:, 1] = uv_coords[:, 1] * (uv_h - 1)
26 | uv_coords[:, 1] = uv_h - uv_coords[:, 1] - 1
27 | uv_coords = np.hstack((uv_coords, np.zeros((uv_coords.shape[0], 1)))) # add z
28 | return uv_coords
29 |
30 |
31 | def run_posmap_300W_LP(bfm, image_path, mat_path, save_folder, idx=0, uv_h=256, uv_w=256, image_h=256, image_w=256):
32 | # 1. load image and fitted parameters
33 | image_name = image_path.strip().split('/')[-1]
34 | image = io.imread(image_path) / 255.
35 | [h, w, c] = image.shape
36 |
37 | info = sio.loadmat(mat_path)
38 | pose_para = info['Pose_Para'].T.astype(np.float32)
39 | shape_para = info['Shape_Para'].astype(np.float32)
40 | exp_para = info['Exp_Para'].astype(np.float32)
41 |
42 | # 2. generate mesh
43 | # generate shape
44 | vertices = bfm.generate_vertices(shape_para, exp_para)
45 | # transform mesh
46 | s = pose_para[-1, 0]
47 | angles = pose_para[:3, 0]
48 | t = pose_para[3:6, 0]
49 | transformed_vertices = bfm.transform_3ddfa(vertices, s, angles, t)
50 | projected_vertices = transformed_vertices.copy() # using standard camera & orthographic projection as in 3DDFA
51 | image_vertices = projected_vertices.copy()
52 | image_vertices[:, 1] = h - image_vertices[:, 1] - 1
53 |
54 | # 3. crop image with key points
55 | kpt = image_vertices[bfm.kpt_ind, :].astype(np.int32)
56 | left = np.min(kpt[:, 0])
57 | right = np.max(kpt[:, 0])
58 | top = np.min(kpt[:, 1])
59 | bottom = np.max(kpt[:, 1])
60 | center = np.array([right - (right - left) / 2.0,
61 | bottom - (bottom - top) / 2.0])
62 | old_size = (right - left + bottom - top) / 2
63 | size = int(old_size * 1.5)
64 | # random perturbation; you can change the numbers
65 | marg = old_size * 0.1
66 | t_x = np.random.rand() * marg * 2 - marg
67 | t_y = np.random.rand() * marg * 2 - marg
68 | center[0] = center[0] + t_x
69 | center[1] = center[1] + t_y
70 | size = size * (np.random.rand() * 0.2 + 0.9)
71 |
72 | # crop and record the transform parameters
73 | src_pts = np.array([[center[0] - size / 2, center[1] - size / 2], [center[0] - size / 2, center[1] + size / 2],
74 | [center[0] + size / 2, center[1] - size / 2]])
75 | DST_PTS = np.array([[0, 0], [0, image_h - 1], [image_w - 1, 0]])
76 | tform = skimage.transform.estimate_transform('similarity', src_pts, DST_PTS)
77 | cropped_image = skimage.transform.warp(image, tform.inverse, output_shape=(image_h, image_w))
78 |
79 | # transform face position(image vertices) along with 2d facial image
80 | position = image_vertices.copy()
81 | position[:, 2] = 1
82 | position = np.dot(position, tform.params.T)
83 | position[:, 2] = image_vertices[:, 2] * tform.params[0, 0] # scale z
84 | position[:, 2] = position[:, 2] - np.min(position[:, 2]) # translate z
85 |
86 | # 4. uv position map: render position in uv space
87 | uv_position_map = mesh.render.render_colors(uv_coords, bfm.full_triangles, position, uv_h, uv_w, c=3)
88 |
89 | # 5. save files
90 | if not os.path.exists(os.path.join(save_folder, str(idx) + '/')):
91 | os.mkdir(os.path.join(save_folder, str(idx) + '/'))
92 |
93 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'original.jpg'), np.squeeze(cropped_image))
94 | np.save('{}/{}/{}'.format(save_folder, idx, image_name.replace('jpg', 'npy')), uv_position_map)
95 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'uv_posmap.jpg'),
96 | (uv_position_map) / max(image_h, image_w)) # only for show
97 |
98 | # --verify
99 | # import cv2
100 | # uv_texture_map_rec = cv2.remap(cropped_image, uv_position_map[:,:,:2].astype(np.float32), None, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT,borderValue=(0))
101 | # io.imsave('{}/{}'.format(save_folder, image_name.replace('.jpg', '_tex.jpg')), np.squeeze(uv_texture_map_rec))
102 |
103 |
104 | def generate_batch_sample(input_dir, save_folder='./300WLP'):
105 | if not os.path.exists(save_folder):
106 | os.mkdir(save_folder)
107 | # set para
108 | uv_h = uv_w = 256
109 |
110 | # load uv coords
111 | global uv_coords
112 | uv_coords = face3d.morphable_model.load.load_uv_coords('BFM/BFM_UV.mat') #
113 | uv_coords = process_uv(uv_coords, uv_h, uv_w)
114 |
115 | # load bfm
116 | bfm = MorphabelModel('BFM/BFM.mat')
117 |
118 | # Batch generating uv_map Dataset
119 | """
120 | @date: 2019/07/19
121 | Train Dataset:
122 | AFW. 10413.
123 | HELEN. 75351.
124 | LFPW. 33111.
125 | Test Dataset:
126 | IBUG. 3571.
127 |
128 | """
129 | base = 0
130 |
131 | for idx, item in enumerate(os.listdir(input_dir)):
132 | if 'jpg' in item:
133 | ab_path = os.path.join(input_dir, item)
134 | img_path = ab_path
135 | mat_path = ab_path.replace('jpg', 'mat')
136 |
137 | run_posmap_300W_LP(bfm, img_path, mat_path, save_folder, idx + base)
138 | print("Number {} uv_pos_map was generated!".format(idx))
139 |
140 |
141 | if __name__ == '__main__':
142 | parser = argparse.ArgumentParser()
143 | parser.add_argument("--save_dir", help="specify output uv_map directory.")
144 | parser.add_argument("--input_dir", help="specify input origin mat & image directory.")
145 | args = parser.parse_args()
146 |
147 | generate_batch_sample(save_folder=args.save_dir,
148 | input_dir=args.input_dir)
149 |
--------------------------------------------------------------------------------
/demo/face/utils/render_app.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .render import vis_of_vertices, render_texture
3 | from scipy import ndimage
4 |
5 | def get_visibility(vertices, triangles, h, w):
6 | triangles = triangles.T
7 | vertices_vis = vis_of_vertices(vertices.T, triangles, h, w)
8 | vertices_vis = vertices_vis.astype(bool)
9 | for k in range(2):
10 | tri_vis = vertices_vis[triangles[0,:]] | vertices_vis[triangles[1,:]] | vertices_vis[triangles[2,:]]
11 | ind = triangles[:, tri_vis]
12 | vertices_vis[ind] = True
13 | # for k in range(2):
14 | # tri_vis = vertices_vis[triangles[0,:]] & vertices_vis[triangles[1,:]] & vertices_vis[triangles[2,:]]
15 | # ind = triangles[:, tri_vis]
16 | # vertices_vis[ind] = True
17 | vertices_vis = vertices_vis.astype(np.float32) #1 for visible and 0 for non-visible
18 | return vertices_vis
19 |
20 | def get_uv_mask(vertices_vis, triangles, uv_coords, h, w, resolution):
21 | triangles = triangles.T
22 | vertices_vis = vertices_vis.astype(np.float32)
23 | uv_mask = render_texture(uv_coords.T, vertices_vis[np.newaxis, :], triangles, resolution, resolution, 1)
24 | uv_mask = np.squeeze(uv_mask > 0)
25 | uv_mask = ndimage.binary_closing(uv_mask)
26 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4)))
27 | uv_mask = ndimage.binary_closing(uv_mask)
28 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4)))
29 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4)))
30 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4)))
31 | uv_mask = uv_mask.astype(np.float32)
32 |
33 | return np.squeeze(uv_mask)
34 |
35 | def get_depth_image(vertices, triangles, h, w, isShow = False):
36 | z = vertices[:, 2:]
37 | if isShow:
38 | z = z/max(z)
39 | depth_image = render_texture(vertices.T, z.T, triangles.T, h, w, 1)
40 | return np.squeeze(depth_image)
41 |
--------------------------------------------------------------------------------
/demo/face/utils/rotate_vertices.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def frontalize(vertices):
5 | canonical_vertices = np.load('utils/uv_data/canonical_vertices.npy')
6 |
7 | vertices_homo = np.hstack((vertices, np.ones([vertices.shape[0], 1]))) # n x 4
8 | P = np.linalg.lstsq(vertices_homo, canonical_vertices)[0].T # Affine matrix. 3 x 4
9 | front_vertices = vertices_homo.dot(P.T)
10 |
11 | return front_vertices
12 |
--------------------------------------------------------------------------------
/demo/face/utils/uv_data/canonical_vertices.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/canonical_vertices.npy
--------------------------------------------------------------------------------
/demo/face/utils/uv_data/uv_kpt_ind.txt:
--------------------------------------------------------------------------------
1 | 1.500000000000000000e+01 2.200000000000000000e+01 2.600000000000000000e+01 3.200000000000000000e+01 4.500000000000000000e+01 6.700000000000000000e+01 9.100000000000000000e+01 1.120000000000000000e+02 1.280000000000000000e+02 1.430000000000000000e+02 1.640000000000000000e+02 1.880000000000000000e+02 2.100000000000000000e+02 2.230000000000000000e+02 2.290000000000000000e+02 2.330000000000000000e+02 2.400000000000000000e+02 5.800000000000000000e+01 7.100000000000000000e+01 8.500000000000000000e+01 9.700000000000000000e+01 1.060000000000000000e+02 1.490000000000000000e+02 1.580000000000000000e+02 1.700000000000000000e+02 1.840000000000000000e+02 1.970000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.170000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.380000000000000000e+02 7.800000000000000000e+01 8.600000000000000000e+01 9.500000000000000000e+01 1.020000000000000000e+02 9.600000000000000000e+01 8.700000000000000000e+01 1.530000000000000000e+02 1.600000000000000000e+02 1.690000000000000000e+02 1.770000000000000000e+02 1.680000000000000000e+02 1.590000000000000000e+02 1.080000000000000000e+02 1.160000000000000000e+02 1.240000000000000000e+02 1.280000000000000000e+02 1.310000000000000000e+02 1.390000000000000000e+02 1.460000000000000000e+02 1.370000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02 1.180000000000000000e+02 1.100000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.450000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02
2 | 9.600000000000000000e+01 1.180000000000000000e+02 1.410000000000000000e+02 1.650000000000000000e+02 1.830000000000000000e+02 1.900000000000000000e+02 1.880000000000000000e+02 1.870000000000000000e+02 1.930000000000000000e+02 1.870000000000000000e+02 1.880000000000000000e+02 1.900000000000000000e+02 1.830000000000000000e+02 1.650000000000000000e+02 1.410000000000000000e+02 1.180000000000000000e+02 9.600000000000000000e+01 4.900000000000000000e+01 4.200000000000000000e+01 3.900000000000000000e+01 4.000000000000000000e+01 4.200000000000000000e+01 4.200000000000000000e+01 4.000000000000000000e+01 3.900000000000000000e+01 4.200000000000000000e+01 4.900000000000000000e+01 5.900000000000000000e+01 7.300000000000000000e+01 8.600000000000000000e+01 9.600000000000000000e+01 1.110000000000000000e+02 1.130000000000000000e+02 1.150000000000000000e+02 1.130000000000000000e+02 1.110000000000000000e+02 6.700000000000000000e+01 6.000000000000000000e+01 6.100000000000000000e+01 6.500000000000000000e+01 6.800000000000000000e+01 6.900000000000000000e+01 6.500000000000000000e+01 6.100000000000000000e+01 6.000000000000000000e+01 6.700000000000000000e+01 6.900000000000000000e+01 6.800000000000000000e+01 1.420000000000000000e+02 1.310000000000000000e+02 1.270000000000000000e+02 1.280000000000000000e+02 1.270000000000000000e+02 1.310000000000000000e+02 1.420000000000000000e+02 1.480000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.480000000000000000e+02 1.410000000000000000e+02 1.350000000000000000e+02 1.340000000000000000e+02 1.350000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02
3 |
--------------------------------------------------------------------------------
/demo/face/utils/uv_data/uv_weight_mask_gdh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/uv_weight_mask_gdh.png
--------------------------------------------------------------------------------
/demo/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/result.png
--------------------------------------------------------------------------------
/demo/tensorrt_model.py:
--------------------------------------------------------------------------------
1 | import atexit
2 |
3 | import tensorrt as trt
4 | import torch
5 |
6 |
7 | def torch_dtype_to_trt(dtype):
8 | if dtype == torch.int8:
9 | return trt.int8
10 | elif dtype == torch.int32:
11 | return trt.int32
12 | elif dtype == torch.float16:
13 | return trt.float16
14 | elif dtype == torch.float32:
15 | return trt.float32
16 | else:
17 | raise TypeError('%s is not supported by tensorrt' % dtype)
18 |
19 |
20 | def torch_dtype_from_trt(dtype):
21 | if dtype == trt.int8:
22 | return torch.int8
23 | elif dtype == trt.int32:
24 | return torch.int32
25 | elif dtype == trt.float16:
26 | return torch.float16
27 | elif dtype == trt.float32:
28 | return torch.float32
29 | else:
30 | raise TypeError('%s is not supported by torch' % dtype)
31 |
32 |
33 | def torch_device_to_trt(device):
34 | if device.type == torch.device('cuda').type:
35 | return trt.TensorLocation.DEVICE
36 | elif device.type == torch.device('cpu').type:
37 | return trt.TensorLocation.HOST
38 | else:
39 | raise TypeError('%s is not supported by tensorrt' % device)
40 |
41 |
42 | def torch_device_from_trt(device):
43 | if device == trt.TensorLocation.DEVICE:
44 | return torch.device('cuda')
45 | elif device == trt.TensorLocation.HOST:
46 | return torch.device('cpu')
47 | else:
48 | raise TypeError('%s is not supported by torch' % device)
49 |
50 |
51 | class TRTModel(object):
52 |
53 | def __init__(self, engine_path, input_names=None, output_names=None, final_shapes=None):
54 |
55 | # load engine
56 | self.logger = trt.Logger()
57 | self.runtime = trt.Runtime(self.logger)
58 | with open(engine_path, 'rb') as f:
59 | self.engine = self.runtime.deserialize_cuda_engine(f.read())
60 | self.context = self.engine.create_execution_context()
61 |
62 | if input_names is None:
63 | self.input_names = self._trt_input_names()
64 | else:
65 | self.input_names = input_names
66 |
67 | if output_names is None:
68 | self.output_names = self._trt_output_names()
69 | else:
70 | self.output_names = output_names
71 |
72 | self.final_shapes = final_shapes
73 |
74 | def _input_binding_indices(self):
75 | return [i for i in range(self.engine.num_bindings) if self.engine.binding_is_input(i)]
76 |
77 | def _output_binding_indices(self):
78 | return [i for i in range(self.engine.num_bindings) if not self.engine.binding_is_input(i)]
79 |
80 | def _trt_input_names(self):
81 | return [self.engine.get_binding_name(i) for i in self._input_binding_indices()]
82 |
83 | def _trt_output_names(self):
84 | return [self.engine.get_binding_name(i) for i in self._output_binding_indices()]
85 |
86 | def create_output_buffers(self, batch_size):
87 | outputs = [None] * len(self.output_names)
88 | for i, output_name in enumerate(self.output_names):
89 | idx = self.engine.get_binding_index(output_name)
90 | dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
91 | if self.final_shapes is not None:
92 | shape = (batch_size, ) + self.final_shapes[i]
93 | else:
94 | shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx))
95 | device = torch_device_from_trt(self.engine.get_location(idx))
96 | output = torch.empty(size=shape, dtype=dtype, device=device)
97 | outputs[i] = output
98 | return outputs
99 |
100 | def execute(self, *inputs):
101 | batch_size = inputs[0].shape[0]
102 |
103 | bindings = [None] * (len(self.input_names) + len(self.output_names))
104 |
105 | # map input bindings
106 | inputs_torch = [None] * len(self.input_names)
107 | for i, name in enumerate(self.input_names):
108 | idx = self.engine.get_binding_index(name)
109 |
110 | # convert to appropriate format
111 | inputs_torch[i] = torch.from_numpy(inputs[i])
112 | inputs_torch[i] = inputs_torch[i].to(torch_device_from_trt(self.engine.get_location(idx)))
113 | inputs_torch[i] = inputs_torch[i].type(torch_dtype_from_trt(self.engine.get_binding_dtype(idx)))
114 |
115 | bindings[idx] = int(inputs_torch[i].data_ptr())
116 |
117 | output_buffers = self.create_output_buffers(batch_size)
118 |
119 | # map output bindings
120 | for i, name in enumerate(self.output_names):
121 | idx = self.engine.get_binding_index(name)
122 | bindings[idx] = int(output_buffers[i].data_ptr())
123 |
124 | self.context.execute(batch_size, bindings)
125 |
126 | outputs = [buffer for buffer in output_buffers]
127 |
128 | return outputs
129 |
130 | def __call__(self, *inputs):
131 | return self.execute(*inputs)
132 |
--------------------------------------------------------------------------------
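A minimal usage sketch for `TRTModel`: inputs are NumPy arrays (they are converted with `torch.from_numpy` internally) and the outputs come back as torch tensors, one per output binding. The engine path and the 512x512 input shape below are placeholders, not files shipped with the repo.

```python
import numpy as np

from tensorrt_model import TRTModel  # run from inside demo/

model = TRTModel('dla34_512x512.trt')                    # placeholder serialized engine
img = np.random.rand(1, 3, 512, 512).astype(np.float32)  # NCHW input matching the engine

outputs = model(img)  # list of torch tensors, one per output binding
for name, out in zip(model.output_names, outputs):
    print(name, tuple(out.shape), out.dtype)
```
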
/demo/tracking/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/__init__.py
--------------------------------------------------------------------------------
/demo/tracking/deep_sort.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import numpy as np
4 |
5 | from .feature_extractor import Extractor
6 | from .sort.detection import Detection
7 | from .sort.nn_matching import NearestNeighborDistanceMetric
8 | from .sort.preprocessing import non_max_suppression
9 | from .sort.tracker import Tracker
10 |
11 |
12 | class DeepSort(object):
13 | def __init__(self, model_path):
14 | self.min_confidence = 0.3
15 | self.nms_max_overlap = 1.0
16 |
17 | self.extractor = Extractor(model_path, use_cuda=True)
18 |
19 | max_cosine_distance = 0.2
20 | nn_budget = 100
21 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
22 | self.tracker = Tracker(metric)
23 |
24 | def update(self, bbox_xywh, confidences, ori_img):
25 | self.height, self.width = ori_img.shape[:2]
26 |
27 |
28 | # generate detections
29 | try:
30 | features = self._get_features(bbox_xywh, ori_img)
31 | except Exception as exc:
32 | raise RuntimeError('feature extraction failed for boxes {}'.format(bbox_xywh)) from exc
33 | detections = [Detection(bbox_xywh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence]
34 |
35 |
36 | # run non-maximum suppression
37 | boxes = np.array([d.tlwh for d in detections])
38 | scores = np.array([d.confidence for d in detections])
39 | indices = non_max_suppression( boxes, self.nms_max_overlap, scores)
40 | detections = [detections[i] for i in indices]
41 |
42 |
43 | # update tracker
44 | self.tracker.predict()
45 | self.tracker.update(detections)
46 |
47 |
48 | # output bbox identities
49 | outputs = []
50 | for track in self.tracker.tracks:
51 | if not track.is_confirmed() or track.time_since_update > 1:
52 | continue
53 | box = track.to_tlwh()
54 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box)
55 | track_id = track.track_id
56 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=int))
57 | if len(outputs) > 0:
58 | outputs = np.stack(outputs,axis=0)
59 |
60 |
61 | return outputs
62 |
63 |
64 |
65 | # for centernet (x1, y1, w, h -> x1, y1, x2, y2)
66 | def _xywh_to_xyxy_centernet(self, bbox_xywh):
67 | x1,y1,w,h = bbox_xywh
68 | x1 = max(x1,0)
69 | y1 = max(y1,0)
70 | x2 = min(int(x1+w),self.width-1)
71 | y2 = min(int(y1+h),self.height-1)
72 | return int(x1),int(y1),x2,y2
73 |
74 | # for yolo (center x, center y, w, h -> x1, y1, x2, y2)
75 | def _xywh_to_xyxy_yolo(self, bbox_xywh):
76 | x,y,w,h = bbox_xywh
77 | x1 = max(int(x-w/2),0)
78 | x2 = min(int(x+w/2),self.width-1)
79 | y1 = max(int(y-h/2),0)
80 | y2 = min(int(y+h/2),self.height-1)
81 | return x1,y1,x2,y2
82 |
83 | def _get_features(self, bbox_xywh, ori_img):
84 | features = []
85 | for box in bbox_xywh:
86 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box)
87 | im = ori_img[y1:y2,x1:x2]
88 | feature = self.extractor(im)[0]
89 | features.append(feature)
90 | if len(features):
91 | features = np.stack(features, axis=0)
92 | else:
93 | features = np.array([])
94 | return features
95 |
96 | if __name__ == '__main__':
97 | pass
98 |
--------------------------------------------------------------------------------
/demo/tracking/feature_extractor.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | import torchvision.transforms as transforms
5 |
6 | from .model import Net
7 |
8 |
9 | class Extractor(object):
10 | def __init__(self, model_path, use_cuda=True):
11 | self.net = Net(reid=True)
12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
13 | state_dict = torch.load(model_path)['net_dict']
14 | self.net.load_state_dict(state_dict)
15 | print("Loading weights from {}... Done!".format(model_path))
16 | self.net.to(self.device)
17 | self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
18 |
19 | def __call__(self, img):
20 | assert isinstance(img, np.ndarray), "type error"
21 | img = img.astype(float)  # /255.
22 | img = cv2.resize(img, (64,128))
23 | img = torch.from_numpy(img).float().permute(2,0,1)
24 | img = self.norm(img).unsqueeze(0)
25 | with torch.no_grad():
26 | img = img.to(self.device)
27 | feature = self.net(img)
28 | return feature.cpu().numpy()
29 |
30 |
31 | if __name__ == '__main__':
32 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)]
33 | extr = Extractor("checkpoint/ckpt.t7")
34 | feature = extr(img)
35 | print(feature.shape)
36 |
--------------------------------------------------------------------------------
/demo/tracking/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class BasicBlock(nn.Module):
7 | def __init__(self, c_in, c_out,is_downsample=False):
8 | super(BasicBlock,self).__init__()
9 | self.is_downsample = is_downsample
10 | if is_downsample:
11 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
12 | else:
13 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
14 | self.bn1 = nn.BatchNorm2d(c_out)
15 | self.relu = nn.ReLU(True)
16 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False)
17 | self.bn2 = nn.BatchNorm2d(c_out)
18 | if is_downsample:
19 | self.downsample = nn.Sequential(
20 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
21 | nn.BatchNorm2d(c_out)
22 | )
23 | elif c_in != c_out:
24 | self.downsample = nn.Sequential(
25 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
26 | nn.BatchNorm2d(c_out)
27 | )
28 | self.is_downsample = True
29 |
30 | def forward(self,x):
31 | y = self.conv1(x)
32 | y = self.bn1(y)
33 | y = self.relu(y)
34 | y = self.conv2(y)
35 | y = self.bn2(y)
36 | if self.is_downsample:
37 | x = self.downsample(x)
38 | return F.relu(x.add(y),True)
39 |
40 | def make_layers(c_in,c_out,repeat_times, is_downsample=False):
41 | blocks = []
42 | for i in range(repeat_times):
43 | if i ==0:
44 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),]
45 | else:
46 | blocks += [BasicBlock(c_out,c_out),]
47 | return nn.Sequential(*blocks)
48 |
49 | class Net(nn.Module):
50 | def __init__(self, num_classes=751 ,reid=False):
51 | super(Net,self).__init__()
52 | # 3 128 64
53 | self.conv = nn.Sequential(
54 | nn.Conv2d(3,64,3,stride=1,padding=1),
55 | nn.BatchNorm2d(64),
56 | nn.ReLU(inplace=True),
57 | # nn.Conv2d(32,32,3,stride=1,padding=1),
58 | # nn.BatchNorm2d(32),
59 | # nn.ReLU(inplace=True),
60 | nn.MaxPool2d(3,2,padding=1),
61 | )
62 | # 32 64 32
63 | self.layer1 = make_layers(64,64,2,False)
64 | # 32 64 32
65 | self.layer2 = make_layers(64,128,2,True)
66 | # 64 32 16
67 | self.layer3 = make_layers(128,256,2,True)
68 | # 128 16 8
69 | self.layer4 = make_layers(256,512,2,True)
70 | # 256 8 4
71 | self.avgpool = nn.AvgPool2d((8,4),1)
72 | # 256 1 1
73 | self.reid = reid
74 | self.classifier = nn.Sequential(
75 | nn.Linear(512, 256),
76 | nn.BatchNorm1d(256),
77 | nn.ReLU(inplace=True),
78 | nn.Dropout(),
79 | nn.Linear(256, num_classes),
80 | )
81 |
82 | def forward(self, x):
83 | x = self.conv(x)
84 | x = self.layer1(x)
85 | x = self.layer2(x)
86 | x = self.layer3(x)
87 | x = self.layer4(x)
88 | x = self.avgpool(x)
89 | x = x.view(x.size(0),-1)
90 | # B x 128
91 | if self.reid:
92 | x = x.div(x.norm(p=2,dim=1,keepdim=True))
93 | return x
94 | # classifier
95 | x = self.classifier(x)
96 | return x
97 |
98 |
99 | if __name__ == '__main__':
100 | net = Net()
101 | x = torch.randn(4,3,128,64)
102 | y = net(x)
103 | import ipdb; ipdb.set_trace()
104 |
--------------------------------------------------------------------------------
/demo/tracking/sort/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/sort/__init__.py
--------------------------------------------------------------------------------
/demo/tracking/sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | class Detection(object):
6 | """
7 | This class represents a bounding box detection in a single image.
8 |
9 | Parameters
10 | ----------
11 | tlwh : array_like
12 | Bounding box in format `(x, y, w, h)`.
13 | confidence : float
14 | Detector confidence score.
15 | feature : array_like
16 | A feature vector that describes the object contained in this image.
17 |
18 | Attributes
19 | ----------
20 | tlwh : ndarray
21 | Bounding box in format `(top left x, top left y, width, height)`.
22 | confidence : ndarray
23 | Detector confidence score.
24 | feature : ndarray | NoneType
25 | A feature vector that describes the object contained in this image.
26 |
27 | """
28 |
29 | def __init__(self, tlwh, confidence, feature):
30 | self.tlwh = np.asarray(tlwh, dtype=float)
31 | self.confidence = float(confidence)
32 | self.feature = np.asarray(feature, dtype=np.float32)
33 |
34 | def to_tlbr(self):
35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
36 | `(top left, bottom right)`.
37 | """
38 | ret = self.tlwh.copy()
39 | ret[2:] += ret[:2]
40 | return ret
41 |
42 | def to_xyah(self):
43 | """Convert bounding box to format `(center x, center y, aspect ratio,
44 | height)`, where the aspect ratio is `width / height`.
45 | """
46 | ret = self.tlwh.copy()
47 | ret[:2] += ret[2:] / 2
48 | ret[2] /= ret[3]
49 | return ret
50 |
--------------------------------------------------------------------------------
/demo/tracking/sort/iou_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 |
4 | import numpy as np
5 |
6 | from . import linear_assignment
7 |
8 |
9 | def iou(bbox, candidates):
10 | """Computer intersection over union.
11 |
12 | Parameters
13 | ----------
14 | bbox : ndarray
15 | A bounding box in format `(top left x, top left y, width, height)`.
16 | candidates : ndarray
17 | A matrix of candidate bounding boxes (one per row) in the same format
18 | as `bbox`.
19 |
20 | Returns
21 | -------
22 | ndarray
23 | The intersection over union in [0, 1] between the `bbox` and each
24 | candidate. A higher score means a larger fraction of the `bbox` is
25 | occluded by the candidate.
26 |
27 | """
28 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
29 | candidates_tl = candidates[:, :2]
30 | candidates_br = candidates[:, :2] + candidates[:, 2:]
31 |
32 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
33 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
34 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
35 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
36 | wh = np.maximum(0., br - tl)
37 |
38 | area_intersection = wh.prod(axis=1)
39 | area_bbox = bbox[2:].prod()
40 | area_candidates = candidates[:, 2:].prod(axis=1)
41 | return area_intersection / (area_bbox + area_candidates - area_intersection)
42 |
43 |
44 | def iou_cost(tracks, detections, track_indices=None,
45 | detection_indices=None):
46 | """An intersection over union distance metric.
47 |
48 | Parameters
49 | ----------
50 | tracks : List[deep_sort.track.Track]
51 | A list of tracks.
52 | detections : List[deep_sort.detection.Detection]
53 | A list of detections.
54 | track_indices : Optional[List[int]]
55 | A list of indices to tracks that should be matched. Defaults to
56 | all `tracks`.
57 | detection_indices : Optional[List[int]]
58 | A list of indices to detections that should be matched. Defaults
59 | to all `detections`.
60 |
61 | Returns
62 | -------
63 | ndarray
64 | Returns a cost matrix of shape
65 | len(track_indices), len(detection_indices) where entry (i, j) is
66 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
67 |
68 | """
69 | if track_indices is None:
70 | track_indices = np.arange(len(tracks))
71 | if detection_indices is None:
72 | detection_indices = np.arange(len(detections))
73 |
74 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
75 | for row, track_idx in enumerate(track_indices):
76 | if tracks[track_idx].time_since_update > 1:
77 | cost_matrix[row, :] = linear_assignment.INFTY_COST
78 | continue
79 |
80 | bbox = tracks[track_idx].to_tlwh()
81 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
82 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
83 | return cost_matrix
84 |
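As a quick numeric check of what `iou` returns (hypothetical boxes; the snippet repeats the same vectorised computation inline so it runs without the package's relative imports):

    import numpy as np

    # Reference box and two candidates in (top left x, top left y, width, height) format.
    bbox = np.array([0., 0., 10., 10.])
    candidates = np.array([[5., 5., 10., 10.],    # partial overlap
                           [0., 0., 10., 10.]])   # identical box

    # The same computation performed by iou(bbox, candidates) above.
    tl = np.maximum(bbox[:2], candidates[:, :2])
    br = np.minimum(bbox[:2] + bbox[2:], candidates[:, :2] + candidates[:, 2:])
    wh = np.maximum(0., br - tl)
    area_intersection = wh.prod(axis=1)
    area_union = bbox[2:].prod() + candidates[:, 2:].prod(axis=1) - area_intersection
    print(area_intersection / area_union)  # approx [0.1428 1.] -> 25/175 and a perfect match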
--------------------------------------------------------------------------------
/demo/tracking/sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7 | """Suppress overlapping detections.
8 |
9 | Original code from [1]_ has been adapted to include confidence score.
10 |
11 | .. [1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> scores = [d.confidence for d in detections]
19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20 | >>> detections = [detections[i] for i in indices]
21 |
22 | Parameters
23 | ----------
24 | boxes : ndarray
25 | Array of ROIs (x, y, width, height).
26 | max_bbox_overlap : float
27 | ROIs that overlap more than this value are suppressed.
28 | scores : Optional[array_like]
29 | Detector confidence score.
30 |
31 | Returns
32 | -------
33 | List[int]
34 | Returns indices of detections that have survived non-maxima suppression.
35 |
36 | """
37 | if len(boxes) == 0:
38 | return []
39 |
40 | boxes = boxes.astype(np.float64)
41 | pick = []
42 |
43 | x1 = boxes[:, 0]
44 | y1 = boxes[:, 1]
45 | x2 = boxes[:, 2] + boxes[:, 0]
46 | y2 = boxes[:, 3] + boxes[:, 1]
47 |
48 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 | if scores is not None:
50 | idxs = np.argsort(scores)
51 | else:
52 | idxs = np.argsort(y2)
53 |
54 | while len(idxs) > 0:
55 | last = len(idxs) - 1
56 | i = idxs[last]
57 | pick.append(i)
58 |
59 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 |
64 | w = np.maximum(0, xx2 - xx1 + 1)
65 | h = np.maximum(0, yy2 - yy1 + 1)
66 |
67 | overlap = (w * h) / area[idxs[:last]]
68 |
69 | idxs = np.delete(
70 | idxs, np.concatenate(
71 | ([last], np.where(overlap > max_bbox_overlap)[0])))
72 |
73 | return pick
74 |
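A small, hedged usage sketch of `non_max_suppression` (hypothetical boxes and scores; import path assumes demo/ is on PYTHONPATH). With an overlap threshold of 0.5 the lower-scoring box B is suppressed by A:

    import numpy as np
    from tracking.sort.preprocessing import non_max_suppression  # assumed import path

    boxes = np.array([[ 10,  10, 50, 50],    # box A in (x, y, w, h) format
                      [ 12,  12, 50, 50],    # box B, overlaps A almost completely
                      [200, 200, 40, 40]])   # box C, far away
    scores = np.array([0.9, 0.6, 0.8])

    keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
    print(keep)  # indices of surviving boxes: A (0) and C (2); B is suppressed by A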
--------------------------------------------------------------------------------
/demo/tracking/sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int
53 | Total number of measurement updates.
54 | age : int
55 | Total number of frames since first occurrence.
56 | time_since_update : int
57 | Total number of frames since last measurement update.
58 | state : TrackState
59 | The current track state.
60 | features : List[ndarray]
61 | A cache of features. On each measurement update, the associated feature
62 | vector is added to this list.
63 |
64 | """
65 |
66 | def __init__(self, mean, covariance, track_id, n_init, max_age,
67 | feature=None):
68 | self.mean = mean
69 | self.covariance = covariance
70 | self.track_id = track_id
71 | self.hits = 1
72 | self.age = 1
73 | self.time_since_update = 0
74 |
75 | self.state = TrackState.Tentative
76 | self.features = []
77 | if feature is not None:
78 | self.features.append(feature)
79 |
80 | self._n_init = n_init
81 | self._max_age = max_age
82 |
83 | def to_tlwh(self):
84 | """Get current position in bounding box format `(top left x, top left y,
85 | width, height)`.
86 |
87 | Returns
88 | -------
89 | ndarray
90 | The bounding box.
91 |
92 | """
93 | ret = self.mean[:4].copy()
94 | ret[2] *= ret[3]
95 | ret[:2] -= ret[2:] / 2
96 | return ret
97 |
98 | def to_tlbr(self):
99 | """Get current position in bounding box format `(min x, miny, max x,
100 | max y)`.
101 |
102 | Returns
103 | -------
104 | ndarray
105 | The bounding box.
106 |
107 | """
108 | ret = self.to_tlwh()
109 | ret[2:] = ret[:2] + ret[2:]
110 | return ret
111 |
112 | def predict(self, kf):
113 | """Propagate the state distribution to the current time step using a
114 | Kalman filter prediction step.
115 |
116 | Parameters
117 | ----------
118 | kf : kalman_filter.KalmanFilter
119 | The Kalman filter.
120 |
121 | """
122 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
123 | self.age += 1
124 | self.time_since_update += 1
125 |
126 | def update(self, kf, detection):
127 | """Perform Kalman filter measurement update step and update the feature
128 | cache.
129 |
130 | Parameters
131 | ----------
132 | kf : kalman_filter.KalmanFilter
133 | The Kalman filter.
134 | detection : Detection
135 | The associated detection.
136 |
137 | """
138 | self.mean, self.covariance = kf.update(
139 | self.mean, self.covariance, detection.to_xyah())
140 | self.features.append(detection.feature)
141 |
142 | self.hits += 1
143 | self.time_since_update = 0
144 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
145 | self.state = TrackState.Confirmed
146 |
147 | def mark_missed(self):
148 | """Mark this track as missed (no association at the current time step).
149 | """
150 | if self.state == TrackState.Tentative:
151 | self.state = TrackState.Deleted
152 | elif self.time_since_update > self._max_age:
153 | self.state = TrackState.Deleted
154 |
155 | def is_tentative(self):
156 | """Returns True if this track is tentative (unconfirmed).
157 | """
158 | return self.state == TrackState.Tentative
159 |
160 | def is_confirmed(self):
161 | """Returns True if this track is confirmed."""
162 | return self.state == TrackState.Confirmed
163 |
164 | def is_deleted(self):
165 | """Returns True if this track is dead and should be deleted."""
166 | return self.state == TrackState.Deleted
167 |
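A hedged life-cycle sketch (hypothetical values; imports assume the demo/ directory is on PYTHONPATH): a track starts Tentative, becomes Confirmed after `n_init` matched frames, and is marked Deleted once it goes unmatched for too long.

    import numpy as np
    from tracking.sort.detection import Detection
    from tracking.sort.kalman_filter import KalmanFilter
    from tracking.sort.track import Track

    kf = KalmanFilter()
    det = Detection([10, 20, 40, 80], 0.9, np.zeros(128, dtype=np.float32))

    mean, covariance = kf.initiate(det.to_xyah())
    track = Track(mean, covariance, track_id=1, n_init=3, max_age=30, feature=det.feature)
    print(track.is_tentative())   # True -- one hit so far, n_init not reached

    for _ in range(2):            # two more matched frames: hits goes 1 -> 3
        track.predict(kf)
        track.update(kf, det)
    print(track.is_confirmed())   # True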
--------------------------------------------------------------------------------
/demo/tracking/sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 |
4 | import numpy as np
5 |
6 | from . import iou_matching, kalman_filter, linear_assignment
7 | from .track import Track
8 |
9 |
10 | class Tracker:
11 | """
12 | This is the multi-target tracker.
13 |
14 | Parameters
15 | ----------
16 | metric : nn_matching.NearestNeighborDistanceMetric
17 | A distance metric for measurement-to-track association.
18 | max_age : int
19 | Maximum number of consecutive misses before a track is deleted.
20 | n_init : int
21 | Number of consecutive detections before the track is confirmed. The
22 | track state is set to `Deleted` if a miss occurs within the first
23 | `n_init` frames.
24 |
25 | Attributes
26 | ----------
27 | metric : nn_matching.NearestNeighborDistanceMetric
28 | The distance metric used for measurement to track association.
29 | max_age : int
30 | Maximum number of consecutive misses before a track is deleted.
31 | n_init : int
32 | Number of frames that a track remains in initialization phase.
33 | kf : kalman_filter.KalmanFilter
34 | A Kalman filter to filter target trajectories in image space.
35 | tracks : List[Track]
36 | The list of active tracks at the current time step.
37 |
38 | """
39 |
40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3):
41 | self.metric = metric
42 | self.max_iou_distance = max_iou_distance
43 | self.max_age = max_age
44 | self.n_init = n_init
45 |
46 | self.kf = kalman_filter.KalmanFilter()
47 | self.tracks = []
48 | self._next_id = 1
49 |
50 | def predict(self):
51 | """Propagate track state distributions one time step forward.
52 |
53 | This function should be called once every time step, before `update`.
54 | """
55 | for track in self.tracks:
56 | track.predict(self.kf)
57 |
58 | def update(self, detections):
59 | """Perform measurement update and track management.
60 |
61 | Parameters
62 | ----------
63 | detections : List[deep_sort.detection.Detection]
64 | A list of detections at the current time step.
65 |
66 | """
67 | # Run matching cascade.
68 | matches, unmatched_tracks, unmatched_detections = \
69 | self._match(detections)
70 |
71 | # Update track set.
72 | for track_idx, detection_idx in matches:
73 | self.tracks[track_idx].update(
74 | self.kf, detections[detection_idx])
75 | for track_idx in unmatched_tracks:
76 | self.tracks[track_idx].mark_missed()
77 | for detection_idx in unmatched_detections:
78 | self._initiate_track(detections[detection_idx])
79 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
80 |
81 | # Update distance metric.
82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83 | features, targets = [], []
84 | for track in self.tracks:
85 | if not track.is_confirmed():
86 | continue
87 | features += track.features
88 | targets += [track.track_id for _ in track.features]
89 | track.features = []
90 | self.metric.partial_fit(
91 | np.asarray(features), np.asarray(targets), active_targets)
92 |
93 | def _match(self, detections):
94 |
95 | def gated_metric(tracks, dets, track_indices, detection_indices):
96 | features = np.array([dets[i].feature for i in detection_indices])
97 | targets = np.array([tracks[i].track_id for i in track_indices])
98 | cost_matrix = self.metric.distance(features, targets)
99 | cost_matrix = linear_assignment.gate_cost_matrix(
100 | self.kf, cost_matrix, tracks, dets, track_indices,
101 | detection_indices)
102 |
103 | return cost_matrix
104 |
105 | # Split track set into confirmed and unconfirmed tracks.
106 | confirmed_tracks = [
107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108 | unconfirmed_tracks = [
109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110 |
111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \
113 | linear_assignment.matching_cascade(
114 | gated_metric, self.metric.matching_threshold, self.max_age,
115 | self.tracks, detections, confirmed_tracks)
116 |
117 | # Associate remaining tracks together with unconfirmed tracks using IOU.
118 | iou_track_candidates = unconfirmed_tracks + [
119 | k for k in unmatched_tracks_a if
120 | self.tracks[k].time_since_update == 1]
121 | unmatched_tracks_a = [
122 | k for k in unmatched_tracks_a if
123 | self.tracks[k].time_since_update != 1]
124 | matches_b, unmatched_tracks_b, unmatched_detections = \
125 | linear_assignment.min_cost_matching(
126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127 | detections, iou_track_candidates, unmatched_detections)
128 |
129 | matches = matches_a + matches_b
130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131 | return matches, unmatched_tracks, unmatched_detections
132 |
133 | def _initiate_track(self, detection):
134 | mean, covariance = self.kf.initiate(detection.to_xyah())
135 | self.tracks.append(Track(
136 | mean, covariance, self._next_id, self.n_init, self.max_age,
137 | detection.feature))
138 | self._next_id += 1
139 |
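The per-frame driver loop the tracker expects, as a hedged sketch: the detector stub and frame loop are hypothetical, the imports assume demo/ is on PYTHONPATH, and the NearestNeighborDistanceMetric arguments follow the upstream deep SORT signature.

    import numpy as np
    from tracking.sort.detection import Detection
    from tracking.sort.nn_matching import NearestNeighborDistanceMetric
    from tracking.sort.tracker import Tracker

    # Cosine appearance metric with a matching threshold and a per-track feature budget.
    metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=30, n_init=3)

    def detect_people(frame_id):
        # Hypothetical stand-in for the CenterNet detector: one fixed box per frame.
        feature = np.random.rand(128).astype(np.float32)
        return [Detection([10, 20, 40, 80], 0.9, feature)]

    for frame_id in range(5):          # stand-in for a video loop
        detections = detect_people(frame_id)
        tracker.predict()              # must be called once per frame, before update()
        tracker.update(detections)
        for track in tracker.tracks:
            if track.is_confirmed() and track.time_since_update == 0:
                print(frame_id, track.track_id, track.to_tlbr())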
--------------------------------------------------------------------------------
/demo/tracking/util.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60),
5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238),
6 | (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213),
7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47),
8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144),
9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128),
10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238),
11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154),
12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128),
13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220),
14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)]
15 |
16 |
17 | def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)):
18 | '''
19 | draw box of an id
20 | '''
21 | x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)]
22 | # set color and label text
23 | color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0]
24 | label = '{} {}'.format(cls_name, identity)
25 | # box text and bar
26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,2)
28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1)
29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1)
30 | return img
31 |
32 |
33 | def draw_bboxes(img, bbox, identities=None, offset=(0,0)):
34 | for i,box in enumerate(bbox):
35 | x1,y1,x2,y2 = [int(i) for i in box]
36 | x1 += offset[0]
37 | x2 += offset[0]
38 | y1 += offset[1]
39 | y2 += offset[1]
40 | # box text and bar
41 | id = int(identities[i]) if identities is not None else 0
42 | color = COLORS_10[id%len(COLORS_10)]
43 | label = '{} {}'.format("object", id)
44 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0]
45 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3)
46 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1)
47 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2)
48 | return img
49 |
50 | def softmax(x):
51 | assert isinstance(x, np.ndarray), "expected x to be a numpy array"
52 | x_exp = np.exp(x*5)
53 | return x_exp/x_exp.sum()
54 |
55 | def softmin(x):
56 | assert isinstance(x, np.ndarray), "expected x to be a numpy array"
57 | x_exp = np.exp(-x)
58 | return x_exp/x_exp.sum()
59 |
60 |
61 |
62 | if __name__ == '__main__':
63 | x = np.arange(10)/10.
64 | x = np.array([0.5,0.5,0.5,0.6,1.])
65 | y = softmax(x)
66 | z = softmin(x)
67 | import ipdb; ipdb.set_trace()
68 |
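A short sketch of `draw_bboxes` on a blank canvas (hypothetical boxes and ids; the import assumes demo/ is on PYTHONPATH). Each id is mapped to a fixed colour from COLORS_10:

    import cv2
    import numpy as np
    from tracking.util import draw_bboxes  # assumed import path

    canvas = np.zeros((480, 640, 3), dtype=np.uint8)               # blank BGR image
    boxes = np.array([[50, 60, 200, 300], [300, 100, 450, 400]])   # rows of (x1, y1, x2, y2)
    ids = np.array([3, 7])

    canvas = draw_bboxes(canvas, boxes, identities=ids)
    cv2.imwrite('tracked_boxes.png', canvas)   # boxes labelled 'object 3' and 'object 7'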
--------------------------------------------------------------------------------
/experiments/darknet53_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'darknet53'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/darknet53'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 4
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 | TASK: 'multi_pose'
17 |
18 | CUDNN:
19 | BENCHMARK: true
20 |
21 | MODEL:
22 |
23 | CENTER_THRESH: 0.1
24 | NUM_CLASSES: 1
25 | NAME: 'darknet'
26 | HEADS_NAME: 'keypoint'
27 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
28 | INTERMEDIATE_CHANNEL: 256
29 | HEAD_CONV: 256
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 |
60 | DATASET:
61 | DATASET: 'coco_hp'
62 | TRAIN_SET: 'train'
63 | TEST_SET: 'valid'
64 | TRAIN_IMAGE_DIR: 'images/train2017'
65 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
66 | VAL_IMAGE_DIR: 'images/val2017'
67 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
68 |
69 | # training data augmentation
70 | MEAN: [0.408, 0.447, 0.470]
71 | STD: [0.289, 0.274, 0.278]
72 | SHIFT: 0.1
73 | SCALE: 0.4
74 | ROTATE: 0.
75 | # for pose
76 | AUG_ROT: 0.
77 | FLIP: 0.5
78 | NO_COLOR_AUG: false
79 |
80 | ROT_FACTOR: 30
81 | SCALE_MIN: 0.5
82 | SCALE_MAX: 1.1
83 | IMAGE_SIZE: 512
84 | RANDOM_CROP: true
85 |
86 | TRAIN:
87 | OPTIMIZER: 'adam'
88 | DISTRIBUTE: true
89 | LOCAL_RANK: 0
90 | HIDE_DATA_TIME: false
91 | SAVE_ALL_MODEL: false
92 | RESUME: false
93 | LR_FACTOR: 0.1
94 | LR_STEP: [270, 300]
95 | EPOCHS: 320
96 | NUM_ITERS: -1
97 | LR: 1.875e-4
98 | BATCH_SIZE: 48
99 | MASTER_BATCH_SIZE: 12
100 |
101 | MOMENTUM: 0.9
102 | WD: 0.0001
103 | NESTEROV: false
104 | GAMMA1: 0.99
105 | GAMMA2: 0.0
106 |
107 | # 'apply and reset gradients every n batches'
108 | STRIDE_APPLY: 1
109 |
110 | CHECKPOINT: ''
111 | SHUFFLE: true
112 | VAL_INTERVALS: 1
113 | TRAINVAL: false
114 |
115 | TEST:
116 | # Test Model Epoch
117 | MODEL_PATH: '/home/tensorboy/data/centerpose/darknet53/model_best.pth'
118 | TASK: 'multi_pose'
119 | FLIP_TEST: true
120 |
121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
122 | MODEL_FILE: ''
123 | TEST_SCALES: [1]
124 | IMAGE_THRE: 0.1
125 | TOPK: 100
126 | NMS: false
127 | NMS_THRE: 0.5
128 | NOT_PREFETCH_TEST: false
129 | FIX_RES: true
130 |
131 | SOFT_NMS: false
132 | OKS_THRE: 0.5
133 | VIS_THRESH: 0.3
134 | KEYPOINT_THRESH: 0.2
135 | NUM_MIN_KPT: 4
136 | THRESH_HUMAN: 0.5
137 |
138 | EVAL_ORACLE_HM: false
139 | EVAL_ORACLE_WH: false
140 | EVAL_ORACLE_OFFSET: false
141 | EVAL_ORACLE_KPS: false
142 | EVAL_ORACLE_HMHP: false
143 | EVAL_ORACLE_HP_OFFSET: false
144 | EVAL_ORACLE_DEP: false
145 |
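These experiment files are plain YAML merged on top of the defaults defined in lib/config/default.py (the repository's own entry point for this is `update_config` in lib/config). A hedged sketch of the same merge done directly with yacs, run from the repository root and using a couple of made-up default keys:

    from yacs.config import CfgNode as CN

    # A tiny defaults node mirroring two keys from lib/config/default.py.
    cfg = CN(new_allowed=True)
    cfg.TRAIN = CN(new_allowed=True)
    cfg.TRAIN.LR = 1.25e-4
    cfg.TRAIN.BATCH_SIZE = 32

    cfg.merge_from_file('experiments/darknet53_512x512.yaml')   # experiment file overrides defaults
    cfg.freeze()
    print(cfg.TRAIN.LR, cfg.TRAIN.BATCH_SIZE)   # 0.0001875 48
    print(cfg.MODEL.NAME)                       # darknet (picked up via new_allowed=True)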
--------------------------------------------------------------------------------
/experiments/dla_34_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/data'
3 | EXP_ID: 'sgd_lr6e3'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/data/centerpose/dla34_lr6e3'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | INIT_WEIGHTS: false
22 | PRETRAINED: ''
23 | CENTER_THRESH: 0.1
24 | NUM_CLASSES: 1
25 | NAME: 'dla_34'
26 | HEADS_NAME: 'keypoint'
27 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
28 | HEAD_CONV: 256
29 | INTERMEDIATE_CHANNEL: 64
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 | HM_HP_WEIGHT: 1.
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | DISTRIBUTE: true
89 | OPTIMIZER: 'adam'
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 2.8125e-3
99 | BATCH_SIZE: 72
100 | MASTER_BATCH_SIZE: 18
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 | CHECKPOINT: ''
111 | SHUFFLE: true
112 | VAL_INTERVALS: 1
113 | TRAINVAL: false
114 |
115 | TEST:
116 | # Test Model Epoch
117 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/dla34_best.pth'
118 | TASK: 'multi_pose'
119 | FLIP_TEST: true
120 |
121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
122 | MODEL_FILE: ''
123 | TEST_SCALES: [1]
124 | IMAGE_THRE: 0.1
125 | TOPK: 100
126 | NMS: true
127 | NMS_THRE: 0.5
128 | NOT_PREFETCH_TEST: false
129 | FIX_RES: false
130 |
131 | SOFT_NMS: false
132 | OKS_THRE: 0.5
133 | VIS_THRESH: 0.3
134 | KEYPOINT_THRESH: 0.2
135 | NUM_MIN_KPT: 4
136 | THRESH_HUMAN: 0.4
137 |
138 | EVAL_ORACLE_HM: false
139 | EVAL_ORACLE_WH: false
140 | EVAL_ORACLE_OFFSET: false
141 | EVAL_ORACLE_KPS: false
142 | EVAL_ORACLE_HMHP: false
143 | EVAL_ORACLE_HP_OFFSET: false
144 | EVAL_ORACLE_DEP: false
145 |
--------------------------------------------------------------------------------
/experiments/efficientdet_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'coco_pose_efficientdet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/efficientdet'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 | TASK: 'multi_pose'
17 |
18 | CUDNN:
19 | BENCHMARK: true
20 |
21 | MODEL:
22 | INIT_WEIGHTS: false
23 | PRETRAINED: ''
24 | CENTER_THRESH: 0.1
25 | NUM_CLASSES: 1
26 | NAME: 'efficientdet'
27 | HEADS_NAME: 'keypoint'
28 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
29 | HEAD_CONV: 64
30 | INTERMEDIATE_CHANNEL: 172
31 | DOWN_RATIO: 4
32 | NUM_STACKS: 1
33 | INPUT_RES: 512
34 | OUTPUT_RES: 128
35 | INPUT_H: 512
36 | INPUT_W: 512
37 | PAD: 31
38 | NUM_KEYPOINTS: 17
39 | TAG_PER_JOINT: true
40 | TARGET_TYPE: 'gaussian'
41 | SIGMA: 2
42 |
43 | LOSS:
44 | METRIC: 'loss'
45 | MSE_LOSS: false
46 | REG_LOSS: 'l1'
47 | USE_OHKM: false
48 | TOPK: 8
49 | USE_TARGET_WEIGHT: true
50 | USE_DIFFERENT_JOINTS_WEIGHT: false
51 | HP_WEIGHT: 1.
52 | HM_HP_WEIGHT: 1.
53 | DENSE_HP: false
54 | HM_HP: true
55 | REG_BBOX: true
56 | WH_WEIGHT: 0.1
57 | REG_OFFSET: true
58 | OFF_WEIGHT: 1.
59 | REG_HP_OFFSET: true
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | DISTRIBUTE: true
89 | OPTIMIZER: 'adam'
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 2.1875e-4
99 | BATCH_SIZE: 56
100 | MASTER_BATCH_SIZE: 14
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 | CHECKPOINT: ''
111 | SHUFFLE: true
112 | VAL_INTERVALS: 1
113 | TRAINVAL: false
114 |
115 | TEST:
116 | # Test Model Epoch
117 | MODEL_PATH: '/home/tensorboy/data/centerpose/efficientdet/model_best.pth'
118 | TASK: 'multi_pose'
119 | FLIP_TEST: false
120 |
121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
122 | MODEL_FILE: ''
123 | TEST_SCALES: [1]
124 | IMAGE_THRE: 0.1
125 | TOPK: 32
126 | NMS: false
127 | NMS_THRE: 0.5
128 | NOT_PREFETCH_TEST: false
129 | FIX_RES: true
130 |
131 | SOFT_NMS: false
132 | OKS_THRE: 0.5
133 | VIS_THRESH: 0.3
134 | KEYPOINT_THRESH: 0.2
135 | NUM_MIN_KPT: 4
136 | THRESH_HUMAN: 0.5
137 |
138 | EVAL_ORACLE_HM: false
139 | EVAL_ORACLE_WH: false
140 | EVAL_ORACLE_OFFSET: false
141 | EVAL_ORACLE_KPS: false
142 | EVAL_ORACLE_HMHP: false
143 | EVAL_ORACLE_HP_OFFSET: false
144 | EVAL_ORACLE_DEP: false
145 |
--------------------------------------------------------------------------------
/experiments/ghost_net.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/data'
3 | EXP_ID: 'ghostnet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/data/centerpose/ghostnet'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | INIT_WEIGHTS: false
22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth'
23 | CENTER_THRESH: 0.1
24 | NUM_CLASSES: 1
25 | NAME: 'ghostnet'
26 | HEADS_NAME: 'keypoint'
27 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
28 | HEAD_CONV: 256
29 | INTERMEDIATE_CHANNEL: 160
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 | HM_HP_WEIGHT: 1.
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | OPTIMIZER: 'adam'
89 | DISTRIBUTE: true
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 2.5e-4
99 | BATCH_SIZE: 64
100 | MASTER_BATCH_SIZE: 16
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 |
111 | CHECKPOINT: ''
112 | SHUFFLE: true
113 | VAL_INTERVALS: 1
114 | TRAINVAL: false
115 |
116 | TEST:
117 | # Test Model Epoch
118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth'
119 | TASK: 'multi_pose'
120 | FLIP_TEST: true
121 |
122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
123 | MODEL_FILE: ''
124 | TEST_SCALES: [1]
125 | IMAGE_THRE: 0.1
126 | TOPK: 100
127 | NMS: false
128 | NMS_THRE: 0.5
129 | NOT_PREFETCH_TEST: false
130 | FIX_RES: false
131 | KEEP_RES: false
132 |
133 | SOFT_NMS: false
134 | OKS_THRE: 0.5
135 | VIS_THRESH: 0.3
136 | KEYPOINT_THRESH: 0.2
137 | NUM_MIN_KPT: 4
138 | THRESH_HUMAN: 0.4
139 |
140 | EVAL_ORACLE_HM: false
141 | EVAL_ORACLE_WH: false
142 | EVAL_ORACLE_OFFSET: false
143 | EVAL_ORACLE_KPS: false
144 | EVAL_ORACLE_HMHP: false
145 | EVAL_ORACLE_HP_OFFSET: false
146 | EVAL_ORACLE_DEP: false
147 |
--------------------------------------------------------------------------------
/experiments/hardnet_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/data'
3 | EXP_ID: 'hardnet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/data/centerpose/hardnet'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | INIT_WEIGHTS: true
22 | PRETRAINED: '/data/pretrained_models/imagenet/hardnet_petite_base.pth'
23 | CENTER_THRESH: 0.1
24 | INTERMEDIATE_CHANNEL: 48
25 | NUM_CLASSES: 1
26 | NAME: 'hardnet'
27 | HEADS_NAME: 'keypoint'
28 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
29 | HEAD_CONV: 256
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 | HM_HP_WEIGHT: 1.
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | OPTIMIZER: 'adam'
89 | DISTRIBUTE: true
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 5.e-4
99 | BATCH_SIZE: 128
100 | MASTER_BATCH_SIZE: 32
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 |
111 | CHECKPOINT: ''
112 | SHUFFLE: true
113 | VAL_INTERVALS: 1
114 | TRAINVAL: false
115 |
116 | TEST:
117 | # Test Model Epoch
118 | MODEL_PATH: '/data/centerpose/hardnet/model_best.pth'
119 | TASK: 'multi_pose'
120 | FLIP_TEST: true
121 |
122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
123 | MODEL_FILE: ''
124 | TEST_SCALES: [1]
125 | IMAGE_THRE: 0.1
126 | TOPK: 100
127 | NMS: false
128 | NMS_THRE: 0.5
129 | NOT_PREFETCH_TEST: false
130 | FIX_RES: false
131 | KEEP_RES: false
132 |
133 | SOFT_NMS: false
134 | OKS_THRE: 0.5
135 | VIS_THRESH: 0.3
136 | KEYPOINT_THRESH: 0.2
137 | NUM_MIN_KPT: 4
138 | THRESH_HUMAN: 0.4
139 |
140 | EVAL_ORACLE_HM: false
141 | EVAL_ORACLE_WH: false
142 | EVAL_ORACLE_OFFSET: false
143 | EVAL_ORACLE_KPS: false
144 | EVAL_ORACLE_HMHP: false
145 | EVAL_ORACLE_HP_OFFSET: false
146 | EVAL_ORACLE_DEP: false
147 |
--------------------------------------------------------------------------------
/experiments/hrnet_w32_512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'coco_pose_hrnet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PIN_MEMORY: true
13 | RANK: 0
14 | PRINT_FREQ: 100
15 | CUDNN:
16 | BENCHMARK: true
17 | DETERMINISTIC: false
18 | ENABLED: true
19 | DATASET:
20 | DATASET: 'coco_hp'
21 | TRAIN_SET: 'train'
22 | TEST_SET: 'valid'
23 | TRAIN_IMAGE_DIR: 'images/train2017'
24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
25 | VAL_IMAGE_DIR: 'images/val2017'
26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
27 |
28 | # training data augmentation
29 | MEAN: [0.408, 0.447, 0.470]
30 | STD: [0.289, 0.274, 0.278]
31 | SHIFT: 0.1
32 | SCALE: 0.4
33 | ROTATE: 0.
34 | # for pose
35 | AUG_ROT: 0.
36 | FLIP: 0.5
37 | NO_COLOR_AUG: false
38 |
39 | ROT_FACTOR: 30
40 | SCALE_MIN: 0.5
41 | SCALE_MAX: 1.1
42 | IMAGE_SIZE: 512
43 | RANDOM_CROP: true
44 |
45 | LOSS:
46 | METRIC: 'loss'
47 | MSE_LOSS: false
48 | REG_LOSS: 'l1'
49 | USE_OHKM: false
50 | TOPK: 8
51 | USE_TARGET_WEIGHT: true
52 | USE_DIFFERENT_JOINTS_WEIGHT: false
53 | HP_WEIGHT: 1.
54 | HM_HP_WEIGHT: 1.
55 | DENSE_HP: false
56 | HM_HP: true
57 | REG_BBOX: true
58 | WH_WEIGHT: 0.1
59 | REG_OFFSET: true
60 | OFF_WEIGHT: 1.
61 | REG_HP_OFFSET: true
62 | HM_HP_WEIGHT: 1.
63 | MODEL:
64 | HEADS_NAME: 'keypoint'
65 | INTERMEDIATE_CHANNEL: 32
66 | CENTER_THRESH: 0.1
67 | NUM_CLASSES: 1
68 | NAME: 'hrnet'
69 | DOWN_RATIO: 4
70 | NUM_STACKS: 1
71 | INPUT_RES: 512
72 | OUTPUT_RES: 128
73 | INPUT_H: 512
74 | INPUT_W: 512
75 | PAD: 31
76 | NUM_KEYPOINTS: 17
77 | SIGMA: 2
78 | HEAD_CONV: 64
79 | EXTRA:
80 | FINAL_CONV_KERNEL: 1
81 | PRETRAINED_LAYERS: ['*']
82 | STEM_INPLANES: 64
83 | STAGE2:
84 | NUM_MODULES: 1
85 | NUM_BRANCHES: 2
86 | BLOCK: BASIC
87 | NUM_BLOCKS:
88 | - 4
89 | - 4
90 | NUM_CHANNELS:
91 | - 32
92 | - 64
93 | FUSE_METHOD: SUM
94 | STAGE3:
95 | NUM_MODULES: 4
96 | NUM_BRANCHES: 3
97 | BLOCK: BASIC
98 | NUM_BLOCKS:
99 | - 4
100 | - 4
101 | - 4
102 | NUM_CHANNELS:
103 | - 32
104 | - 64
105 | - 128
106 | FUSE_METHOD: SUM
107 | STAGE4:
108 | NUM_MODULES: 3
109 | NUM_BRANCHES: 4
110 | BLOCK: BASIC
111 | NUM_BLOCKS:
112 | - 4
113 | - 4
114 | - 4
115 | - 4
116 | NUM_CHANNELS:
117 | - 32
118 | - 64
119 | - 128
120 | - 256
121 | FUSE_METHOD: SUM
122 | DECONV:
123 | NUM_DECONVS: 0
124 | NUM_CHANNELS:
125 | - 32
126 | KERNEL_SIZE:
127 | - 4
128 | NUM_BASIC_BLOCKS: 4
129 | CAT_OUTPUT:
130 | - True
131 | INIT_WEIGHTS: true
132 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth'
133 | TAG_PER_JOINT: true
134 | TEST:
135 | # Test Model Epoch
136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth'
137 | TASK: 'multi_pose'
138 | FLIP_TEST: true
139 | FIX_RES: false
140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
141 | MODEL_FILE: ''
142 | TEST_SCALES: [1,2]
143 | IMAGE_THRE: 0.1
144 | TOPK: 100
145 | NMS: false
146 | NMS_THRE: 0.5
147 | NOT_PREFETCH_TEST: false
148 |
149 | KEEP_RES: false
150 |
151 | SOFT_NMS: false
152 | OKS_THRE: 0.5
153 | VIS_THRESH: 0.3
154 | KEYPOINT_THRESH: 0.2
155 | NUM_MIN_KPT: 4
156 | THRESH_HUMAN: 0.4
157 |
158 | EVAL_ORACLE_HM: false
159 | EVAL_ORACLE_WH: false
160 | EVAL_ORACLE_OFFSET: false
161 | EVAL_ORACLE_KPS: false
162 | EVAL_ORACLE_HMHP: false
163 | EVAL_ORACLE_HP_OFFSET: false
164 | EVAL_ORACLE_DEP: false
165 | TRAIN:
166 | DISTRIBUTE: true
167 | OPTIMIZER: 'sgd'
168 | LOCAL_RANK: 0
169 | HIDE_DATA_TIME: false
170 | SAVE_ALL_MODEL: false
171 | RESUME: false
172 | LR_FACTOR: 0.1
173 | LR_STEP: [270, 300]
174 | EPOCHS: 320
175 | NUM_ITERS: -1
176 | LR: 1.71875e-4
177 | BATCH_SIZE: 44
178 | MASTER_BATCH_SIZE: 11
179 |
180 | MOMENTUM: 0.9
181 | WD: 0.0001
182 | NESTEROV: false
183 | GAMMA1: 0.99
184 | GAMMA2: 0.0
185 |
186 | # 'apply and reset gradients every n batches'
187 | STRIDE_APPLY: 1
188 |
189 | CHECKPOINT: ''
190 | SHUFFLE: true
191 | VAL_INTERVALS: 1
192 | TRAINVAL: false
193 |
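The nested MODEL.EXTRA block (STAGE2/3/4, DECONV) has no enumerated counterpart in the defaults; it is absorbed through `_C.MODEL.EXTRA = CN(new_allowed=True)` (see lib/config/default.py later in this listing), so the HRNet builder can read arbitrary stage settings. A small sketch of how such a free-form node behaves, with hypothetical keys:

    from yacs.config import CfgNode as CN

    extra = CN(new_allowed=True)        # mirrors _C.MODEL.EXTRA in lib/config/default.py
    extra.merge_from_other_cfg(CN.load_cfg("""
    STAGE2:
      NUM_MODULES: 1
      NUM_BRANCHES: 2
      NUM_CHANNELS: [32, 64]
    """))

    print(extra.STAGE2.NUM_CHANNELS)    # [32, 64]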
--------------------------------------------------------------------------------
/experiments/hrnet_w48_512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'coco_pose_hrnet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PIN_MEMORY: true
13 | RANK: 0
14 | PRINT_FREQ: 100
15 | CUDNN:
16 | BENCHMARK: true
17 | DETERMINISTIC: false
18 | ENABLED: true
19 | DATASET:
20 | DATASET: 'coco_hp'
21 | TRAIN_SET: 'train'
22 | TEST_SET: 'valid'
23 | TRAIN_IMAGE_DIR: 'images/train2017'
24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
25 | VAL_IMAGE_DIR: 'images/val2017'
26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
27 |
28 | # training data augmentation
29 | MEAN: [0.408, 0.447, 0.470]
30 | STD: [0.289, 0.274, 0.278]
31 | SHIFT: 0.1
32 | SCALE: 0.4
33 | ROTATE: 0.
34 | # for pose
35 | AUG_ROT: 0.
36 | FLIP: 0.5
37 | NO_COLOR_AUG: false
38 |
39 | ROT_FACTOR: 30
40 | SCALE_MIN: 0.5
41 | SCALE_MAX: 1.1
42 | IMAGE_SIZE: 512
43 | RANDOM_CROP: true
44 |
45 | LOSS:
46 | METRIC: 'loss'
47 | MSE_LOSS: false
48 | REG_LOSS: 'l1'
49 | USE_OHKM: false
50 | TOPK: 8
51 | USE_TARGET_WEIGHT: true
52 | USE_DIFFERENT_JOINTS_WEIGHT: false
53 | HP_WEIGHT: 1.
54 | HM_HP_WEIGHT: 1.
55 | DENSE_HP: false
56 | HM_HP: true
57 | REG_BBOX: true
58 | WH_WEIGHT: 0.1
59 | REG_OFFSET: true
60 | OFF_WEIGHT: 1.
61 | REG_HP_OFFSET: true
62 | HM_HP_WEIGHT: 1.
63 | MODEL:
64 | HEADS_NAME: 'keypoint'
65 | INTERMEDIATE_CHANNEL: 48
66 | CENTER_THRESH: 0.1
67 | NUM_CLASSES: 1
68 | NAME: 'hrnet'
69 | DOWN_RATIO: 4
70 | NUM_STACKS: 1
71 | INPUT_RES: 512
72 | OUTPUT_RES: 128
73 | INPUT_H: 512
74 | INPUT_W: 512
75 | PAD: 31
76 | NUM_KEYPOINTS: 17
77 | SIGMA: 2
78 | HEAD_CONV: 64
79 | EXTRA:
80 | FINAL_CONV_KERNEL: 1
81 | PRETRAINED_LAYERS: ['*']
82 | STEM_INPLANES: 64
83 | STAGE2:
84 | NUM_MODULES: 1
85 | NUM_BRANCHES: 2
86 | BLOCK: BASIC
87 | NUM_BLOCKS:
88 | - 4
89 | - 4
90 | NUM_CHANNELS:
91 | - 48
92 | - 96
93 | FUSE_METHOD: SUM
94 | STAGE3:
95 | NUM_MODULES: 4
96 | NUM_BRANCHES: 3
97 | BLOCK: BASIC
98 | NUM_BLOCKS:
99 | - 4
100 | - 4
101 | - 4
102 | NUM_CHANNELS:
103 | - 48
104 | - 96
105 | - 192
106 | FUSE_METHOD: SUM
107 | STAGE4:
108 | NUM_MODULES: 3
109 | NUM_BRANCHES: 4
110 | BLOCK: BASIC
111 | NUM_BLOCKS:
112 | - 4
113 | - 4
114 | - 4
115 | - 4
116 | NUM_CHANNELS:
117 | - 48
118 | - 96
119 | - 192
120 | - 384
121 | FUSE_METHOD: SUM
122 | DECONV:
123 | NUM_DECONVS: 1
124 | NUM_CHANNELS:
125 | - 48
126 | KERNEL_SIZE:
127 | - 4
128 | NUM_BASIC_BLOCKS: 4
129 | CAT_OUTPUT:
130 | - True
131 | INIT_WEIGHTS: true
132 | PRETRAINED: '/home/tensorboy/data/pretrained_models/imagenet/hrnet_w48-8ef0771d.pth'
133 | TAG_PER_JOINT: true
134 | TEST:
135 | # Test Model Epoch
136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth'
137 | TASK: 'multi_pose'
138 | FLIP_TEST: true
139 | FIX_RES: false
140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
141 | MODEL_FILE: ''
142 | TEST_SCALES: [1,2]
143 | IMAGE_THRE: 0.1
144 | TOPK: 100
145 | NMS: false
146 | NMS_THRE: 0.5
147 | NOT_PREFETCH_TEST: false
148 |
149 | KEEP_RES: false
150 |
151 | SOFT_NMS: false
152 | OKS_THRE: 0.5
153 | VIS_THRESH: 0.3
154 | KEYPOINT_THRESH: 0.2
155 | NUM_MIN_KPT: 4
156 | THRESH_HUMAN: 0.4
157 |
158 | EVAL_ORACLE_HM: false
159 | EVAL_ORACLE_WH: false
160 | EVAL_ORACLE_OFFSET: false
161 | EVAL_ORACLE_KPS: false
162 | EVAL_ORACLE_HMHP: false
163 | EVAL_ORACLE_HP_OFFSET: false
164 | EVAL_ORACLE_DEP: false
165 | TRAIN:
166 | DISTRIBUTE: true
167 | OPTIMIZER: 'adam'
168 | LOCAL_RANK: 0
169 | HIDE_DATA_TIME: false
170 | SAVE_ALL_MODEL: false
171 | RESUME: false
172 | LR_FACTOR: 0.1
173 | LR_STEP: [270, 300]
174 | EPOCHS: 320
175 | NUM_ITERS: -1
176 | LR: 1.25e-4
177 | BATCH_SIZE: 32
178 | MASTER_BATCH_SIZE: 8
179 |
180 | MOMENTUM: 0.9
181 | WD: 0.0001
182 | NESTEROV: false
183 | GAMMA1: 0.99
184 | GAMMA2: 0.0
185 |
186 | # 'apply and reset gradients every n batches'
187 | STRIDE_APPLY: 1
188 |
189 | CHECKPOINT: ''
190 | SHUFFLE: true
191 | VAL_INTERVALS: 1
192 | TRAINVAL: false
193 |
--------------------------------------------------------------------------------
/experiments/mobilenetv2_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/data'
3 | EXP_ID: 'coco_pose_mobilenetv2'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/data/centerpose/mobilenetv2'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | INIT_WEIGHTS: false
22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth'
23 | CENTER_THRESH: 0.1
24 | NUM_CLASSES: 1
25 | NAME: 'mobilenetv2'
26 | HEADS_NAME: 'keypoint'
27 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
28 | HEAD_CONV: 256
29 | INTERMEDIATE_CHANNEL: 24
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 | HM_HP_WEIGHT: 1.
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | OPTIMIZER: 'adam'
89 | DISTRIBUTE: true
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 5.e-4
99 | BATCH_SIZE: 128
100 | MASTER_BATCH_SIZE: 32
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 |
111 | CHECKPOINT: ''
112 | SHUFFLE: true
113 | VAL_INTERVALS: 1
114 | TRAINVAL: false
115 |
116 | TEST:
117 | # Test Model Epoch
118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth'
119 | TASK: 'multi_pose'
120 | FLIP_TEST: true
121 |
122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
123 | MODEL_FILE: ''
124 | TEST_SCALES: [1]
125 | IMAGE_THRE: 0.1
126 | TOPK: 100
127 | NMS: false
128 | NMS_THRE: 0.5
129 | NOT_PREFETCH_TEST: false
130 | FIX_RES: false
131 | KEEP_RES: false
132 |
133 | SOFT_NMS: false
134 | OKS_THRE: 0.5
135 | VIS_THRESH: 0.3
136 | KEYPOINT_THRESH: 0.2
137 | NUM_MIN_KPT: 4
138 | THRESH_HUMAN: 0.4
139 |
140 | EVAL_ORACLE_HM: false
141 | EVAL_ORACLE_WH: false
142 | EVAL_ORACLE_OFFSET: false
143 | EVAL_ORACLE_KPS: false
144 | EVAL_ORACLE_HMHP: false
145 | EVAL_ORACLE_HP_OFFSET: false
146 | EVAL_ORACLE_DEP: false
147 |
--------------------------------------------------------------------------------
/experiments/mobilenetv3_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'coco_pose_mobilenet'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/mobilenetv3'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | INIT_WEIGHTS: false
22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth'
23 | CENTER_THRESH: 0.1
24 | NUM_CLASSES: 1
25 | NAME: 'mobilenetv3'
26 | HEADS_NAME: 'keypoint'
27 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
28 | HEAD_CONV: 256
29 | INTERMEDIATE_CHANNEL: 24
30 | DOWN_RATIO: 4
31 | NUM_STACKS: 1
32 | INPUT_RES: 512
33 | OUTPUT_RES: 128
34 | INPUT_H: 512
35 | INPUT_W: 512
36 | PAD: 31
37 | NUM_KEYPOINTS: 17
38 | TAG_PER_JOINT: true
39 | TARGET_TYPE: 'gaussian'
40 | SIGMA: 2
41 |
42 | LOSS:
43 | METRIC: 'loss'
44 | MSE_LOSS: false
45 | REG_LOSS: 'l1'
46 | USE_OHKM: false
47 | TOPK: 8
48 | USE_TARGET_WEIGHT: true
49 | USE_DIFFERENT_JOINTS_WEIGHT: false
50 | HP_WEIGHT: 1.
51 | HM_HP_WEIGHT: 1.
52 | DENSE_HP: false
53 | HM_HP: true
54 | REG_BBOX: true
55 | WH_WEIGHT: 0.1
56 | REG_OFFSET: true
57 | OFF_WEIGHT: 1.
58 | REG_HP_OFFSET: true
59 | HM_HP_WEIGHT: 1.
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | OPTIMIZER: 'adam'
89 | DISTRIBUTE: true
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [270, 300]
96 | EPOCHS: 320
97 | NUM_ITERS: -1
98 | LR: 3.359375e-4
99 | BATCH_SIZE: 86
100 | MASTER_BATCH_SIZE: 20
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 |
111 | CHECKPOINT: ''
112 | SHUFFLE: true
113 | VAL_INTERVALS: 1
114 | TRAINVAL: false
115 |
116 | TEST:
117 | # Test Model Epoch
118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth'
119 | TASK: 'multi_pose'
120 | FLIP_TEST: true
121 |
122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
123 | MODEL_FILE: ''
124 | TEST_SCALES: [1]
125 | IMAGE_THRE: 0.1
126 | TOPK: 100
127 | NMS: false
128 | NMS_THRE: 0.5
129 | NOT_PREFETCH_TEST: false
130 | FIX_RES: false
131 | KEEP_RES: false
132 |
133 | SOFT_NMS: false
134 | OKS_THRE: 0.5
135 | VIS_THRESH: 0.3
136 | KEYPOINT_THRESH: 0.2
137 | NUM_MIN_KPT: 4
138 | THRESH_HUMAN: 0.4
139 |
140 | EVAL_ORACLE_HM: false
141 | EVAL_ORACLE_WH: false
142 | EVAL_ORACLE_OFFSET: false
143 | EVAL_ORACLE_KPS: false
144 | EVAL_ORACLE_HMHP: false
145 | EVAL_ORACLE_HP_OFFSET: false
146 | EVAL_ORACLE_DEP: false
147 |
--------------------------------------------------------------------------------
/experiments/res_50_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/data'
3 | EXP_ID: 'coco_pose_res_50'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/data/centerpose/res50_lre2'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 | TASK: 'multi_pose'
17 |
18 | CUDNN:
19 | BENCHMARK: true
20 |
21 | MODEL:
22 | INIT_WEIGHTS: false
23 | PRETRAINED: ''
24 | CENTER_THRESH: 0.1
25 | NUM_CLASSES: 1
26 | NAME: 'res_50'
27 | HEADS_NAME: 'keypoint'
28 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
29 | HEAD_CONV: 64
30 | INTERMEDIATE_CHANNEL: 256
31 | DOWN_RATIO: 4
32 | NUM_STACKS: 1
33 | INPUT_RES: 512
34 | OUTPUT_RES: 128
35 | INPUT_H: 512
36 | INPUT_W: 512
37 | PAD: 31
38 | NUM_KEYPOINTS: 17
39 | TAG_PER_JOINT: true
40 | TARGET_TYPE: 'gaussian'
41 | SIGMA: 2
42 |
43 | LOSS:
44 | METRIC: 'loss'
45 | MSE_LOSS: false
46 | REG_LOSS: 'l1'
47 | USE_OHKM: false
48 | TOPK: 8
49 | USE_TARGET_WEIGHT: true
50 | USE_DIFFERENT_JOINTS_WEIGHT: false
51 | HP_WEIGHT: 1.
52 | HM_HP_WEIGHT: 1.
53 | DENSE_HP: false
54 | HM_HP: true
55 | REG_BBOX: true
56 | WH_WEIGHT: 0.1
57 | REG_OFFSET: true
58 | OFF_WEIGHT: 1.
59 | REG_HP_OFFSET: true
60 |
61 | DATASET:
62 | DATASET: 'coco_hp'
63 | TRAIN_SET: 'train'
64 | TEST_SET: 'valid'
65 | TRAIN_IMAGE_DIR: 'images/train2017'
66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
67 | VAL_IMAGE_DIR: 'images/val2017'
68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
69 |
70 | # training data augmentation
71 | MEAN: [0.408, 0.447, 0.470]
72 | STD: [0.289, 0.274, 0.278]
73 | SHIFT: 0.1
74 | SCALE: 0.4
75 | ROTATE: 0.
76 | # for pose
77 | AUG_ROT: 0.
78 | FLIP: 0.5
79 | NO_COLOR_AUG: false
80 |
81 | ROT_FACTOR: 30
82 | SCALE_MIN: 0.5
83 | SCALE_MAX: 1.1
84 | IMAGE_SIZE: 512
85 | RANDOM_CROP: true
86 |
87 | TRAIN:
88 | DISTRIBUTE: true
89 | OPTIMIZER: 'sgd'
90 | LOCAL_RANK: 0
91 | HIDE_DATA_TIME: false
92 | SAVE_ALL_MODEL: false
93 | RESUME: false
94 | LR_FACTOR: 0.1
95 | LR_STEP: [800, 900]
96 | EPOCHS: 1000
97 | NUM_ITERS: -1
98 | LR: 7.e-3
99 | BATCH_SIZE: 56
100 | MASTER_BATCH_SIZE: 14
101 |
102 | MOMENTUM: 0.9
103 | WD: 0.0001
104 | NESTEROV: false
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 |
108 | # 'apply and reset gradients every n batches'
109 | STRIDE_APPLY: 1
110 |
111 | CHECKPOINT: ''
112 | SHUFFLE: true
113 | VAL_INTERVALS: 1
114 | TRAINVAL: false
115 |
116 | TEST:
117 | # Test Model Epoch
118 | MODEL_PATH: '/home/tensorboy/data/centerpose/res50/model_best.pth'
119 | TASK: 'multi_pose'
120 | FLIP_TEST: true
121 |
122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
123 | MODEL_FILE: ''
124 | TEST_SCALES: [1]
125 | IMAGE_THRE: 0.1
126 | TOPK: 100
127 | NMS: false
128 | NMS_THRE: 0.5
129 | NOT_PREFETCH_TEST: false
130 | FIX_RES: true
131 |
132 | SOFT_NMS: false
133 | OKS_THRE: 0.5
134 | VIS_THRESH: 0.3
135 | KEYPOINT_THRESH: 0.2
136 | NUM_MIN_KPT: 4
137 | THRESH_HUMAN: 0.5
138 |
139 | EVAL_ORACLE_HM: false
140 | EVAL_ORACLE_WH: false
141 | EVAL_ORACLE_OFFSET: false
142 | EVAL_ORACLE_KPS: false
143 | EVAL_ORACLE_HMHP: false
144 | EVAL_ORACLE_HP_OFFSET: false
145 | EVAL_ORACLE_DEP: false
146 |
--------------------------------------------------------------------------------
/experiments/shufflenetV2_512x512.yaml:
--------------------------------------------------------------------------------
1 | SAMPLE_METHOD: 'coco_hp'
2 | DATA_DIR: '/home/tensorboy/data'
3 | EXP_ID: 'coco_pose_shufflenetv2'
4 | DEBUG: 0
5 | DEBUG_THEME: 'white'
6 | SEED: 317
7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd'
8 | LOG_DIR: ''
9 | EXPERIMENT_NAME: ''
10 | GPUS: [0, 1, 2, 3]
11 | WORKERS: 8
12 | PRINT_FREQ: 0
13 | PIN_MEMORY: true
14 | RANK: 0
15 | SAVE_RESULTS: true
16 |
17 | CUDNN:
18 | BENCHMARK: true
19 |
20 | MODEL:
21 | CENTER_THRESH: 0.1
22 | NUM_CLASSES: 1
23 | NAME: 'shufflenetV2'
24 | HEADS_NAME: 'keypoint'
25 | HEADS_NUM: [1, 2, 34, 2, 17, 2]
26 | HEAD_CONV: 256
27 | INTERMEDIATE_CHANNEL: 256
28 | DOWN_RATIO: 4
29 | NUM_STACKS: 1
30 | INPUT_RES: 512
31 | OUTPUT_RES: 128
32 | INPUT_H: 512
33 | INPUT_W: 512
34 | PAD: 31
35 | NUM_KEYPOINTS: 17
36 | TAG_PER_JOINT: true
37 | TARGET_TYPE: 'gaussian'
38 | SIGMA: 2
39 |
40 | LOSS:
41 | METRIC: 'loss'
42 | MSE_LOSS: false
43 | REG_LOSS: 'l1'
44 | USE_OHKM: false
45 | TOPK: 8
46 | USE_TARGET_WEIGHT: true
47 | USE_DIFFERENT_JOINTS_WEIGHT: false
48 | HP_WEIGHT: 1.
49 | HM_HP_WEIGHT: 1.
50 | DENSE_HP: false
51 | HM_HP: true
52 | REG_BBOX: true
53 | WH_WEIGHT: 0.1
54 | REG_OFFSET: true
55 | OFF_WEIGHT: 1.
56 | REG_HP_OFFSET: true
57 | HM_HP_WEIGHT: 1.
58 |
59 | DATASET:
60 | DATASET: 'coco_hp'
61 | TRAIN_SET: 'train'
62 | TEST_SET: 'valid'
63 | TRAIN_IMAGE_DIR: 'images/train2017'
64 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json']
65 | VAL_IMAGE_DIR: 'images/val2017'
66 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json'
67 |
68 | # training data augmentation
69 | MEAN: [0.408, 0.447, 0.470]
70 | STD: [0.289, 0.274, 0.278]
71 | SHIFT: 0.1
72 | SCALE: 0.4
73 | ROTATE: 0.
74 | # for pose
75 | AUG_ROT: 0.
76 | FLIP: 0.5
77 | NO_COLOR_AUG: false
78 |
79 | ROT_FACTOR: 30
80 | SCALE_MIN: 0.5
81 | SCALE_MAX: 1.1
82 | IMAGE_SIZE: 512
83 | RANDOM_CROP: true
84 |
85 | TRAIN:
86 | OPTIMIZER: 'adam'
87 | DISTRIBUTE: true
88 | LOCAL_RANK: 0
89 | HIDE_DATA_TIME: false
90 | SAVE_ALL_MODEL: false
91 | RESUME: false
92 | LR_FACTOR: 0.1
93 | LR_STEP: [270, 300]
94 | EPOCHS: 320
95 | NUM_ITERS: -1
96 | LR: 4.6875e-4
97 | BATCH_SIZE: 120
98 | MASTER_BATCH_SIZE: 30
99 |
100 | MOMENTUM: 0.9
101 | WD: 0.0001
102 | NESTEROV: false
103 | GAMMA1: 0.99
104 | GAMMA2: 0.0
105 |
106 | # 'apply and reset gradients every n batches'
107 | STRIDE_APPLY: 1
108 |
109 | CHECKPOINT: ''
110 | SHUFFLE: true
111 | VAL_INTERVALS: 1
112 | TRAINVAL: false
113 |
114 | TEST:
115 | # Test Model Epoch
116 | MODEL_PATH: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd/model_best.pth'
117 | TASK: 'multi_pose'
118 | FLIP_TEST: true
119 |
120 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg'
121 | MODEL_FILE: ''
122 | TEST_SCALES: [1]
123 | IMAGE_THRE: 0.1
124 | TOPK: 100
125 | NMS: false
126 | NMS_THRE: 0.5
127 | NOT_PREFETCH_TEST: false
128 | FIX_RES: false
129 | KEEP_RES: false
130 |
131 | SOFT_NMS: false
132 | OKS_THRE: 0.5
133 | VIS_THRESH: 0.3
134 | KEYPOINT_THRESH: 0.2
135 | NUM_MIN_KPT: 4
136 | THRESH_HUMAN: 0.4
137 |
138 | EVAL_ORACLE_HM: false
139 | EVAL_ORACLE_WH: false
140 | EVAL_ORACLE_OFFSET: false
141 | EVAL_ORACLE_KPS: false
142 | EVAL_ORACLE_HMHP: false
143 | EVAL_ORACLE_HP_OFFSET: false
144 | EVAL_ORACLE_DEP: false
145 |
--------------------------------------------------------------------------------
/images/image1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/images/image1.jpeg
--------------------------------------------------------------------------------
/lib/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .default import _C as cfg
2 | from .default import update_config
3 |
--------------------------------------------------------------------------------
/lib/config/default.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | from yacs.config import CfgNode as CN
4 |
5 | _C = CN()
6 |
7 | _C.TASK = 'multi_pose'
8 | _C.SAMPLE_METHOD = 'coco_hp'
9 | _C.DATA_DIR = '/data'
10 | _C.EXP_ID = 'default'
11 | _C.DEBUG = 0
12 | _C.DEBUG_THEME = 'white'
13 | _C.TEST = False
14 | _C.SEED = 317
15 | _C.SAVE_RESULTS = False
16 |
17 | _C.OUTPUT_DIR = ''
18 | _C.LOG_DIR = ''
19 | _C.EXPERIMENT_NAME = ''
20 | _C.GPUS = [0, 1, 2, 3]
21 | _C.WORKERS = 4
22 | _C.PRINT_FREQ = 20
23 | _C.PIN_MEMORY = True
24 | _C.RANK = 0
25 |
26 | # Cudnn related params
27 | _C.CUDNN = CN()
28 | _C.CUDNN.ENABLED = True
29 | _C.CUDNN.BENCHMARK = True
30 | _C.CUDNN.DETERMINISTIC = False
31 |
32 | # common params for NETWORK
33 | _C.MODEL = CN()
34 | _C.MODEL.PRETRAINED = ''
35 | _C.MODEL.INIT_WEIGHTS = False
36 | _C.MODEL.NAME = 'res_50'
37 | # 0 for no conv layer, -1 for the default setting, 64 for resnets and 256 for dla
38 | _C.MODEL.HEAD_CONV = 64
39 | _C.MODEL.INTERMEDIATE_CHANNEL = 64
40 | _C.MODEL.NUM_STACKS = 1
41 | _C.MODEL.HEADS_NAME = 'keypoint'
42 | _C.MODEL.HEADS_NUM = [1, 2, 34, 2, 17, 2]
43 | _C.MODEL.DOWN_RATIO = 4
44 | _C.MODEL.INPUT_RES = 512
45 | _C.MODEL.OUTPUT_RES = 128
46 | _C.MODEL.INPUT_H = 512
47 | _C.MODEL.INPUT_W = 512
48 | _C.MODEL.PAD = 31
49 | _C.MODEL.NUM_CLASSES = 1
50 | _C.MODEL.NUM_KEYPOINTS = 17
51 | _C.MODEL.TAG_PER_JOINT = True
52 | _C.MODEL.TARGET_TYPE = 'gaussian'
53 | _C.MODEL.SIGMA = 2
54 | _C.MODEL.CENTER_THRESH = 0.1
55 | _C.MODEL.EXTRA = CN(new_allowed=True)
56 |
57 | _C.LOSS = CN()
58 | _C.LOSS.METRIC = 'loss'
59 | _C.LOSS.MSE_LOSS = False
60 | _C.LOSS.USE_OHKM = False
61 | _C.LOSS.TOPK = 8
62 | _C.LOSS.USE_TARGET_WEIGHT = True
63 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False
64 |
65 | # multi pose
66 | _C.LOSS.HP_WEIGHT = 1.
67 | _C.LOSS.HM_WEIGHT = 1.
68 | _C.LOSS.REG_LOSS = 'l1'
69 | _C.LOSS.HM_HP_WEIGHT = 1.
70 | _C.LOSS.DENSE_HP = False
71 | _C.LOSS.HM_HP = True
72 | _C.LOSS.REG_HP_OFFSET = True
73 | _C.LOSS.REG_BBOX = True
74 | _C.LOSS.WH_WEIGHT = 0.1
75 | _C.LOSS.REG_OFFSET = True
76 | _C.LOSS.OFF_WEIGHT = 1.
77 |
78 |
79 | # DATASET related params
80 | _C.DATASET = CN()
81 | _C.DATASET.DATASET = 'coco_hp'
82 | _C.DATASET.TRAIN_SET = 'train'
83 | _C.DATASET.TEST_SET = 'valid'
84 | _C.DATASET.TRAIN_IMAGE_DIR = 'images/train2017'
85 | _C.DATASET.TRAIN_ANNOTATIONS = ['person_keypoints_train2017.json']
86 | _C.DATASET.VAL_IMAGE_DIR = 'images/val2017'
87 | _C.DATASET.VAL_ANNOTATIONS = 'person_keypoints_val2017.json'
88 | # training data augmentation
89 | _C.DATASET.MEAN = [0.408, 0.447, 0.470]
90 | _C.DATASET.STD = [0.289, 0.274, 0.278]
91 | _C.DATASET.RANDOM_CROP = True
92 | _C.DATASET.SHIFT = 0.1
93 | _C.DATASET.SCALE = 0.4
94 | _C.DATASET.ROTATE = 0.
95 | # for pose
96 | _C.DATASET.AUG_ROT = 0.
97 | _C.DATASET.FLIP = 0.5
98 | _C.DATASET.NO_COLOR_AUG = False
99 | _C.DATASET.ROT_FACTOR = 30
100 | _C.DATASET.SCALE_MIN = 0.5
101 | _C.DATASET.SCALE_MAX = 1.1
102 | _C.DATASET.IMAGE_SIZE = 512
103 |
104 | # train
105 | _C.TRAIN = CN()
106 |
107 | _C.TRAIN.DISTRIBUTE = True
108 | _C.TRAIN.LOCAL_RANK = 0
109 | _C.TRAIN.HIDE_DATA_TIME = False
110 | _C.TRAIN.SAVE_ALL_MODEL = False
111 | _C.TRAIN.RESUME = False
112 | _C.TRAIN.LR_FACTOR = 0.1
113 | _C.TRAIN.LR_STEP = [90, 120]
114 | _C.TRAIN.EPOCHS = 140
115 | _C.TRAIN.NUM_ITERS = -1
116 | _C.TRAIN.LR = 1.25e-4
117 | _C.TRAIN.BATCH_SIZE = 32
118 | _C.TRAIN.MASTER_BATCH_SIZE = -1
119 |
120 |
121 | _C.TRAIN.OPTIMIZER = 'adam'
122 | _C.TRAIN.MOMENTUM = 0.9
123 | _C.TRAIN.WD = 0.0001
124 | _C.TRAIN.NESTEROV = False
125 | _C.TRAIN.GAMMA1 = 0.99
126 | _C.TRAIN.GAMMA2 = 0.0
127 |
128 |
129 | # 'apply and reset gradients every n batches'
130 | _C.TRAIN.STRIDE_APPLY = 1
131 |
132 | _C.TRAIN.RESUME = False
133 | _C.TRAIN.CHECKPOINT = ''
134 | _C.TRAIN.SHUFFLE = True
135 | _C.TRAIN.VAL_INTERVALS = 5
136 | _C.TRAIN.TRAINVAL = False
137 |
138 | # testing
139 | _C.TEST = CN()
140 | # size of images for each device
141 | _C.TEST.BATCH_SIZE_PER_GPU = 32
142 | # Test Model Epoch
143 | _C.TEST.FLIP_TEST = False
144 | _C.TEST.TASK = 'multi_pose'
145 | _C.TEST.MODEL_PATH = ''
146 | _C.TEST.DEMO_FILE = ''
147 | _C.TEST.MODEL_FILE = ''
148 | _C.TEST.TEST_SCALES = [1]
149 | _C.TEST.IMAGE_THRE = 0.1
150 | _C.TEST.TOPK = 100
151 | _C.TEST.NMS = False
152 | _C.TEST.NMS_THRE = 0.5
153 | _C.TEST.NOT_PREFETCH_TEST = False
154 | _C.TEST.FIX_RES = True
155 | _C.TEST.KEEP_RES = False
156 |
157 | _C.TEST.SOFT_NMS = False
158 | _C.TEST.OKS_THRE = 0.5
159 | _C.TEST.VIS_THRESH = 0.3
160 | _C.TEST.KEYPOINT_THRESH = 0.2
161 | _C.TEST.NUM_MIN_KPT = 4
162 | _C.TEST.THRESH_HUMAN = 0.4
163 |
164 | _C.TEST.EVAL_ORACLE_HM = False
165 | _C.TEST.EVAL_ORACLE_WH = False
166 | _C.TEST.EVAL_ORACLE_OFFSET = False
167 | _C.TEST.EVAL_ORACLE_KPS = False
168 | _C.TEST.EVAL_ORACLE_HMHP = False
169 | _C.TEST.EVAL_ORACLE_HP_OFFSET = False
170 | _C.TEST.EVAL_ORACLE_DEP = False
171 |
172 |
173 | def update_config(cfg, args_cfg):
174 |
175 | cfg.defrost()
176 | cfg.merge_from_file(args_cfg)
177 | cfg.freeze()
178 |
179 |
180 | if __name__ == '__main__':
181 | import sys
182 |
183 | with open(sys.argv[1], 'w') as f:
184 | print(_C, file=f)
185 |
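A minimal usage sketch (not part of the repo) of the config system defined above: `cfg` holds the yacs defaults and `update_config` merges an experiment YAML on top of them. The import path assumes `lib/` is on `sys.path`, as the repo's `_init_paths.py` helpers arrange; the YAML filename is one of the files under `experiments/`.

```python
# Hedged sketch: merge an experiment YAML onto the yacs defaults above.
from config import cfg, update_config  # exported by lib/config/__init__.py

update_config(cfg, 'experiments/shufflenetV2_512x512.yaml')
print(cfg.MODEL.NAME)        # 'shufflenetV2' after the merge
print(cfg.TRAIN.BATCH_SIZE)  # value from the YAML; unset keys keep their defaults
```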
--------------------------------------------------------------------------------
/lib/datasets/coco_hp.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import json
4 | import os
5 | import time
6 |
7 | import numpy as np
8 | import pycocotools.coco as coco
9 | import torch.utils.data as data
10 | from pycocotools.cocoeval import COCOeval
11 |
12 |
13 | class COCOHP(data.Dataset):
14 | num_classes = 1
15 | num_joints = 17
16 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
17 | [11, 12], [13, 14], [15, 16]]
18 |
19 | def __init__(self, cfg, split):
20 | super(COCOHP, self).__init__()
21 |
22 | self.data_dir = os.path.join(cfg.DATA_DIR, 'coco')
23 | self.img_dir = os.path.join(self.data_dir, 'images', '{}2017'.format(split))
24 | if split == 'test':
25 | self.annot_path = os.path.join(
26 | self.data_dir, 'annotations',
27 | 'image_info_test-dev2017.json').format(split)
28 | else:
29 | self.annot_path = os.path.join(
30 | self.data_dir, 'annotations',
31 | 'person_keypoints_{}2017.json').format(split)
32 | self.max_objs = 32
33 | self._valid_ids = [1]
34 | self.class_name = ['__background__', 'person']
35 | self._data_rng = np.random.RandomState(123)
36 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
37 | dtype=np.float32)
38 | self._eig_vec = np.array([
39 | [-0.58752847, -0.69563484, 0.41340352],
40 | [-0.5832747, 0.00994535, -0.81221408],
41 | [-0.56089297, 0.71832671, 0.41158938]
42 | ], dtype=np.float32)
43 | self.split = split
44 | self.cfg = cfg
45 |
46 | print('==> initializing coco 2017 {} data.'.format(split))
47 | self.coco = coco.COCO(self.annot_path)
48 | images = self.coco.getImgIds()
49 | catIds = self.coco.getCatIds(self.class_name[-1])
50 | assert catIds == self._valid_ids
51 | self.images = self.coco.getImgIds(images, catIds)
52 | self.num_samples = len(self.images)
53 |
54 | print('Loaded {} {} samples'.format(split, self.num_samples))
55 |
56 | def _to_float(self, x):
57 | return float("{:.2f}".format(x))
58 |
59 | def convert_eval_format(self, all_bboxes):
60 | detections = []
61 | for image_id in all_bboxes:
62 | category_id = 1
63 | for dets in all_bboxes[image_id][category_id]:
64 | bbox = dets[:4]
65 | bbox[2] -= bbox[0]
66 | bbox[3] -= bbox[1]
67 | score = dets[4]
68 | keypoint_prob = (np.array(dets[39:56]) > 0.1).astype(np.int32).reshape(17, 1)
69 | keypoints = np.array(dets[5:39], dtype=np.float32).reshape(-1, 2)
70 | bbox_out = list(map(self._to_float, bbox))
71 | keypoints_pred = np.concatenate([
72 | keypoints, keypoint_prob], axis=1).reshape(51).tolist()
73 | keypoints_pred = list(map(self._to_float, keypoints_pred))
74 |
75 | detection = {
76 | "image_id": int(image_id),
77 | "category_id": int(category_id),
78 | "bbox": bbox_out,
79 | "score": float("{:.2f}".format(score)),
80 | "keypoints": keypoints_pred
81 | }
82 | detections.append(detection)
83 | return detections
84 |
85 | def __len__(self):
86 | return self.num_samples
87 |
88 | def save_results(self, results, save_dir):
89 | json.dump(self.convert_eval_format(results),
90 | open('{}/results.json'.format(save_dir), 'w'))
91 |
92 |
93 | def run_eval(self, results, save_dir):
94 | #self.save_results(results, save_dir)
95 | #seconds = time.time()
96 | #local_time = time.ctime(seconds).replace(' ', '_').replace(':','_')
97 | #coco_dets = self.coco.loadRes('{}/{}_results.json'.format(save_dir, local_time))
98 | coco_dets = self.coco.loadRes(self.convert_eval_format(results))
99 | #coco_eval = COCOeval(self.coco, coco_dets, "bbox")
100 | #coco_eval.evaluate()
101 | #coco_eval.accumulate()
102 |
103 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
104 | coco_eval.evaluate()
105 | coco_eval.accumulate()
106 | coco_eval.summarize()
107 | return coco_eval.stats[0]
108 |
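For reference, a small sketch (not part of the repo) of the 56-dimensional detection vector that `convert_eval_format` slices: indices 0-3 are the bbox, 4 the score, 5-38 the 17 keypoint (x, y) pairs, and 39-55 the keypoint confidences. The layout is inferred from the slicing above; the random vector is only illustrative.

```python
import numpy as np

# Illustrative 56-dim detection, matching the slicing in convert_eval_format.
det = np.random.rand(56).astype(np.float32)

bbox = det[:4]                                     # x1, y1, x2, y2 before the w/h conversion
score = det[4]
keypoints = det[5:39].reshape(17, 2)               # (x, y) per COCO keypoint
kpt_visible = (det[39:56] > 0.1).astype(np.int32)  # same 0.1 threshold as above
print(bbox.shape, float(score), keypoints.shape, int(kpt_visible.sum()))
```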
--------------------------------------------------------------------------------
/lib/datasets/data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | COCO_PERSON_SKELETON = [
4 | (16, 14), (14, 12), (17, 15), (15, 13), (12, 13), (6, 12), (7, 13),
5 | (6, 7), (6, 8), (7, 9), (8, 10), (9, 11), (2, 3), (1, 2), (1, 3),
6 | (2, 4), (3, 5), (4, 6), (5, 7),
7 | ]
8 |
9 | COCO_KEYPOINTS = [
10 | 'nose', # 1
11 | 'left_eye', # 2
12 | 'right_eye', # 3
13 | 'left_ear', # 4
14 | 'right_ear', # 5
15 | 'left_shoulder', # 6
16 | 'right_shoulder', # 7
17 | 'left_elbow', # 8
18 | 'right_elbow', # 9
19 | 'left_wrist', # 10
20 | 'right_wrist', # 11
21 | 'left_hip', # 12
22 | 'right_hip', # 13
23 | 'left_knee', # 14
24 | 'right_knee', # 15
25 | 'left_ankle', # 16
26 | 'right_ankle', # 17
27 | ]
28 |
29 |
30 | HFLIP = {
31 | 'left_eye': 'right_eye',
32 | 'right_eye': 'left_eye',
33 | 'left_ear': 'right_ear',
34 | 'right_ear': 'left_ear',
35 | 'left_shoulder': 'right_shoulder',
36 | 'right_shoulder': 'left_shoulder',
37 | 'left_elbow': 'right_elbow',
38 | 'right_elbow': 'left_elbow',
39 | 'left_wrist': 'right_wrist',
40 | 'right_wrist': 'left_wrist',
41 | 'left_hip': 'right_hip',
42 | 'right_hip': 'left_hip',
43 | 'left_knee': 'right_knee',
44 | 'right_knee': 'left_knee',
45 | 'left_ankle': 'right_ankle',
46 | 'right_ankle': 'left_ankle',
47 | }
48 |
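As a quick sanity check (not part of the repo), the name-based `HFLIP` table above can be turned into 0-based index pairs; the result matches `COCOHP.flip_idx` in `coco_hp.py`. The import path assumes `lib/` is on `sys.path`.

```python
from datasets.data import COCO_KEYPOINTS, HFLIP  # assumes lib/ is importable

pairs = sorted({tuple(sorted((COCO_KEYPOINTS.index(a), COCO_KEYPOINTS.index(b))))
                for a, b in HFLIP.items()})
print(pairs)  # [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16)]
```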
--------------------------------------------------------------------------------
/lib/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | from .coco_hp import COCOHP
4 | from .multi_pose import MultiPoseDataset
5 |
6 | dataset_factory = {
7 | 'coco_hp': COCOHP,
8 | }
9 |
10 | _sample_factory = {
11 | 'multi_pose': MultiPoseDataset,
12 | }
13 |
14 |
15 | def get_dataset(dataset, task):
16 | class Dataset(dataset_factory[dataset], _sample_factory[task]):
17 | pass
18 | return Dataset
19 |
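`get_dataset` composes the annotation/evaluation class (`COCOHP`) with the sampling mixin (`MultiPoseDataset`) via multiple inheritance. A hedged sketch of how a trainer would use it; `cfg` is assumed to be the merged yacs config and the COCO data to live under `cfg.DATA_DIR/coco`.

```python
from datasets.dataset_factory import get_dataset  # assumes lib/ is on sys.path

Dataset = get_dataset('coco_hp', 'multi_pose')
# train_set = Dataset(cfg, 'train')  # __init__ from COCOHP, __getitem__ from MultiPoseDataset
# val_set = Dataset(cfg, 'val')
```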
--------------------------------------------------------------------------------
/lib/detectors/base_detector.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import time
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from progress.bar import Bar
9 |
10 | from models.model import create_model, load_model
11 | from utils.debugger import Debugger
12 | from utils.image import get_affine_transform
13 |
14 |
15 | class BaseDetector(object):
16 | def __init__(self, cfg):
17 |
18 | print('Creating model...')
19 | self.model = create_model(cfg.MODEL.NAME, cfg.MODEL.HEAD_CONV, cfg)
20 | self.model = load_model(self.model, cfg.TEST.MODEL_PATH)
21 | self.model = self.model.to(torch.device('cuda'))
22 | self.model.eval()
23 |
24 | self.mean = np.array(cfg.DATASET.MEAN, dtype=np.float32).reshape(1, 1, 3)
25 | self.std = np.array(cfg.DATASET.STD, dtype=np.float32).reshape(1, 1, 3)
26 | self.max_per_image = 100
27 | self.num_classes = cfg.MODEL.NUM_CLASSES
28 | self.scales = cfg.TEST.TEST_SCALES
29 | self.cfg = cfg
30 | self.pause = True
31 |
32 | def pre_process(self, image, scale, meta=None):
33 | height, width = image.shape[0:2]
34 |
35 | new_height = int(height * scale)
36 | new_width = int(width * scale)
37 | if self.cfg.TEST.FIX_RES:
38 | inp_height, inp_width = self.cfg.MODEL.INPUT_H, self.cfg.MODEL.INPUT_W
39 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
40 | s = max(height, width) * 1.0
41 | else:
42 | inp_height = (new_height | self.cfg.MODEL.PAD) + 1
43 | inp_width = (new_width | self.cfg.MODEL.PAD) + 1
44 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
45 | s = np.array([inp_width, inp_height], dtype=np.float32)
46 |
47 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
48 | resized_image = cv2.resize(image, (new_width, new_height))
49 | inp_image = cv2.warpAffine(
50 | resized_image, trans_input, (inp_width, inp_height),
51 | flags=cv2.INTER_LINEAR)
52 |
53 | inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32)
54 |
55 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width)
56 | if self.cfg.TEST.FLIP_TEST:
57 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
58 | images = torch.from_numpy(images)
59 | meta = {'c': c, 's': s,
60 | 'out_height': inp_height // self.cfg.MODEL.DOWN_RATIO,
61 | 'out_width': inp_width // self.cfg.MODEL.DOWN_RATIO}
62 | return images, meta
63 |
64 | def process(self, images, return_time=False):
65 | raise NotImplementedError
66 |
67 | def post_process(self, dets, meta, scale=1):
68 | raise NotImplementedError
69 |
70 | def merge_outputs(self, detections):
71 | raise NotImplementedError
72 |
73 | def debug(self, debugger, images, dets, output, scale=1):
74 | raise NotImplementedError
75 |
76 | def show_results(self, debugger, image, results):
77 | raise NotImplementedError
78 |
79 | def run(self, image_or_path_or_tensor, meta=None):
80 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
81 | merge_time, tot_time = 0, 0
82 | debugger = Debugger((self.cfg.DEBUG==3), theme=self.cfg.DEBUG_THEME,
83 | num_classes=self.cfg.MODEL.NUM_CLASSES, dataset=self.cfg.SAMPLE_METHOD, down_ratio=self.cfg.MODEL.DOWN_RATIO)
84 | start_time = time.time()
85 | pre_processed = False
86 | if isinstance(image_or_path_or_tensor, np.ndarray):
87 | image = image_or_path_or_tensor
88 | elif isinstance(image_or_path_or_tensor, str):
89 | image = cv2.imread(image_or_path_or_tensor)
90 | else:
91 | image = image_or_path_or_tensor['image'][0].numpy()
92 | pre_processed_images = image_or_path_or_tensor
93 | pre_processed = True
94 |
95 | loaded_time = time.time()
96 | load_time += (loaded_time - start_time)
97 |
98 | detections = []
99 | for scale in self.scales:
100 | scale_start_time = time.time()
101 | if not pre_processed:
102 | images, meta = self.pre_process(image, scale, meta)
103 | else:
104 | images = pre_processed_images['images'][scale][0]
105 | meta = pre_processed_images['meta'][scale]
106 | meta = {k: v.numpy()[0] for k, v in meta.items()}
107 | images = images.to(torch.device('cuda'))
108 | torch.cuda.synchronize()
109 | pre_process_time = time.time()
110 | pre_time += pre_process_time - scale_start_time
111 |
112 | output, dets, forward_time = self.process(images, return_time=True)
113 |
114 | torch.cuda.synchronize()
115 | net_time += forward_time - pre_process_time
116 | decode_time = time.time()
117 | dec_time += decode_time - forward_time
118 |
119 | if self.cfg.DEBUG >= 2:
120 | self.debug(debugger, images, dets, output, scale)
121 |
122 | dets = self.post_process(dets, meta, scale)
123 | torch.cuda.synchronize()
124 | post_process_time = time.time()
125 | post_time += post_process_time - decode_time
126 |
127 | detections.append(dets)
128 |
129 | results = self.merge_outputs(detections)
130 | torch.cuda.synchronize()
131 | end_time = time.time()
132 | merge_time += end_time - post_process_time
133 | tot_time += end_time - start_time
134 |
135 | if self.cfg.DEBUG >= 1:
136 | self.show_results(debugger, image, results)
137 |
138 | return {'results': {1:results}, 'tot': tot_time, 'load': load_time,
139 | 'pre': pre_time, 'net': net_time, 'dec': dec_time,
140 | 'post': post_time, 'merge': merge_time}
141 |
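When `TEST.FIX_RES` is false, `pre_process` pads each side length with `(x | cfg.MODEL.PAD) + 1`; with `PAD = 31` this rounds the value up to the next multiple of 32, which keeps the input divisible by `DOWN_RATIO` and the backbone strides. A small sketch of the arithmetic:

```python
# Sketch of the padding arithmetic used when TEST.FIX_RES is false.
PAD = 31
for x in (480, 500, 512, 513):
    print(x, '->', (x | PAD) + 1)
# 480 -> 512, 500 -> 512, 512 -> 544, 513 -> 544
```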
--------------------------------------------------------------------------------
/lib/detectors/detector_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | from .multi_pose import MultiPoseDetector
4 |
5 | detector_factory = {
6 | 'multi_pose': MultiPoseDetector,
7 | }
8 |
--------------------------------------------------------------------------------
/lib/detectors/multi_pose.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import time
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from progress.bar import Bar
9 |
10 | from models.decode import multi_pose_decode
11 | from models.utils import flip_lr, flip_lr_off, flip_tensor
12 | from utils.debugger import Debugger
13 | from utils.image import get_affine_transform
14 | from utils.post_process import multi_pose_post_process
15 | from .base_detector import BaseDetector
16 |
17 | try:
18 | from external.nms import soft_nms_39
19 | except:
20 | print('NMS not imported! If you need it,'
21 | ' do \n cd $CenterPose_ROOT/lib/external \n make')
22 |
23 |
24 | class MultiPoseDetector(BaseDetector):
25 | def __init__(self, cfg):
26 | super(MultiPoseDetector, self).__init__(cfg)
27 | self.flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
28 |
29 | def process(self, images, return_time=False):
30 | with torch.no_grad():
31 | torch.cuda.synchronize()
32 | outputs = self.model(images)
33 | hm, wh, hps, reg, hm_hp, hp_offset = outputs
34 |
35 | hm = hm.sigmoid_()
36 | if self.cfg.LOSS.HM_HP and not self.cfg.LOSS.MSE_LOSS:
37 | hm_hp = hm_hp.sigmoid_()
38 |
39 | reg = reg if self.cfg.LOSS.REG_OFFSET else None
40 | hm_hp = hm_hp if self.cfg.LOSS.HM_HP else None
41 | hp_offset = hp_offset if self.cfg.LOSS.REG_HP_OFFSET else None
42 | torch.cuda.synchronize()
43 | forward_time = time.time()
44 |
45 | if self.cfg.TEST.FLIP_TEST:
46 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
47 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
48 | hps = (hps[0:1] +
49 | flip_lr_off(hps[1:2], self.flip_idx)) / 2
50 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \
51 | if hm_hp is not None else None
52 | reg = reg[0:1] if reg is not None else None
53 | hp_offset = hp_offset[0:1] if hp_offset is not None else None
54 |
55 | dets = multi_pose_decode(hm, wh, hps, reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.cfg.TEST.TOPK)
56 |
57 | if return_time:
58 | return outputs, dets, forward_time
59 | else:
60 | return outputs, dets
61 |
62 | def post_process(self, dets, meta, scale=1):
63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
64 | dets = multi_pose_post_process(
65 | dets.copy(), [meta['c']], [meta['s']],
66 | meta['out_height'], meta['out_width'])
67 | for j in range(1, self.num_classes + 1):
68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 56)
69 | dets[0][j][:, :4] /= scale
70 | dets[0][j][:, 5:39] /= scale
71 | return dets[0]
72 |
73 | def merge_outputs(self, detections):
74 | results = np.concatenate(
75 | [detection[1] for detection in detections], axis=0).astype(np.float32)
76 | if self.cfg.TEST.NMS or len(self.cfg.TEST.TEST_SCALES) > 1:
77 | soft_nms_39(results, Nt=0.5, method=2)
78 | results = results.tolist()
79 | return results
80 |
81 |
82 | def debug(self, debugger, images, dets, output, scale=1):
83 | dets = dets.detach().cpu().numpy().copy()
84 | dets[:, :, :4] *= self.cfg.MODEL.DOWN_RATIO
85 | dets[:, :, 5:39] *= self.cfg.MODEL.DOWN_RATIO
86 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
87 | img = np.clip(((
88 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
89 | pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
90 | debugger.add_blend_img(img, pred, 'pred_hm')
91 | if self.cfg.LOSS.HM_HP:
92 | pred = debugger.gen_colormap_hp(
93 | output['hm_hp'][0].detach().cpu().numpy())
94 | debugger.add_blend_img(img, pred, 'pred_hmhp')
95 |
96 | def show_results(self, debugger, image, results):
97 | debugger.add_img(image, img_id='multi_pose')
98 | for b_id, detection in enumerate(results):
99 | bbox = detection[:4]
100 | bbox_prob = detection[4]
101 | keypoints = detection[5:39]
102 | keypoints_prob = detection[39:]
103 | if bbox_prob > self.cfg.TEST.VIS_THRESH:
104 | debugger.add_coco_bbox(bbox, 0, bbox_prob, img_id='multi_pose')
105 | debugger.add_coco_hp(keypoints, keypoints_prob, img_id='multi_pose')
106 |
107 | debugger.show_all_imgs(pause=self.pause)
108 |
--------------------------------------------------------------------------------
/lib/external/.gitignore:
--------------------------------------------------------------------------------
1 | bbox.c
2 | bbox.cpython-35m-x86_64-linux-gnu.so
3 | bbox.cpython-36m-x86_64-linux-gnu.so
4 |
5 | nms.c
6 | nms.cpython-35m-x86_64-linux-gnu.so
7 | nms.cpython-36m-x86_64-linux-gnu.so
8 |
--------------------------------------------------------------------------------
/lib/external/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 |
--------------------------------------------------------------------------------
/lib/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/__init__.py
--------------------------------------------------------------------------------
/lib/external/build/temp.linux-x86_64-3.6/nms.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/build/temp.linux-x86_64-3.6/nms.o
--------------------------------------------------------------------------------
/lib/external/make.sh:
--------------------------------------------------------------------------------
1 | python setup.py build_ext --inplace
2 |
--------------------------------------------------------------------------------
/lib/external/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 |
4 | import numpy
5 | from Cython.Build import cythonize
6 |
7 | extensions = [
8 | Extension(
9 | "nms",
10 | ["nms.pyx"],
11 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
12 | )
13 | ]
14 |
15 | setup(
16 | name="coco",
17 | ext_modules=cythonize(extensions),
18 | include_dirs=[numpy.get_include()]
19 | )
20 |
--------------------------------------------------------------------------------
/lib/logger.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
4 | import os
5 | import sys
6 | import time
7 |
8 | import torch
9 |
10 | USE_TENSORBOARD = True
11 | try:
12 | import tensorboardX
13 | print('Using tensorboardX')
14 | except:
15 | USE_TENSORBOARD = False
16 |
17 | class Logger(object):
18 | def __init__(self, cfg):
19 | """Create a summary writer logging to log_dir."""
20 | if not os.path.exists(cfg.OUTPUT_DIR):
21 | try:
22 | os.makedirs(cfg.OUTPUT_DIR)
23 | except:
24 | pass
25 | time_str = time.strftime('%Y-%m-%d-%H-%M')
26 |
27 | file_name = os.path.join(cfg.OUTPUT_DIR, 'opt.txt')
28 | with open(file_name, 'wt') as opt_file:
29 | opt_file.write('==> torch version: {}\n'.format(torch.__version__))
30 | opt_file.write('==> cudnn version: {}\n'.format(
31 | torch.backends.cudnn.version()))
32 | opt_file.write('==> Cmd:\n')
33 | opt_file.write(str(sys.argv))
34 | opt_file.write('\n==> Opt:\n')
35 |
36 | log_dir = cfg.OUTPUT_DIR + '/logs_{}'.format(time_str)
37 | if USE_TENSORBOARD:
38 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
39 | else:
40 | try:
41 | os.makedirs(os.path.dirname(log_dir))
42 | except:
43 | pass
44 | try:
45 | os.makedirs(log_dir)
46 | except:
47 | pass
48 | self.log = open(log_dir + '/log.txt', 'w')
49 | try:
50 | os.system('cp {}/opt.txt {}/'.format(cfg.OUTPUT_DIR, log_dir))
51 | except:
52 | pass
53 | self.start_line = True
54 |
55 | def write(self, txt):
56 | if self.start_line:
57 | time_str = time.strftime('%Y-%m-%d-%H-%M')
58 | self.log.write('{}: {}'.format(time_str, txt))
59 | else:
60 | self.log.write(txt)
61 | self.start_line = False
62 | if '\n' in txt:
63 | self.start_line = True
64 | self.log.flush()
65 |
66 | def close(self):
67 | self.log.close()
68 |
69 | def scalar_summary(self, tag, value, step):
70 | """Log a scalar variable."""
71 | if USE_TENSORBOARD:
72 | self.writer.add_scalar(tag, value, step)
73 |
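A hedged usage sketch of `Logger` (not part of the repo): it only needs a config with a writable `OUTPUT_DIR`, and scalar summaries are silently dropped unless tensorboardX is installed. The output directory below is hypothetical.

```python
from config import cfg   # assumes lib/ is on sys.path
from logger import Logger

cfg.defrost()
cfg.OUTPUT_DIR = '/tmp/centerpose_logs'   # hypothetical output directory
cfg.freeze()

logger = Logger(cfg)
logger.write('epoch 1 | loss 0.42\n')
logger.scalar_summary('train/loss', 0.42, 1)
logger.close()
```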
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Charles Shang
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/README.md:
--------------------------------------------------------------------------------
1 | ## Deformable Convolutional Networks V2 with Pytorch 1.0
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 | ### Note
42 | The master branch now targets pytorch 1.0 (new ATen API); you can switch back to pytorch 0.4 with:
43 | ```bash
44 | git checkout pytorch_0.4
45 | ```
46 |
47 | ### Known Issues:
48 |
49 | - [x] Gradient check w.r.t offset (solved)
50 | - [ ] Backward is not reentrant (minor)
51 |
52 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
53 |
54 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some
56 | non-differentiable points?
57 |
58 | Update: all gradient check passes with double precision.
59 |
60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
61 | float, `<1e-15` for double),
62 | so it may not be a serious problem (?)
63 |
64 | Please post an issue or PR if you have any comments.
65 |
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/DCNv2/__init__.py
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python setup.py build develop
3 |
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import glob
4 | import os
5 |
6 | import torch
7 | from setuptools import find_packages, setup
8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
9 |
10 | requirements = ["torch", "torchvision"]
11 |
12 | def get_extensions():
13 | this_dir = os.path.dirname(os.path.abspath(__file__))
14 | extensions_dir = os.path.join(this_dir, "src")
15 |
16 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
17 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
18 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
19 |
20 | sources = main_file + source_cpu
21 | extension = CppExtension
22 | extra_compile_args = {"cxx": []}
23 | define_macros = []
24 |
25 | if torch.cuda.is_available() and CUDA_HOME is not None:
26 | extension = CUDAExtension
27 | sources += source_cuda
28 | define_macros += [("WITH_CUDA", None)]
29 | extra_compile_args["nvcc"] = [
30 | "-DCUDA_HAS_FP16=1",
31 | "-D__CUDA_NO_HALF_OPERATORS__",
32 | "-D__CUDA_NO_HALF_CONVERSIONS__",
33 | "-D__CUDA_NO_HALF2_OPERATORS__",
34 | ]
35 | else:
36 | raise NotImplementedError('CUDA is not available')
37 |
38 | sources = [os.path.join(extensions_dir, s) for s in sources]
39 | include_dirs = [extensions_dir]
40 | ext_modules = [
41 | extension(
42 | "_ext",
43 | sources,
44 | include_dirs=include_dirs,
45 | define_macros=define_macros,
46 | extra_compile_args=extra_compile_args,
47 | )
48 | ]
49 | return ext_modules
50 |
51 | setup(
52 | name="DCNv2",
53 | version="0.1",
54 | author="charlesshang",
55 | url="https://github.com/charlesshang/DCNv2",
56 | description="deformable convolutional networks",
57 | packages=find_packages(exclude=("configs", "tests",)),
58 | # install_requires=requirements,
59 | ext_modules=get_extensions(),
60 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
61 | )
62 |
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/cpu/dcn_v2_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include <vector>
2 |
3 | #include <ATen/ATen.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 |
6 |
7 | at::Tensor
8 | dcn_v2_cpu_forward(const at::Tensor &input,
9 | const at::Tensor &weight,
10 | const at::Tensor &bias,
11 | const at::Tensor &offset,
12 | const at::Tensor &mask,
13 | const int kernel_h,
14 | const int kernel_w,
15 | const int stride_h,
16 | const int stride_w,
17 | const int pad_h,
18 | const int pad_w,
19 | const int dilation_h,
20 | const int dilation_w,
21 | const int deformable_group)
22 | {
23 | AT_ERROR("Not implement on cpu");
24 | }
25 |
26 | std::vector<at::Tensor>
27 | dcn_v2_cpu_backward(const at::Tensor &input,
28 | const at::Tensor &weight,
29 | const at::Tensor &bias,
30 | const at::Tensor &offset,
31 | const at::Tensor &mask,
32 | const at::Tensor &grad_output,
33 | int kernel_h, int kernel_w,
34 | int stride_h, int stride_w,
35 | int pad_h, int pad_w,
36 | int dilation_h, int dilation_w,
37 | int deformable_group)
38 | {
39 | AT_ERROR("Not implement on cpu");
40 | }
41 |
42 | std::tuple<at::Tensor, at::Tensor>
43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
44 | const at::Tensor &bbox,
45 | const at::Tensor &trans,
46 | const int no_trans,
47 | const float spatial_scale,
48 | const int output_dim,
49 | const int group_size,
50 | const int pooled_size,
51 | const int part_size,
52 | const int sample_per_part,
53 | const float trans_std)
54 | {
55 | AT_ERROR("Not implement on cpu");
56 | }
57 |
58 | std::tuple<at::Tensor, at::Tensor>
59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
60 | const at::Tensor &input,
61 | const at::Tensor &bbox,
62 | const at::Tensor &trans,
63 | const at::Tensor &top_count,
64 | const int no_trans,
65 | const float spatial_scale,
66 | const int output_dim,
67 | const int group_size,
68 | const int pooled_size,
69 | const int part_size,
70 | const int sample_per_part,
71 | const float trans_std)
72 | {
73 | AT_ERROR("Not implement on cpu");
74 | }
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/cpu/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cpu_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cpu_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 |
64 | #ifndef DCN_V2_IM2COL_CUDA
65 | #define DCN_V2_IM2COL_CUDA
66 |
67 | #ifdef __cplusplus
68 | extern "C"
69 | {
70 | #endif
71 |
72 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
73 | const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
81 | const float *data_col, const float *data_offset, const float *data_mask,
82 | const int batch_size, const int channels, const int height_im, const int width_im,
83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
85 | const int dilation_h, const int dilation_w,
86 | const int deformable_group, float *grad_im);
87 |
88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
90 | const int batch_size, const int channels, const int height_im, const int width_im,
91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
93 | const int dilation_h, const int dilation_w,
94 | const int deformable_group,
95 | float *grad_offset, float *grad_mask);
96 |
97 | #ifdef __cplusplus
98 | }
99 | #endif
100 |
101 | #endif
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/cuda/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cuda_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cuda_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/dcn_v2.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 | at::Tensor
10 | dcn_v2_forward(const at::Tensor &input,
11 | const at::Tensor &weight,
12 | const at::Tensor &bias,
13 | const at::Tensor &offset,
14 | const at::Tensor &mask,
15 | const int kernel_h,
16 | const int kernel_w,
17 | const int stride_h,
18 | const int stride_w,
19 | const int pad_h,
20 | const int pad_w,
21 | const int dilation_h,
22 | const int dilation_w,
23 | const int deformable_group)
24 | {
25 | if (input.type().is_cuda())
26 | {
27 | #ifdef WITH_CUDA
28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask,
29 | kernel_h, kernel_w,
30 | stride_h, stride_w,
31 | pad_h, pad_w,
32 | dilation_h, dilation_w,
33 | deformable_group);
34 | #else
35 | AT_ERROR("Not compiled with GPU support");
36 | #endif
37 | }
38 | AT_ERROR("Not implemented on the CPU");
39 | }
40 |
41 | std::vector<at::Tensor>
42 | dcn_v2_backward(const at::Tensor &input,
43 | const at::Tensor &weight,
44 | const at::Tensor &bias,
45 | const at::Tensor &offset,
46 | const at::Tensor &mask,
47 | const at::Tensor &grad_output,
48 | int kernel_h, int kernel_w,
49 | int stride_h, int stride_w,
50 | int pad_h, int pad_w,
51 | int dilation_h, int dilation_w,
52 | int deformable_group)
53 | {
54 | if (input.type().is_cuda())
55 | {
56 | #ifdef WITH_CUDA
57 | return dcn_v2_cuda_backward(input,
58 | weight,
59 | bias,
60 | offset,
61 | mask,
62 | grad_output,
63 | kernel_h, kernel_w,
64 | stride_h, stride_w,
65 | pad_h, pad_w,
66 | dilation_h, dilation_w,
67 | deformable_group);
68 | #else
69 | AT_ERROR("Not compiled with GPU support");
70 | #endif
71 | }
72 | AT_ERROR("Not implemented on the CPU");
73 | }
74 |
75 | std::tuple<at::Tensor, at::Tensor>
76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input,
77 | const at::Tensor &bbox,
78 | const at::Tensor &trans,
79 | const int no_trans,
80 | const float spatial_scale,
81 | const int output_dim,
82 | const int group_size,
83 | const int pooled_size,
84 | const int part_size,
85 | const int sample_per_part,
86 | const float trans_std)
87 | {
88 | if (input.type().is_cuda())
89 | {
90 | #ifdef WITH_CUDA
91 | return dcn_v2_psroi_pooling_cuda_forward(input,
92 | bbox,
93 | trans,
94 | no_trans,
95 | spatial_scale,
96 | output_dim,
97 | group_size,
98 | pooled_size,
99 | part_size,
100 | sample_per_part,
101 | trans_std);
102 | #else
103 | AT_ERROR("Not compiled with GPU support");
104 | #endif
105 | }
106 | AT_ERROR("Not implemented on the CPU");
107 | }
108 |
109 | std::tuple<at::Tensor, at::Tensor>
110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,
111 | const at::Tensor &input,
112 | const at::Tensor &bbox,
113 | const at::Tensor &trans,
114 | const at::Tensor &top_count,
115 | const int no_trans,
116 | const float spatial_scale,
117 | const int output_dim,
118 | const int group_size,
119 | const int pooled_size,
120 | const int part_size,
121 | const int sample_per_part,
122 | const float trans_std)
123 | {
124 | if (input.type().is_cuda())
125 | {
126 | #ifdef WITH_CUDA
127 | return dcn_v2_psroi_pooling_cuda_backward(out_grad,
128 | input,
129 | bbox,
130 | trans,
131 | top_count,
132 | no_trans,
133 | spatial_scale,
134 | output_dim,
135 | group_size,
136 | pooled_size,
137 | part_size,
138 | sample_per_part,
139 | trans_std);
140 | #else
141 | AT_ERROR("Not compiled with GPU support");
142 | #endif
143 | }
144 | AT_ERROR("Not implemented on the CPU");
145 | }
--------------------------------------------------------------------------------
/lib/models/backbones/DCNv2/src/vision.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "dcn_v2.h"
3 |
4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
9 | }
10 |
--------------------------------------------------------------------------------
/lib/models/backbones/Utitled Document:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/Utitled Document
--------------------------------------------------------------------------------
/lib/models/backbones/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/__init__.py
--------------------------------------------------------------------------------
/lib/models/backbones/darknet.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import OrderedDict
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 |
9 | class BasicBlock(nn.Module):
10 | def __init__(self, inplanes, planes):
11 | super(BasicBlock, self).__init__()
12 | self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1,
13 | stride=1, padding=0, bias=False)
14 | self.bn1 = nn.BatchNorm2d(planes[0])
15 | self.relu1 = nn.LeakyReLU(0.1)
16 | self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3,
17 | stride=1, padding=1, bias=False)
18 | self.bn2 = nn.BatchNorm2d(planes[1])
19 | self.relu2 = nn.LeakyReLU(0.1)
20 |
21 | def forward(self, x):
22 | residual = x
23 |
24 | out = self.conv1(x)
25 | out = self.bn1(out)
26 | out = self.relu1(out)
27 |
28 | out = self.conv2(out)
29 | out = self.bn2(out)
30 | out = self.relu2(out)
31 |
32 | out += residual
33 | return out
34 |
35 |
36 | class DarkNet(nn.Module):
37 | def __init__(self, layers):
38 | super(DarkNet, self).__init__()
39 | self.inplanes = 32
40 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
41 | self.bn1 = nn.BatchNorm2d(self.inplanes)
42 | self.relu1 = nn.LeakyReLU(0.1)
43 |
44 | self.layer1 = self._make_layer([32, 64], layers[0])
45 | self.layer2 = self._make_layer([64, 128], layers[1])
46 | self.layer3 = self._make_layer([128, 256], layers[2])
47 | #self.layer4 = self._make_layer([256, 512], layers[3])
48 | #self.layer5 = self._make_layer([512, 1024], layers[4])
49 |
50 | self.layers_out_filters = [64, 128, 256]
51 |
52 | for m in self.modules():
53 | if isinstance(m, nn.BatchNorm2d):
54 | m.weight.data.fill_(1)
55 | m.bias.data.zero_()
56 |
57 | def _make_layer(self, planes, blocks):
58 | layers = []
59 | # downsample
60 | layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
61 | stride=2, padding=1, bias=False)))
62 | layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
63 | layers.append(("ds_relu", nn.LeakyReLU(0.1)))
64 | # blocks
65 | self.inplanes = planes[1]
66 | for i in range(0, blocks):
67 | layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
68 | return nn.Sequential(OrderedDict(layers))
69 |
70 | def forward(self, x):
71 | x = self.conv1(x)
72 | x = self.bn1(x)
73 | x = self.relu1(x)
74 |
75 | x = self.layer1(x)
76 | x = self.layer2(x)
77 | x = self.layer3(x)
78 | x = F.interpolate(x, size=(128, 128),
79 | mode="bilinear", align_corners=True)
80 |
81 | return x
82 |
83 |
84 | def darknet21(cfg, is_train=True, **kwargs):
85 | model = DarkNet([1, 1, 2, 2, 1])
86 | if is_train and cfg.BACKBONE.INIT_WEIGHTS:
87 | if isinstance(cfg.BACKBONE.PRETRAINED, str):
88 | model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED))
89 | else:
90 | raise Exception("darknet requires a pretrained path, got [{}]".format(cfg.BACKBONE.PRETRAINED))
91 | return model
92 |
93 | def darknet53(num_layers, cfg):
94 | model = DarkNet([1, 2, 8])
95 | #if is_train and cfg.BACKBONE.INIT_WEIGHTS:
96 | # if isinstance(cfg.BACKBONE.PRETRAINED, str):
97 | # model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED))
98 | # else:
99 | # raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED))
100 | return model
101 |
--------------------------------------------------------------------------------
/lib/models/backbones/efficientdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .efficientdet import EfficientDet
2 |
3 |
4 | def get_efficientdet(num_layers, cfg):
5 | model = EfficientDet(intermediate_channels=cfg.MODEL.INTERMEDIATE_CHANNEL)
6 | return model
7 |
--------------------------------------------------------------------------------
/lib/models/backbones/efficientdet/efficientdet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 | from .efficientnet import EfficientNet
5 | from .bifpn import BIFPN
6 | from .retinahead import RetinaHead
7 | from torchvision.ops import nms
8 | import torch.nn.functional as F
9 |
10 | MODEL_MAP = {
11 | 'efficientdet-d0': 'efficientnet-b0',
12 | 'efficientdet-d1': 'efficientnet-b1',
13 | 'efficientdet-d2': 'efficientnet-b2',
14 | 'efficientdet-d3': 'efficientnet-b3',
15 | 'efficientdet-d4': 'efficientnet-b4',
16 | 'efficientdet-d5': 'efficientnet-b5',
17 | 'efficientdet-d6': 'efficientnet-b6',
18 | 'efficientdet-d7': 'efficientnet-b6',
19 | }
20 | class EfficientDet(nn.Module):
21 | def __init__(self,
22 | intermediate_channels,
23 | network = 'efficientdet-d0',
24 | D_bifpn=3,
25 | W_bifpn=32,
26 | D_class=3,
27 | scale_ratios = [0.5, 1, 2, 4, 8, 16, 32],
28 | ):
29 | super(EfficientDet, self).__init__()
30 | self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network])
31 | self.neck = BIFPN(in_channels=self.backbone.get_list_features(),
32 | out_channels=W_bifpn,
33 | stack=D_bifpn,
34 | num_outs=7)
35 | self.bbox_head = RetinaHead(num_classes = intermediate_channels,
36 | in_channels = W_bifpn)
37 |
38 | self.scale_ratios = scale_ratios
39 | for m in self.modules():
40 | if isinstance(m, nn.Conv2d):
41 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
42 | m.weight.data.normal_(0, math.sqrt(2. / n))
43 | elif isinstance(m, nn.BatchNorm2d):
44 | m.weight.data.fill_(1)
45 | m.bias.data.zero_()
46 | self.freeze_bn()
47 |
48 | def forward(self, inputs):
49 | x = self.extract_feat(inputs)
50 | outs = self.bbox_head(x)
51 |
52 | return outs[0][1]
53 |
54 | def freeze_bn(self):
55 | '''Freeze BatchNorm layers.'''
56 | for layer in self.modules():
57 | if isinstance(layer, nn.BatchNorm2d):
58 | layer.eval()
59 | def extract_feat(self, img):
60 | """
61 | Directly extract features from the backbone+neck
62 | """
63 | x = self.backbone(img)
64 | x = self.neck(x)
65 | return x
66 |
67 |
--------------------------------------------------------------------------------
/lib/models/backbones/efficientdet/retinahead.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import numpy as np
3 | import torch.nn as nn
4 | from .conv_module import ConvModule, bias_init_with_prob, normal_init
5 | from six.moves import map, zip
6 |
7 | def multi_apply(func, *args, **kwargs):
8 | pfunc = partial(func, **kwargs) if kwargs else func
9 | map_results = map(pfunc, *args)
10 | return tuple(map(list, zip(*map_results)))
11 |
12 | class RetinaHead(nn.Module):
13 | """
14 | An anchor-based head used in [1]_.
15 | The head contains two subnetworks. The first classifies anchor boxes and
16 | the second regresses deltas for the anchors.
17 | References:
18 | .. [1] https://arxiv.org/pdf/1708.02002.pdf
19 | Example:
20 | >>> import torch
21 | >>> self = RetinaHead(11, 7)
22 | >>> x = torch.rand(1, 7, 32, 32)
23 | >>> cls_score, bbox_pred = self.forward_single(x)
24 | >>> # Each anchor predicts a score for each class except background
25 | >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors
26 | >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors
27 | >>> assert cls_per_anchor == (self.num_classes - 1)
28 | >>> assert box_per_anchor == 4
29 | """
30 |
31 | def __init__(self,
32 | num_classes,
33 | in_channels,
34 | feat_channels=64,
35 | stacked_convs=4,
36 | octave_base_scale=4,
37 | scales_per_octave=3,
38 | conv_cfg=None,
39 | norm_cfg=None,
40 | **kwargs):
41 | super(RetinaHead, self).__init__()
42 | self.in_channels = in_channels
43 | self.num_classes = num_classes
44 | self.feat_channels = feat_channels
45 | self.stacked_convs = stacked_convs
46 | self.octave_base_scale = octave_base_scale
47 | self.scales_per_octave = scales_per_octave
48 | self.conv_cfg = conv_cfg
49 | self.norm_cfg = norm_cfg
50 | octave_scales = np.array(
51 | [2**(i / scales_per_octave) for i in range(scales_per_octave)])
52 | self.cls_out_channels = num_classes
53 | self._init_layers()
54 | def _init_layers(self):
55 | self.relu = nn.ReLU(inplace=True)
56 | self.cls_convs = nn.ModuleList()
57 | #self.reg_convs = nn.ModuleList()
58 | for i in range(self.stacked_convs):
59 | chn = self.in_channels if i == 0 else self.feat_channels
60 | self.cls_convs.append(
61 | ConvModule(
62 | chn,
63 | self.feat_channels,
64 | 3,
65 | stride=1,
66 | padding=1,
67 | conv_cfg=self.conv_cfg,
68 | norm_cfg=self.norm_cfg))
69 | self.retina_cls = nn.Conv2d(
70 | self.feat_channels,
71 | self.cls_out_channels,
72 | 3,
73 | padding=1)
74 | #self.output_act = nn.Sigmoid()
75 |
76 | def init_weights(self):
77 | for m in self.cls_convs:
78 | normal_init(m.conv, std=0.01)
79 |         #for m in self.reg_convs:  # the regression branch is commented out in _init_layers
80 |         #    normal_init(m.conv, std=0.01)
81 | bias_cls = bias_init_with_prob(0.01)
82 | normal_init(self.retina_cls, std=0.01, bias=bias_cls)
83 | #normal_init(self.retina_reg, std=0.01)
84 |
85 | def forward_single(self, x):
86 | cls_feat = x
87 | #reg_feat = x
88 | for cls_conv in self.cls_convs:
89 | cls_feat = cls_conv(cls_feat)
90 | #for reg_conv in self.reg_convs:
91 | # reg_feat = reg_conv(reg_feat)
92 |
93 | cls_score = self.retina_cls(cls_feat)
94 |         # cls_score is B x C x H x W with C = num_classes (classification branch only)
95 | #cls_score = cls_score.permute(0, 2, 3, 1)
96 | #batch_size, width, height, channels = cls_score.shape
97 | #cls_score = cls_score.view(batch_size, width, height, self.num_anchors, self.num_classes)
98 | #cls_score = cls_score.contiguous().view(x.size(0), -1, self.num_classes)
99 |
100 |
101 | #bbox_pred = self.retina_reg(reg_feat)
102 | #bbox_pred = bbox_pred.permute(0, 2, 3, 1)
103 | #bbox_pred = bbox_pred.contiguous().view(bbox_pred.size(0), -1, 4)
104 | return [cls_score]
105 | def forward(self, feats):
106 | return multi_apply(self.forward_single, feats)
107 |
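A quick smoke test of the head as written: `multi_apply` maps `forward_single` over the list of pyramid features, and because `forward_single` returns a one-element list, `forward` returns a one-element tuple whose item is the list of per-level class maps (EfficientDet then keeps only `outs[0][1]`, the map from the second level). The sketch below assumes `RetinaHead` is importable from this module and that `ConvModule` builds with `conv_cfg=None` and `norm_cfg=None`; the channel counts and feature sizes are illustrative only.

~~~
import torch

# hypothetical smoke test; in this repo the head is built by EfficientDet with
# num_classes=intermediate_channels and in_channels=W_bifpn
head = RetinaHead(num_classes=17, in_channels=64)
feats = [torch.rand(1, 64, s, s) for s in (64, 32, 16, 8, 4)]  # 5 BiFPN levels
(cls_scores,) = head(feats)            # one-element tuple from multi_apply
print([t.shape for t in cls_scores])   # one B x num_classes x H x W map per level
~~~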
--------------------------------------------------------------------------------
/lib/models/backbones/mobilenet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/mobilenet/__init__.py
--------------------------------------------------------------------------------
/lib/models/backbones/test_mode.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from darknet import darknet53
4 | from hardnet import hardnet
5 |
6 | model = hardnet(19).cuda()
7 | inputs = torch.randn((1,3,512,512)).cuda()
8 |
9 | outs = model(inputs)
10 |
11 | print(outs.shape)
12 |
13 |
14 |
15 | model = darknet53(0,1,2).cuda()
16 |
17 | inputs = torch.randn((1,3,512,512)).cuda()
18 |
19 | outs = model(inputs)
20 |
21 | print(outs.shape)
22 |
--------------------------------------------------------------------------------
/lib/models/heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/__init__.py
--------------------------------------------------------------------------------
/lib/models/heads/keypoint.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import os
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 |
9 | class KeypointHead(nn.Module):
10 |
11 | def __init__(self, intermediate_channel, head_conv):
12 | super(KeypointHead, self).__init__()
13 |
14 | self.hm = nn.Sequential(
15 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
16 | nn.ReLU(inplace=True),
17 | nn.Conv2d(head_conv, 1, kernel_size=1, stride=1, padding=0))
18 | self.wh = nn.Sequential(
19 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
20 | nn.ReLU(inplace=True),
21 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0))
22 | self.hps = nn.Sequential(
23 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
24 | nn.ReLU(inplace=True),
25 | nn.Conv2d(head_conv, 34, kernel_size=1, stride=1, padding=0))
26 | self.reg = nn.Sequential(
27 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
28 | nn.ReLU(inplace=True),
29 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0))
30 | self.hm_hp = nn.Sequential(
31 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
32 | nn.ReLU(inplace=True),
33 | nn.Conv2d(head_conv, 17, kernel_size=1, stride=1, padding=0))
34 | self.hp_offset = nn.Sequential(
35 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True),
36 | nn.ReLU(inplace=True),
37 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0))
38 | self.init_weights()
39 |
40 | def forward(self, x):
41 |
42 | return [self.hm(x), self.wh(x), self.hps(x), self.reg(x), self.hm_hp(x), self.hp_offset(x)]
43 |
44 | def init_weights(self):
45 | self.hm[-1].bias.data.fill_(-2.19)
46 | self.hm_hp[-1].bias.data.fill_(-2.19)
47 | self.fill_fc_weights(self.wh)
48 | self.fill_fc_weights(self.hps)
49 | self.fill_fc_weights(self.reg)
50 | self.fill_fc_weights(self.hp_offset)
51 |
52 | def fill_fc_weights(self, layers):
53 | for m in layers.modules():
54 | if isinstance(m, nn.Conv2d):
55 | nn.init.normal_(m.weight, std=0.001)
56 | if m.bias is not None:
57 | nn.init.constant_(m.bias, 0)
58 |
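The six branches map a shared backbone feature to the CenterNet multi-pose targets, with channel counts fixed in the code above: 1 (person-center heatmap), 2 (box width/height), 34 (17 keypoints x 2 offsets from the center), 2 (center sub-pixel offset), 17 (per-keypoint heatmaps) and 2 (keypoint sub-pixel offset). A shape check with an assumed 64-channel feature map at 128x128; the real values come from `cfg.MODEL.INTERMEDIATE_CHANNEL` and the backbone's output stride.

~~~
import torch

head = KeypointHead(intermediate_channel=64, head_conv=256)
feat = torch.rand(2, 64, 128, 128)
hm, wh, hps, reg, hm_hp, hp_offset = head(feat)
print(hm.shape, wh.shape, hps.shape)             # (2,1,128,128) (2,2,128,128) (2,34,128,128)
print(reg.shape, hm_hp.shape, hp_offset.shape)   # (2,2,128,128) (2,17,128,128) (2,2,128,128)
~~~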
--------------------------------------------------------------------------------
/lib/models/heads/mask.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/mask.py
--------------------------------------------------------------------------------
/lib/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import os
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torchvision.models as models
8 |
9 | from .backbones.darknet import darknet53
10 | from .backbones.dlav0 import get_pose_net as get_dlav0
11 | from .backbones.hardnet import get_hard_net
12 | from .backbones.large_hourglass import get_large_hourglass_net
13 | from .backbones.mobilenet.mobilenetv3 import get_mobile_pose_netv3
14 | from .backbones.mobilenet.mobilenetv2 import get_mobile_pose_netv2
15 | from .backbones.msra_resnet import get_resnet
16 | from .backbones.pose_dla_dcn import get_pose_net as get_dla_dcn
17 | from .backbones.pose_higher_hrnet import get_hrpose_net
18 | from .backbones.resnet_dcn import get_pose_net as get_pose_net_dcn
19 | from .backbones.shufflenetv2_dcn import get_shufflev2_net
20 | from .backbones.ghost_net import get_ghost_net
21 | from .backbones.efficientdet import get_efficientdet
22 | from .heads.keypoint import KeypointHead
23 |
24 | _backbone_factory = {
25 | 'res': get_resnet, # default Resnet with deconv
26 | 'dlav0': get_dlav0, # default DLAup
27 | 'dla': get_dla_dcn,
28 | 'resdcn': get_pose_net_dcn,
29 | 'hourglass': get_large_hourglass_net,
30 | 'mobilenetv3': get_mobile_pose_netv3,
31 | 'mobilenetv2': get_mobile_pose_netv2,
32 | 'shufflenetV2': get_shufflev2_net,
33 | 'hrnet': get_hrpose_net,
34 | 'hardnet': get_hard_net,
35 | 'darknet': darknet53,
36 | 'ghostnet': get_ghost_net,
37 | 'efficientdet':get_efficientdet,
38 | }
39 |
40 | _head_factory = {
41 | 'keypoint': KeypointHead
42 | }
43 |
44 | class BackBoneWithHead(nn.Module):
45 |
46 | def __init__(self, arch, head_conv, cfg):
47 | super(BackBoneWithHead, self).__init__()
48 |
49 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
50 | arch = arch[:arch.find('_')] if '_' in arch else arch
51 | backbone = _backbone_factory[arch]
52 | self.backbone_model = backbone(num_layers=num_layers, cfg = cfg)
53 |
54 | head = _head_factory[cfg.MODEL.HEADS_NAME]
55 | self.head_model = head(cfg.MODEL.INTERMEDIATE_CHANNEL, cfg.MODEL.HEAD_CONV)
56 |
57 | def forward(self, x):
58 | x = self.backbone_model(x)
59 | return self.head_model(x)
60 |
61 |
62 |
63 | def create_model(arch, head_conv, cfg):
64 |
65 | return BackBoneWithHead(arch, head_conv, cfg)
66 |
67 | def load_model(model, model_path, optimizer=None, resume=False,
68 | lr=None, lr_step=None):
69 | start_epoch = 0
70 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
71 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
72 | state_dict_ = checkpoint['state_dict']
73 | state_dict = {}
74 |
75 | # convert data_parallal to model
76 | for k in state_dict_:
77 | if k.startswith('module') and not k.startswith('module_list'):
78 | state_dict[k[7:]] = state_dict_[k]
79 | else:
80 | state_dict[k] = state_dict_[k]
81 | model_state_dict = model.state_dict()
82 |
83 | # check loaded parameters and created model parameters
84 | msg = 'If you see this, your model does not fully load the ' + \
85 | 'pre-trained weight. Please make sure ' + \
86 | 'you have correctly specified --arch xxx ' + \
87 | 'or set the correct --num_classes for your own dataset.'
88 | for k in state_dict:
89 | if k in model_state_dict:
90 | if state_dict[k].shape != model_state_dict[k].shape:
91 | print('Skip loading parameter {}, required shape{}, '\
92 | 'loaded shape{}. {}'.format(
93 | k, model_state_dict[k].shape, state_dict[k].shape, msg))
94 | state_dict[k] = model_state_dict[k]
95 | else:
96 | print('Drop parameter {}.'.format(k) + msg)
97 | for k in model_state_dict:
98 | if not (k in state_dict):
99 | print('No param {}.'.format(k) + msg)
100 | state_dict[k] = model_state_dict[k]
101 | model.load_state_dict(state_dict, strict=False)
102 |
103 | # resume optimizer parameters
104 | if optimizer is not None and resume:
105 | if 'optimizer' in checkpoint:
106 | optimizer.load_state_dict(checkpoint['optimizer'])
107 | start_epoch = checkpoint['epoch']
108 | start_lr = lr
109 | for step in lr_step:
110 | if start_epoch >= step:
111 | start_lr *= 0.1
112 | for param_group in optimizer.param_groups:
113 | param_group['lr'] = start_lr
114 | print('Resumed optimizer with start lr', start_lr)
115 | else:
116 | print('No optimizer parameters in checkpoint.')
117 | if optimizer is not None:
118 | return model, optimizer, start_epoch
119 | else:
120 | return model
121 |
122 | def save_model(path, epoch, model, optimizer=None):
123 | if isinstance(model, torch.nn.DataParallel):
124 | state_dict = model.module.state_dict()
125 | else:
126 | state_dict = model.state_dict()
127 | data = {'epoch': epoch,
128 | 'state_dict': state_dict}
129 | if not (optimizer is None):
130 | data['optimizer'] = optimizer.state_dict()
131 | torch.save(data, path)
132 |
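`save_model` stores `{'epoch', 'state_dict', 'optimizer'}` and `load_model` tolerates `module.`-prefixed keys and shape mismatches (it falls back to the freshly created parameters and loads with `strict=False`). A minimal round trip, using a toy module in place of a model built by `create_model(arch, head_conv, cfg)`; the file name and hyper-parameters are illustrative only.

~~~
import torch
import torch.nn as nn

model = nn.Conv2d(3, 8, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=1.25e-4)
save_model('model_last.pth', epoch=5, model=model, optimizer=optimizer)

model2 = nn.Conv2d(3, 8, 3)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1.25e-4)
model2, optimizer2, start_epoch = load_model(
    model2, 'model_last.pth', optimizer=optimizer2,
    resume=True, lr=1.25e-4, lr_step=[90, 120])
print(start_epoch)   # 5; the lr is decayed by 0.1 for every lr_step already passed
~~~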
--------------------------------------------------------------------------------
/lib/models/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 |
7 | def _sigmoid(x):
8 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
9 | return y
10 |
11 | def _gather_feat(feat, ind, mask=None):
12 | dim = feat.size(2)
13 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
14 | feat = feat.gather(1, ind)
15 | if mask is not None:
16 | mask = mask.unsqueeze(2).expand_as(feat)
17 | feat = feat[mask]
18 | feat = feat.view(-1, dim)
19 | return feat
20 |
21 | def _transpose_and_gather_feat(feat, ind):
22 | feat = feat.permute(0, 2, 3, 1).contiguous()
23 | feat = feat.view(feat.size(0), -1, feat.size(3))
24 | feat = _gather_feat(feat, ind)
25 | return feat
26 |
27 | def flip_tensor(x):
28 | return torch.flip(x, [3])
29 |
30 | def flip_lr(x, flip_idx):
31 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
32 | shape = tmp.shape
33 | for e in flip_idx:
34 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
35 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
36 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
37 |
38 | def flip_lr_off(x, flip_idx):
39 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
40 | shape = tmp.shape
41 | tmp = tmp.reshape(tmp.shape[0], 17, 2,
42 | tmp.shape[2], tmp.shape[3])
43 | tmp[:, :, 0, :, :] *= -1
44 | for e in flip_idx:
45 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
46 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
47 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
48 |
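`_transpose_and_gather_feat` is the bridge between dense head outputs (B x C x H x W) and per-object supervision: `ind` holds flattened center positions (`y * W + x`), and the helper returns a B x maxN x C tensor of the channel vectors at those positions. A toy run:

~~~
import torch

feat = torch.arange(2 * 4 * 3 * 3, dtype=torch.float32).view(2, 4, 3, 3)  # B=2, C=4, H=W=3
ind = torch.tensor([[0, 4],    # batch 0: objects at (y=0, x=0) and (y=1, x=1)
                    [8, 8]])   # batch 1: the same cell twice
out = _transpose_and_gather_feat(feat, ind)
print(out.shape)                                  # torch.Size([2, 2, 4])
print(torch.equal(out[0, 0], feat[0, :, 0, 0]))   # True
~~~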
--------------------------------------------------------------------------------
/lib/trains/base_trainer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import time
4 |
5 | import torch
6 | import torch.nn as nn
7 | from progress.bar import Bar
8 |
9 | from utils.utils import AverageMeter
10 |
11 |
12 | class BaseTrainer(object):
13 | def __init__(
14 | self, cfg, local_rank, model, optimizer=None):
15 | self.cfg = cfg
16 | self.optimizer = optimizer
17 | self.loss_stats, self.loss = self._get_losses(cfg, local_rank)
18 | self.model = model
19 | self.local_rank = local_rank
20 |
21 | def set_device(self, gpus, chunk_sizes, device):
22 |
23 | if self.cfg.TRAIN.DISTRIBUTE:
24 | self.model = self.model.to(device)
25 | self.model = nn.parallel.DistributedDataParallel(self.model, find_unused_parameters=True,
26 | device_ids=[self.local_rank, ],
27 | output_device=self.local_rank)
28 | else:
29 | self.model = nn.DataParallel(self.model).to(device)
30 | self.loss.to(device)
31 | for state in self.optimizer.state.values():
32 | for k, v in state.items():
33 | if isinstance(v, torch.Tensor):
34 | state[k] = v.to(device=device, non_blocking=True)
35 |
36 | def run_epoch(self, phase, epoch, data_loader):
37 |
38 | model = self.model
39 | if phase == 'train':
40 | self.model.train()
41 | else:
42 | if len(self.cfg.GPUS) > 1:
43 | model = model.module
44 | model.eval()
45 | torch.cuda.empty_cache()
46 |
47 | cfg = self.cfg
48 | results = {}
49 | data_time, batch_time = AverageMeter(), AverageMeter()
50 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
51 | num_iters = len(data_loader)
52 | bar = Bar('{}/{}'.format(cfg.TASK, cfg.EXP_ID), max=num_iters)
53 | end = time.time()
54 | for iter_id, batch in enumerate(data_loader):
55 | if iter_id >= num_iters:
56 | break
57 | data_time.update(time.time() - end)
58 |
59 | for k in batch:
60 | if k != 'meta':
61 | batch[k] = batch[k].to(device=torch.device('cuda:%d'%self.local_rank), non_blocking=True)
62 |
63 | outputs = model(batch['input'])
64 | loss, loss_stats = self.loss(outputs, batch)
65 |
66 | loss = loss.mean()
67 | if phase == 'train':
68 | self.optimizer.zero_grad()
69 | loss.backward()
70 | self.optimizer.step()
71 | batch_time.update(time.time() - end)
72 | end = time.time()
73 |
74 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
75 | epoch, iter_id, num_iters, phase=phase,
76 | total=bar.elapsed_td, eta=bar.eta_td)
77 | for l in avg_loss_stats:
78 | avg_loss_stats[l].update(
79 | loss_stats[l].mean().item(), batch['input'].size(0))
80 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
81 | if not cfg.TRAIN.HIDE_DATA_TIME:
82 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
83 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
84 | if cfg.PRINT_FREQ > 0:
85 | if iter_id % cfg.PRINT_FREQ == 0:
86 | print('{}/{}| {}'.format(cfg.TASK, cfg.EXP_ID, Bar.suffix))
87 | else:
88 | bar.next()
89 |
90 | if cfg.DEBUG > 0:
91 | self.debug(batch, outputs, iter_id)
92 |
93 | if phase == 'val':
94 | self.save_result(outputs, batch, results)
95 | del outputs, loss, loss_stats
96 |
97 | bar.finish()
98 | ret = {k: v.avg for k, v in avg_loss_stats.items()}
99 | ret['time'] = bar.elapsed_td.total_seconds() / 60.
100 |
101 | return ret, results
102 |
103 | def debug(self, batch, output, iter_id):
104 | raise NotImplementedError
105 |
106 | def save_result(self, output, batch, results):
107 | raise NotImplementedError
108 |
109 |     def _get_losses(self, cfg, local_rank):
110 | raise NotImplementedError
111 |
112 | def val(self, epoch, data_loader):
113 | return self.run_epoch('val', epoch, data_loader)
114 |
115 | def train(self, epoch, data_loader):
116 | return self.run_epoch('train', epoch, data_loader)
117 |
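A schematic driver around `BaseTrainer`, assuming the concrete `MultiPoseTrainer` from `lib/trains/multi_pose.py` keeps the same constructor and that `cfg`, `model`, `optimizer` and the data loaders are built as in the project's training entry point (those objects are assumptions of this sketch, not shown here).

~~~
import torch

trainer = MultiPoseTrainer(cfg, local_rank=0, model=model, optimizer=optimizer)
trainer.set_device(cfg.GPUS, chunk_sizes=None, device=torch.device('cuda:0'))

num_epochs = 140   # illustrative; the real value comes from the config
for epoch in range(1, num_epochs + 1):
    train_stats, _ = trainer.train(epoch, train_loader)   # dict of averaged losses + 'time'
    val_stats, results = trainer.val(epoch, val_loader)
~~~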
--------------------------------------------------------------------------------
/lib/trains/train_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | from .multi_pose import MultiPoseTrainer
4 |
5 |
6 | train_factory = {
7 | 'multi_pose': MultiPoseTrainer,
8 | }
9 |
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/utils/__init__.py
--------------------------------------------------------------------------------
/lib/utils/oracle_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numba
4 | import numpy as np
5 |
6 |
7 | @numba.jit(nopython=True, nogil=True)
8 | def gen_oracle_map(feat, ind, w, h):
9 | # feat: B x maxN x featDim
10 | # ind: B x maxN
11 | batch_size = feat.shape[0]
12 | max_objs = feat.shape[1]
13 | feat_dim = feat.shape[2]
14 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32)
15 | vis = np.zeros((batch_size, h, w), dtype=np.uint8)
16 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)]
17 | for i in range(batch_size):
18 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32)
19 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32)
20 | head, tail = 0, 0
21 | for j in range(max_objs):
22 | if ind[i][j] > 0:
23 | x, y = ind[i][j] % w, ind[i][j] // w
24 | out[i, :, y, x] = feat[i][j]
25 | vis[i, y, x] = 1
26 | queue_ind[tail] = x, y
27 | queue_feat[tail] = feat[i][j]
28 | tail += 1
29 | while tail - head > 0:
30 | x, y = queue_ind[head]
31 | f = queue_feat[head]
32 | head += 1
33 | for (dx, dy) in ds:
34 | xx, yy = x + dx, y + dy
35 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1:
36 | out[i, :, yy, xx] = f
37 | vis[i, yy, xx] = 1
38 | queue_ind[tail] = xx, yy
39 | queue_feat[tail] = f
40 | tail += 1
41 | return out
42 |
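`gen_oracle_map` scatters each object's ground-truth feature vector at its center cell and then flood-fills (a BFS over 4-neighbours) so every output cell carries the feature of the nearest seeded object. A tiny example with one object seeded at flat index 5 of a 4x4 map (x=1, y=1):

~~~
import numpy as np

feat = np.array([[[7.0, 3.0]]], dtype=np.float32)   # B=1, maxN=1, featDim=2
ind = np.array([[5]], dtype=np.int64)                # flattened position y*w + x
out = gen_oracle_map(feat, ind, w=4, h=4)
print(out.shape)                 # (1, 2, 4, 4)
print(np.unique(out[0, 0]))      # [7.]: the single seed fills the whole map
~~~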
--------------------------------------------------------------------------------
/lib/utils/post_process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import numpy as np
4 |
5 | from .image import transform_preds
6 |
7 |
8 | def multi_pose_post_process(dets, c, s, h, w):
9 |     # dets: batch x max_dets x 56 (4 bbox, 1 score, 34 keypoint coords, 17 keypoint scores)
10 |     # returns a list with one dict per image, mapping class id 1 (person) to the 56-dim detections in image coordinates
11 | ret = []
12 | for i in range(dets.shape[0]):
13 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h))
14 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h))
15 | top_preds = np.concatenate(
16 | [bbox.reshape(-1, 4), dets[i, :, 4:5],
17 | pts.reshape(-1, 34), dets[i, :, 39:56]], axis=1).astype(np.float32).tolist()
18 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
19 | return ret
20 |
21 |
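A shape-only sanity check of `multi_pose_post_process`, assuming the 56-column detection layout used above (4 bbox + 1 score + 34 keypoint coordinates + 17 keypoint scores). `c` and `s` stand for the affine center and scale recorded at pre-processing time; the values below are illustrative.

~~~
import numpy as np

dets = np.random.rand(1, 100, 56).astype(np.float32)
c = np.array([[320.0, 240.0]])   # image center used for the input affine transform
s = np.array([512.0])            # scale (max image side) used for the same transform
out = multi_pose_post_process(dets, c, s, h=128, w=128)
print(len(out), list(out[0].keys()))   # 1 [1] -> one dict per image, keyed by class id 1 (person)
print(np.array(out[0][1]).shape)       # (100, 56)
~~~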
--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import torch
4 |
5 |
6 | class AverageMeter(object):
7 | """Computes and stores the average and current value"""
8 | def __init__(self):
9 | self.reset()
10 |
11 | def reset(self):
12 | self.val = 0
13 | self.avg = 0
14 | self.sum = 0
15 | self.count = 0
16 |
17 | def update(self, val, n=1):
18 | self.val = val
19 | self.sum += val * n
20 | self.count += n
21 | if self.count > 0:
22 | self.avg = self.sum / self.count
23 |
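`AverageMeter` keeps a sample-weighted running mean, which is how the trainer averages losses over an epoch (`n` is the batch size):

~~~
meter = AverageMeter()
for loss in (0.9, 0.7, 0.5):
    meter.update(loss, n=8)   # three batches of 8 samples each
print(meter.val, meter.avg)   # 0.5 0.7 -> last value and the running mean
~~~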
--------------------------------------------------------------------------------
/readme/DATA.md:
--------------------------------------------------------------------------------
1 | # Dataset preparation
2 |
3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to set up the datasets.
4 |
5 |
6 | ### COCO
7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download).
8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download).
9 | - Place the data (or create symlinks) to make the data folder like:
10 |
11 | ~~~
12 | ${CenterNet_ROOT}
13 | |-- data
14 | `-- |-- coco
15 | `-- |-- annotations
16 | | |-- instances_train2017.json
17 | | |-- instances_val2017.json
18 | | |-- person_keypoints_train2017.json
19 | | |-- person_keypoints_val2017.json
20 | | |-- image_info_test-dev2017.json
21 | |---|-- train2017
22 | |---|-- val2017
23 | `---|-- test2017
24 | ~~~
25 |
--------------------------------------------------------------------------------
/readme/DEVELOP.md:
--------------------------------------------------------------------------------
1 | # Develop
2 |
3 | This document provides tutorials to develop CenterNet. `lib/config/default.py` lists a few more options that the current version supports.
4 |
5 | ## New dataset
6 | Basically there are three steps:
7 |
8 | - Convert the dataset annotations to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example of converting KITTI format to COCO format.
9 | - Create a dataset initialization file in `src/lib/datasets/dataset`. In most cases you can simply copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information and annotation path.
10 | - Import your dataset at `src/lib/datasets/dataset_factory`.
11 |
12 | ## New task
13 |
14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/datasets/trains/`, and `src/lib/datasets/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively.
15 |
16 | ## New architecture
17 |
18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list (one entry per stage; a single-stage model should return a list containing a single element). Each element of the list is a dict containing the same keys as `heads` (see the sketch below).
19 | - Add your model in `model_factory` of `src/lib/models/model.py`.
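A minimal sketch of the interface described above, using a toy backbone that is not part of this repo (names and channel sizes are illustrative):

~~~
import torch
import torch.nn as nn

class ToyNet(nn.Module):
    """Hypothetical single-stage model following the `heads` contract."""
    def __init__(self, heads):
        super(ToyNet, self).__init__()
        self.heads = heads
        self.base = nn.Conv2d(3, 64, 3, padding=1)
        for name, channels in heads.items():
            self.__setattr__(name, nn.Conv2d(64, channels, 1))

    def forward(self, x):
        x = self.base(x)
        out = {name: self.__getattr__(name)(x) for name in self.heads}
        return [out]   # one dict per stage, keyed like `heads`

heads = {'hm': 1, 'wh': 2, 'reg': 2}
net = ToyNet(heads)
outputs = net(torch.rand(1, 3, 64, 64))
assert set(outputs[0].keys()) == set(heads)
~~~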
--------------------------------------------------------------------------------
/readme/GETTING_STARTED.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md).
4 |
5 | ## Benchmark evaluation
6 |
7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`.
8 |
9 | ### COCO
10 |
11 | To evaluate COCO object detection with DLA
12 | run
13 |
14 | ~~~
15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth
16 | ~~~
17 |
18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution; without it, images are resized to `512 x 512`. You can add `--flip_test` or `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command for flip testing and multi-scale testing, respectively. The expected APs are `39.2` and `41.7`, respectively.
19 |
20 | To test with hourglass net, run
21 |
22 | ~~~
23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth
24 | ~~~
25 |
26 | Similarly, to evaluate human pose estimation, run the following command for dla
27 |
28 | ~~~
29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test
30 | ~~~
31 |
32 | and the following for hourglass
33 |
34 | ~~~
35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_hg_3x.pth --flip_test
36 | ~~~
37 |
38 | The expected results can be found in the model zoo.
39 |
40 | ### Pascal
41 |
42 | To evaluate object detection on Pascal VOC (test2007), run
43 |
44 | ~~~
45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test
46 | ~~~
47 |
48 | Note that we fix the resolution during testing.
49 | You can change to other network architectures and resolutions by specifying `--arch` and `--input_res` (e.g., `--input_res 512`).
50 |
51 | ### KITTI
52 |
53 | To evaluate the kitti dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)):
54 |
55 | ~~~
56 | cd CenterNet_ROOT/src/tools/kitti_eval
57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3
58 | ~~~
59 |
60 | Then run the evaluation with pretrained model:
61 |
62 | ~~~
63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth
64 | ~~~
65 |
66 | to evaluate the 3DOP split. For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models.
67 | Note that test time augmentation is not trivially applicable for 3D orientation.
68 |
69 | ## Training
70 |
71 | We have packed all the training scripts in the [experiments](../experiments) folder.
72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md).
73 | The number of GPUs for each experiment can be found in the scripts and the model zoo.
74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) and scale the learning rate with the batch size (e.g., halving the batch size means halving the learning rate).
75 | For example, to train COCO object detection with dla on 2 GPUs, run
76 |
77 | ~~~
78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1
79 | ~~~
80 |
81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs).
82 | By default, pytorch evenly splits the total batch size to each GPUs.
83 | `--master_batch` allows using a different batch size on the master GPU, which usually costs more memory than the other GPUs.
84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine.
85 |
86 | If training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`.
87 |
88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1omiOUjWCrFbTJREypuZaODu0bOlF_7Fg/view?usp=sharing) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)).
89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)).
90 |
--------------------------------------------------------------------------------
/readme/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 |
4 | The code was tested on Ubuntu 18.04 with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.1. NVIDIA GPUs are needed for both training and testing.
5 | After installing Anaconda:
6 |
7 | 0. [Optional but recommended] create a new conda environment.
8 |
9 | ~~~
10 | conda create --name CenterNet python=3.6
11 | ~~~
12 | And activate the environment.
13 |
14 | ~~~
15 | conda activate CenterNet
16 | ~~~
17 |
18 | 1. Install PyTorch 1.1:
19 |
20 | ~~~
21 | pip install torch==1.1
22 | ~~~
23 |
24 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi):
25 |
26 | ~~~
27 | # COCOAPI=/path/to/clone/cocoapi
28 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
29 | cd $COCOAPI/PythonAPI
30 | make
31 | python setup.py install --user
32 | ~~~
33 |
34 | 3. Clone this repo:
35 |
36 | ~~~
37 | CenterNet_ROOT=/path/to/clone/CenterNet
38 | git clone https://github.com/tensorboy/centerpose $CenterNet_ROOT
39 | ~~~
40 |
41 |
42 | 4. Install the requirements
43 |
44 | ~~~
45 | pip install -r requirements.txt
46 | ~~~
47 |
48 |
49 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)).
50 |
51 | ~~~
52 | cd $CenterNet_ROOT/lib/models/backbones/DCNv2
53 | ./make.sh
54 | ~~~
55 | 6. [Optional] Compile NMS if you want to use multi-scale testing or test ExtremeNet.
56 |
57 | ~~~
58 | cd $CenterNet_ROOT/lib/external
59 | make
60 | ~~~
61 |
--------------------------------------------------------------------------------
/readme/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/demo.gif
--------------------------------------------------------------------------------
/readme/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/fig2.png
--------------------------------------------------------------------------------
/readme/multi_pose_screenshot_27.11.2019.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/multi_pose_screenshot_27.11.2019.png
--------------------------------------------------------------------------------
/readme/performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/performance.png
--------------------------------------------------------------------------------
/readme/plot_speed_accuracy.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import pandas as pd
4 | import plotly.express as px
5 | import plotly.graph_objects as go
6 |
7 | sizeref = 2000
8 |
9 | # Per-backbone mAP / FPS / model-size entries (marker area encodes model size)
10 | continent_names = ['DLA', 'Resnet', 'MobileNet', 'ShuffleNet', 'HigherResolution', 'HardNet']
11 | continent_data = {}
12 |
13 | continent_data['DLA-34'] = {'map':[62.3], 'speed':[23], 'size':82.7/1.5}
14 | continent_data['Resnet50'] = {'map':[54.5], 'speed':[28], 'size':139.8/1.5}
15 | continent_data['MobileNetV3'] = {'map':[46.0], 'speed':[30], 'size':9.7/1.5}
16 | continent_data['ShuffleNetV2'] = {'map':[43.9], 'speed':[25], 'size':40./1.5}
17 | continent_data['HigherResolution'] = {'map':[63.8], 'speed':[16], 'size':115.2/1.5}
18 | continent_data['HardNet'] = {'map':[46.0], 'speed':[30], 'size':19.3/1.5}
19 | continent_data['Darknet53'] = {'map':[38.2], 'speed':[30], 'size':27.1/1.5}
20 |
21 | # Create figure
22 | fig = go.Figure()
23 |
24 | for continent_name, continent in continent_data.items():
25 | fig.add_trace(go.Scatter(
26 | x=continent['speed'], y=continent['map'],
27 | name=continent_name, text='model performance',
28 | marker_size=continent['size'],
29 | ))
30 | # Tune marker appearance and layout
31 | fig.update_traces(mode='markers', marker=dict(sizemode='area',
32 | sizeref=sizeref, line_width=2))
33 |
34 | fig.update_layout(
35 | title='mAP v.s. FPS',
36 | xaxis=dict(
37 | title='FPS (frames per second)',
38 | gridcolor='white',
39 | type='log',
40 | gridwidth=2,
41 | ),
42 | yaxis=dict(
43 | title='Mean Average Precision (mAP)',
44 | gridcolor='white',
45 | gridwidth=2,
46 | ),
47 | paper_bgcolor='rgb(243, 243, 243)',
48 | plot_bgcolor='rgb(243, 243, 243)',
49 | )
50 | fig.show()
51 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | Cython
3 | numba
4 | progress
5 | matplotlib
6 | easydict
7 | scipy
8 | pycocotools
9 | yacs
10 | pthflops
11 |
--------------------------------------------------------------------------------
/samples/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 |
5 | def add_path(path):
6 | if path not in sys.path:
7 | sys.path.insert(0, path)
8 |
9 | this_dir = osp.dirname(__file__)
10 |
11 | # Add lib to PYTHONPATH
12 | lib_path = osp.join(this_dir, '..', 'lib')
13 | add_path(lib_path)
14 |
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 |
5 | def add_path(path):
6 | if path not in sys.path:
7 | sys.path.insert(0, path)
8 |
9 | this_dir = osp.dirname(__file__)
10 |
11 | # Add lib to PYTHONPATH
12 | lib_path = osp.join(this_dir, '..', 'lib')
13 | add_path(lib_path)
14 |
--------------------------------------------------------------------------------
/tools/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import argparse
4 | import os
5 |
6 | import cv2
7 |
8 | import _init_paths
9 | from config import cfg, update_config
10 | from detectors.detector_factory import detector_factory
11 |
12 |
13 | def parse_args():
14 |     parser = argparse.ArgumentParser(description='Run the keypoint detection demo')
15 | # general
16 | parser.add_argument('--cfg',
17 | help='experiment configure file name',
18 | required=True,
19 | type=str)
20 | parser.add_argument('--TESTMODEL',
21 | help='model directory',
22 | type=str,
23 | default='')
24 | parser.add_argument('--DEMOFILE',
25 | help='source images or video',
26 | type=str,
27 | default='')
28 | parser.add_argument('--DEBUG', type=int, default=0,
29 | help='level of visualization.'
30 | '1: only show the final detection results'
31 | '2: show the network output features'
32 |                              '3: use matplotlib to display'  # useful when launching training from an IPython notebook
33 | '4: save all visualizations to disk')
34 | parser.add_argument('--NMS',
35 | help='whether to do NMS',
36 | type=bool,
37 | default=True)
38 | args = parser.parse_args()
39 |
40 | return args
41 |
42 |
43 | image_ext = ['jpg', 'jpeg', 'png', 'webp']
44 | video_ext = ['mp4', 'mov', 'avi', 'mkv']
45 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
46 |
47 | def demo(cfg):
48 | Detector = detector_factory[cfg.TEST.TASK]
49 | detector = Detector(cfg)
50 |
51 | if cfg.TEST.DEMO_FILE == 'webcam' or \
52 | cfg.TEST.DEMO_FILE[cfg.TEST.DEMO_FILE.rfind('.') + 1:].lower() in video_ext:
53 | cam = cv2.VideoCapture(0 if cfg.TEST.DEMO_FILE == 'webcam' else cfg.TEST.DEMO_FILE)
54 | detector.pause = False
55 | while True:
56 | _, img = cam.read()
57 | cv2.imshow('input', img)
58 | ret = detector.run(img)
59 | time_str = ''
60 | for stat in time_stats:
61 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
62 | print(time_str)
63 | if cv2.waitKey(1) == 27:
64 | return # esc to quit
65 | else:
66 | if os.path.isdir(cfg.TEST.DEMO_FILE):
67 | image_names = []
68 | ls = os.listdir(cfg.TEST.DEMO_FILE)
69 | for file_name in sorted(ls):
70 | ext = file_name[file_name.rfind('.') + 1:].lower()
71 | if ext in image_ext:
72 | image_names.append(os.path.join(cfg.TEST.DEMO_FILE, file_name))
73 | else:
74 | image_names = [cfg.TEST.DEMO_FILE]
75 |
76 | for (image_name) in image_names:
77 | ret = detector.run(image_name)
78 |
79 | time_str = ''
80 | for stat in time_stats:
81 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
82 | print(time_str)
83 | if __name__ == '__main__':
84 | args = parse_args()
85 | update_config(cfg, args.cfg)
86 | cfg.defrost()
87 | cfg.TEST.MODEL_PATH = args.TESTMODEL
88 | cfg.TEST.DEMO_FILE = args.DEMOFILE
89 | cfg.TEST.NMS = args.NMS
90 | cfg.DEBUG = args.DEBUG
91 | cfg.freeze()
92 | demo(cfg)
93 |
--------------------------------------------------------------------------------
/tools/evaluate.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function
2 |
3 | import argparse
4 | import json
5 | import os
6 | import time
7 |
8 | import cv2
9 | import numpy as np
10 | import torch
11 | from progress.bar import Bar
12 |
13 | import _init_paths
14 | from config import cfg, update_config
15 | from datasets.dataset_factory import dataset_factory
16 | from detectors.detector_factory import detector_factory
17 | from external.nms import soft_nms
18 | from logger import Logger
19 | from utils.utils import AverageMeter
20 |
21 |
22 | def parse_args():
23 |     parser = argparse.ArgumentParser(description='Evaluate the keypoint detection network')
24 | # general
25 | parser.add_argument('--cfg',
26 | help='experiment configure file name',
27 | required=True,
28 | type=str)
29 | parser.add_argument('--NMS',
30 | help='whether to do NMS',
31 | type=bool,
32 | default=True)
33 | parser.add_argument('--TESTMODEL',
34 | help='model directory',
35 | type=str,
36 | default='')
37 | parser.add_argument('--DEBUG', type=int, default=0,
38 | help='level of visualization.'
39 | '1: only show the final detection results'
40 | '2: show the network output features'
41 |                              '3: use matplotlib to display'  # useful when launching training from an IPython notebook
42 | '4: save all visualizations to disk')
43 | args = parser.parse_args()
44 |
45 | return args
46 |
47 |
48 | def test(cfg):
49 |
50 | Dataset = dataset_factory[cfg.SAMPLE_METHOD]
51 | Logger(cfg)
52 | Detector = detector_factory[cfg.TEST.TASK]
53 |
54 | dataset = Dataset(cfg, 'val')
55 | detector = Detector(cfg)
56 |
57 | results = {}
58 | num_iters = len(dataset)
59 | bar = Bar('{}'.format(cfg.EXP_ID), max=num_iters)
60 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
61 | avg_time_stats = {t: AverageMeter() for t in time_stats}
62 | for ind in range(num_iters):
63 | img_id = dataset.images[ind]
64 | img_info = dataset.coco.loadImgs(ids=[img_id])[0]
65 | img_path = os.path.join(dataset.img_dir, img_info['file_name'])
66 | #img_path = '/home/tensorboy/data/coco/images/val2017/000000004134.jpg'
67 | ret = detector.run(img_path)
68 |
69 | results[img_id] = ret['results']
70 |
71 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
72 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
73 | for t in avg_time_stats:
74 | avg_time_stats[t].update(ret[t])
75 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg)
76 | bar.next()
77 | bar.finish()
78 | dataset.run_eval(results, cfg.OUTPUT_DIR)
79 |
80 | if __name__ == '__main__':
81 | args = parse_args()
82 | update_config(cfg, args.cfg)
83 | cfg.defrost()
84 | cfg.DEBUG = args.DEBUG
85 | cfg.TEST.MODEL_PATH = args.TESTMODEL
86 | cfg.TEST.NMS = args.NMS
87 | cfg.freeze()
88 | test(cfg)
89 |
--------------------------------------------------------------------------------