├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── demo ├── _init_paths.py ├── centernet_tensorrt_engine.py ├── convert2onnx.py ├── demo_main.py ├── face │ ├── __init__.py │ ├── centerface.py │ ├── demo.py │ ├── prnet.py │ ├── reid │ │ ├── __init__.py │ │ ├── reid_manager.py │ │ └── reid_table │ │ │ ├── __init__.py │ │ │ ├── base_idbase.py │ │ │ ├── head_pose_base.py │ │ │ └── reid_utils.py │ ├── resfcn256.py │ └── utils │ │ ├── BFM_UV.mat │ │ ├── cv_plot.py │ │ ├── estimate_pose.py │ │ ├── generate_posmap_300WLP.py │ │ ├── losses.py │ │ ├── render.py │ │ ├── render_app.py │ │ ├── rotate_vertices.py │ │ ├── utils.py │ │ └── uv_data │ │ ├── canonical_vertices.npy │ │ ├── face_ind.txt │ │ ├── triangles.txt │ │ ├── uv_kpt_ind.txt │ │ └── uv_weight_mask_gdh.png ├── result.png ├── tensorrt_model.py └── tracking │ ├── __init__.py │ ├── deep_sort.py │ ├── feature_extractor.py │ ├── model.py │ ├── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py │ └── util.py ├── experiments ├── darknet53_512x512.yaml ├── dla_34_512x512.yaml ├── efficientdet_512x512.yaml ├── ghost_net.yaml ├── hardnet_512x512.yaml ├── hrnet_w32_512.yaml ├── hrnet_w48_512.yaml ├── mobilenetv2_512x512.yaml ├── mobilenetv3_512x512.yaml ├── res_50_512x512.yaml └── shufflenetV2_512x512.yaml ├── images └── image1.jpeg ├── lib ├── config │ ├── __init__.py │ └── default.py ├── datasets │ ├── coco_hp.py │ ├── data.py │ ├── dataset_factory.py │ └── multi_pose.py ├── detectors │ ├── base_detector.py │ ├── detector_factory.py │ └── multi_pose.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── build │ │ └── temp.linux-x86_64-3.6 │ │ │ └── nms.o │ ├── make.sh │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── backbones │ │ ├── DCNv2 │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── dcn_v2.py │ │ │ ├── make.sh │ │ │ ├── setup.py │ │ │ ├── src │ │ │ │ ├── cpu │ │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ │ └── vision.h │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ └── vision.h │ │ │ │ ├── dcn_v2.h │ │ │ │ └── vision.cpp │ │ │ └── test.py │ │ ├── Utitled Document │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── dlav0.py │ │ ├── efficientdet │ │ │ ├── __init__.py │ │ │ ├── bifpn.py │ │ │ ├── conv_module.py │ │ │ ├── efficientdet.py │ │ │ ├── efficientnet.py │ │ │ ├── module.py │ │ │ ├── retinahead.py │ │ │ └── utils.py │ │ ├── ghost_net.py │ │ ├── hardnet.py │ │ ├── large_hourglass.py │ │ ├── mobilenet │ │ │ ├── __init__.py │ │ │ ├── mobilenetv2.py │ │ │ └── mobilenetv3.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ ├── pose_higher_hrnet.py │ │ ├── resnet_dcn.py │ │ ├── shufflenetv2_dcn.py │ │ └── test_mode.py │ ├── decode.py │ ├── heads │ │ ├── __init__.py │ │ ├── keypoint.py │ │ └── mask.py │ ├── losses.py │ ├── model.py │ └── utils.py ├── trains │ ├── base_trainer.py │ ├── multi_pose.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── readme ├── DATA.md ├── DEVELOP.md ├── GETTING_STARTED.md ├── INSTALL.md ├── demo.gif ├── fig2.png ├── multi_pose_screenshot_27.11.2019.png ├── performance.png └── plot_speed_accuracy.py ├── requirements.txt ├── samples ├── _init_paths.py ├── data_inspect_utils.py └── inspect_data.ipynb └── tools ├── 
_init_paths.py ├── demo.py ├── evaluate.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | *~ 3 | *so 4 | lib/models/backbones/DCNv2/build/* 5 | lib/models/backbones/DCNv2/DCNv2.egg-info/* 6 | *build/* 7 | */.vscode/* 8 | *__pycache__* 9 | *.ipynb_checkpoints* 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Xingyi Zhou 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This repo is based on [CenterNet](https://arxiv.org/abs/1904.07850) and aims to push the boundary of human pose estimation 2 | Multi-person pose estimation using center point detection: 3 | ![](readme/fig2.png) 4 | 5 | ## Main results 6 | 7 | ### Keypoint detection on COCO validation 2017 8 |

9 | 10 | | Backbone | AP | FPS | TensorRT FPS | GFLOPs | Download | 11 | |--------------|-----------|--------------|----------|----------|----------| 12 | |DLA-34 | 62.7 | 23 | - | - |[model](https://drive.google.com/open?id=1IahJ3vpjTVu1p-Okf6lcn-bM7fVKNg6N) | 13 | |Resnet-50 | 54.5 | 28 | 33 | - |[model](https://drive.google.com/open?id=1oBgWrfigo2fGtpQJXQ0stADTgVFxPWGq) | 14 | |MobilenetV3 | 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1snJnADAD1NUzyO1QXCftuZu1rsr8095G) | 15 | |ShuffleNetV2 | 43.9 | 25 | - | - |[model](https://drive.google.com/open?id=1FK7YQzCB6mLcb0v4SOmlqtRJfA-PQSvN) | 16 | |[HRNet_W32](https://drive.google.com/open?id=1mJoK7KEx35Wgf6uAZ-Ez5IwAeOk1RYw0)| 63.8 | 16 | - | - |[model](https://drive.google.com/open?id=1X0yxGeeNsD4VwU2caDo-BaH_MoCAnU_J) | 17 | |[HardNet](https://github.com/PingoLH/FCHarDNet)| 46.0 | 30 | - | - |[model](https://drive.google.com/open?id=1CFc_qAAT4NFfrAG8JOxRVG8CAw9ySuYp) | 18 | |[Darknet53]()| 34.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) | 19 | |[EfficientDet]()| 38.2 | 30 | - | - |[model](https://drive.google.com/open?id=1S8spP_QKHqIYmWpfF9Bb4-4OoUXIOnkh) | 20 | 21 | ## Installation 22 | 23 | Initialize the submodules with `git submodule init && git submodule update`. 24 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 25 | 26 | ## Use CenterNet 27 | 28 | We support demos on a single image, an image folder, a video file, and webcam input. 29 | 30 | First, download the model [DLA-34](https://drive.google.com/open?id=1OkHjjViB0dzbuicdtIam-YcoT0sYpmjP) 31 | from the [Model zoo](https://drive.google.com/open?id=1UG2l8XtjOfBtG_GLpSdxlWS2wxFR8hQF) and put it anywhere you like. 32 | 33 | Run: 34 | 35 | ~~~ 36 | cd tools; python demo.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE ../images/33823288584_1d21cf0a26_k.jpg --DEBUG 1 37 | ~~~ 38 | The result for the example image should look like: 39 |

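If you would rather call the detector from Python than through the CLI, the sketch below shows roughly what that could look like. It is an illustration only, not code taken from `tools/demo.py`: the `update_config` helper, the `detector_factory` lookup, the `'multi_pose'` task key, and `detector.run()` are assumptions modelled on the original CenterNet layout (`lib/config/default.py`, `lib/detectors/detector_factory.py`, `lib/detectors/base_detector.py`), so check `tools/demo.py` for the actual entry point.

```
# Hypothetical sketch -- update_config, detector_factory, 'multi_pose' and
# detector.run() are assumed names, not the verified API of this repo.
import cv2

from config import cfg, update_config                     # lib/config
from detectors.detector_factory import detector_factory   # lib/detectors

update_config(cfg, '../experiments/dla_34_512x512.yaml')  # assumed config helper
detector = detector_factory['multi_pose'](cfg)            # assumed task key

image = cv2.imread('../images/image1.jpeg')
ret = detector.run(image)                                  # assumed to return detections/keypoints
print(ret)
```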
40 | 41 | ## Evaluation 42 | ~~~ 43 | cd tools; python evaluate.py --cfg ../experiments/dla_34_512x512.yaml --TESTMODEL /your/model/path/dla34_best.pth --DEMOFILE --DEBUG 0 44 | ~~~ 45 | 46 | ## Training 47 | 48 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to set up the datasets. 49 | 50 | We provide config files for all the experiments in the [experiments](experiments) folder. 51 | 52 | ``` 53 | cd ./tools; python -m torch.distributed.launch --nproc_per_node 4 train.py --cfg ../experiments/<config>.yaml 54 | ``` 55 | 56 | ## Demo 57 | 58 | The demo code lives in the `demo` directory; combined, it forms a fairly robust human detection + tracking + face re-identification (reid) system. A sketch of how the pieces fit together is shown below. 59 | 60 |

61 | 62 |

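To make that concrete, here is a minimal, hedged sketch of how the pieces under `demo/` could be wired together: a person detector (the `detect_people` stub below is a hypothetical stand-in for the CenterNet pose detector), the `DeepSort` tracker from `demo/tracking/deep_sort.py`, and `CenterFace` from `demo/face/centerface.py`. The `DeepSort` and `CenterFace` interfaces match the code in this repo, but the glue loop itself is an illustrative assumption rather than the actual `demo/demo_main.py`.

```
import cv2
import numpy as np

from face.centerface import CenterFace      # assumed import path from demo/
from tracking.deep_sort import DeepSort


def detect_people(frame):
    # Hypothetical stub: return (N, 4) person boxes as (x1, y1, w, h) and (N,) scores.
    return np.zeros((0, 4)), np.zeros((0,))


deepsort = DeepSort('/path/to/deepsort_ckpt.t7')          # re-id weights for the tracker
centerface = CenterFace('/path/to/centerface.onnx')       # face detector (ONNX)

cap = cv2.VideoCapture(0)                                 # webcam, or a video file path
while True:
    ok, frame = cap.read()
    if not ok:
        break

    boxes_xywh, scores = detect_people(frame)
    tracks = deepsort.update(boxes_xywh, scores, frame)   # rows are (x1, y1, x2, y2, track_id)

    centerface.transform(*frame.shape[:2])                # fit the network input to this frame size
    faces, landmarks = centerface(frame, threshold=0.35)  # (M, 5) boxes+score, (M, 10) landmarks

    for x1, y1, x2, y2, track_id in tracks:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, str(track_id), (int(x1), int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    for x1, y1, x2, y2, _ in faces:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (2, 255, 0), 1)

    cv2.imshow('demo', frame)
    if cv2.waitKey(1) == 27:                              # Esc quits
        break
```

In the full demo, the face crops would additionally be passed through the re-identification tables under `demo/face/reid/` so that identities stay stable when people leave and re-enter the scene.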
63 | 64 | ## License 65 | 66 | MIT License (refer to the LICENSE file for details). 67 | 68 | ## Citation 69 | 70 | If you find this project useful for your research, please use the following BibTeX entry. 71 | 72 | @inproceedings{zhou2019objects, 73 | title={Objects as Points}, 74 | author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp}, 75 | booktitle={arXiv preprint arXiv:1904.07850}, 76 | year={2019} 77 | } 78 | -------------------------------------------------------------------------------- /demo/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /demo/face/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/__init__.py -------------------------------------------------------------------------------- /demo/face/centerface.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import cv2 4 | import numpy as np 5 | 6 | 7 | class CenterFace(object): 8 | def __init__(self, model_path, landmarks=True): 9 | self.landmarks = landmarks 10 | if self.landmarks: 11 | self.net = cv2.dnn.readNetFromONNX(model_path) 12 | else: 13 | self.net = cv2.dnn.readNetFromONNX('cface.1k.onnx') 14 | 15 | def __call__(self, img, threshold=0.5): 16 | blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False) 17 | self.net.setInput(blob) 18 | begin = datetime.datetime.now() 19 | if self.landmarks: 20 | heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540']) 21 | else: 22 | heatmap, scale, offset = self.net.forward(["535", "536", "537"]) 23 | 24 | end = datetime.datetime.now() 25 | print("cpu times = ", end - begin) 26 | if self.landmarks: 27 | dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold) 28 | else: 29 | dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold) 30 | 31 | if len(dets) > 0: 32 | dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h 33 | if self.landmarks: 34 | lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h 35 | else: 36 | dets = np.empty(shape=[0, 5], dtype=np.float32) 37 | if self.landmarks: 38 | lms = np.empty(shape=[0, 10], dtype=np.float32) 39 | if self.landmarks: 40 | return dets, lms 41 | else: 42 | return dets 43 | 44 | def transform(self, h, w): 45 | img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32) 46 | scale_h, scale_w = img_h_new / h, img_w_new / w 47 | self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = img_h_new, img_w_new, scale_h, scale_w 48 | 49 | def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1): 50 | heatmap = np.squeeze(heatmap) 51 | scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :] 52 | offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :] 53 | c0, c1 = np.where(heatmap > threshold) 54 | if self.landmarks: 55 | 
boxes, lms = [], [] 56 | else: 57 | boxes = [] 58 | if len(c0) > 0: 59 | for i in range(len(c0)): 60 | s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4 61 | o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]] 62 | s = heatmap[c0[i], c1[i]] 63 | x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2) 64 | x1, y1 = min(x1, size[1]), min(y1, size[0]) 65 | boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s]) 66 | if self.landmarks: 67 | lm = [] 68 | for j in range(5): 69 | lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1) 70 | lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1) 71 | lms.append(lm) 72 | boxes = np.asarray(boxes, dtype=np.float32) 73 | keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3) 74 | boxes = boxes[keep, :] 75 | if self.landmarks: 76 | lms = np.asarray(lms, dtype=np.float32) 77 | lms = lms[keep, :] 78 | if self.landmarks: 79 | return boxes, lms 80 | else: 81 | return boxes 82 | 83 | def nms(self, boxes, scores, nms_thresh): 84 | x1 = boxes[:, 0] 85 | y1 = boxes[:, 1] 86 | x2 = boxes[:, 2] 87 | y2 = boxes[:, 3] 88 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 89 | order = np.argsort(scores)[::-1] 90 | num_detections = boxes.shape[0] 91 | suppressed = np.zeros((num_detections,), dtype=np.bool) 92 | 93 | keep = [] 94 | for _i in range(num_detections): 95 | i = order[_i] 96 | if suppressed[i]: 97 | continue 98 | keep.append(i) 99 | 100 | ix1 = x1[i] 101 | iy1 = y1[i] 102 | ix2 = x2[i] 103 | iy2 = y2[i] 104 | iarea = areas[i] 105 | 106 | for _j in range(_i + 1, num_detections): 107 | j = order[_j] 108 | if suppressed[j]: 109 | continue 110 | 111 | xx1 = max(ix1, x1[j]) 112 | yy1 = max(iy1, y1[j]) 113 | xx2 = min(ix2, x2[j]) 114 | yy2 = min(iy2, y2[j]) 115 | w = max(0, xx2 - xx1 + 1) 116 | h = max(0, yy2 - yy1 + 1) 117 | 118 | inter = w * h 119 | ovr = inter / (iarea + areas[j] - inter) 120 | if ovr >= nms_thresh: 121 | suppressed[j] = True 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /demo/face/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import scipy.io as sio 5 | 6 | from centerface import CenterFace 7 | 8 | 9 | def test_image(image_path, model_path): 10 | frame = cv2.imread(image_path) 11 | h, w = frame.shape[:2] 12 | landmarks = True 13 | centerface = CenterFace(model_path=model_path, landmarks=landmarks) 14 | centerface.transform(h, w) 15 | if landmarks: 16 | dets, lms = centerface(frame, threshold=0.35) 17 | else: 18 | dets = centerface(frame, threshold=0.35) 19 | 20 | for det in dets: 21 | boxes, score = det[:4], det[4] 22 | cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1) 23 | if landmarks: 24 | for lm in lms: 25 | cv2.circle(frame, (int(lm[0]), int(lm[1])), 2, (0, 0, 255), -1) 26 | cv2.circle(frame, (int(lm[2]), int(lm[3])), 2, (0, 0, 255), -1) 27 | cv2.circle(frame, (int(lm[4]), int(lm[5])), 2, (0, 0, 255), -1) 28 | cv2.circle(frame, (int(lm[6]), int(lm[7])), 2, (0, 0, 255), -1) 29 | cv2.circle(frame, (int(lm[8]), int(lm[9])), 2, (0, 0, 255), -1) 30 | cv2.imshow('out', frame) 31 | cv2.waitKey(0) 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | image_path = '/home/tensorboy/centerpose/images/image1.jpg' 37 | model_path = '/home/tensorboy/CenterFace/models/onnx/centerface.onnx' 38 | test_image(image_path, model_path) 39 | 
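# --- Added illustration (not part of the original demo.py) -------------------
# A minimal sketch of running CenterFace on a video stream or webcam, reusing
# the same CenterFace interface as test_image() above. The capture source,
# window handling, and Esc-to-quit behaviour are assumptions for illustration;
# only the detection boxes are drawn here.
def test_video(source, model_path):
    centerface = CenterFace(model_path=model_path, landmarks=True)
    cap = cv2.VideoCapture(source)  # 0 for webcam, or a path to a video file
    sized = False
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if not sized:
            # the network input size only needs recomputing when the frame size changes
            centerface.transform(*frame.shape[:2])
            sized = True
        dets, lms = centerface(frame, threshold=0.35)
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        cv2.imshow('out', frame)
        if cv2.waitKey(1) == 27:  # Esc to quit
            break
    cap.release()
    cv2.destroyAllWindows()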
-------------------------------------------------------------------------------- /demo/face/reid/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/__init__.py -------------------------------------------------------------------------------- /demo/face/reid/reid_manager.py: -------------------------------------------------------------------------------- 1 | from .reid_table.head_pose_base import ReidDataBase 2 | 3 | 4 | class ReIDManager(object): 5 | def __init__(self, config): 6 | self.reid_table = ReidDataBase(config) 7 | 8 | def query_targets(self, reappear_targets, ignored_targets): 9 | if len(reappear_targets) == 0: 10 | return [], [] 11 | reappear_detections = [] 12 | for single_target in reappear_targets: 13 | best_detection = self._get_detection_with_highest_face(single_target) 14 | reappear_detections.append(best_detection) 15 | 16 | ignored_id = [t.id for t in ignored_targets] 17 | hash_ids, hash_status = self.reid_table.reid_query_detections(reappear_detections, ignored_id) 18 | return hash_ids, hash_status 19 | 20 | def update_targets(self, tracked_targets): 21 | all_detections = [self._get_latest_detection(target) for target in tracked_targets] 22 | # update reid features 23 | self.reid_table.update(all_detections) 24 | 25 | def remove_targets(self, removed_targets): 26 | will_remove_ids = [target.id for target in removed_targets] 27 | self.reid_table.remove(will_remove_ids) 28 | 29 | def query_certain_id(self, detection_list, target_id): 30 | if len(detection_list) == 0: 31 | return [] 32 | return self.reid_table.reid_query_certain_id(detection_list, target_id) 33 | 34 | def _get_detection_with_highest_face(self, target): 35 | detection_list = target.last_detections # list(target.get_detections()) 36 | detection_list = sorted(detection_list, key=lambda t: t.face_score)[::-1] 37 | return detection_list[0] 38 | 39 | def _get_latest_detection(self, target): 40 | # target.last_detections: a list of last detections sorted by time (currently contains 4 detections) 41 | return target.last_detections[0] 42 | -------------------------------------------------------------------------------- /demo/face/reid/reid_table/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/reid/reid_table/__init__.py -------------------------------------------------------------------------------- /demo/face/reid/reid_table/base_idbase.py: -------------------------------------------------------------------------------- 1 | class BaseReidDatabase(object): 2 | """ 3 | Args: 4 | dataset: a collection of (hash_id, features) pairs in some format 5 | (maybe proto?) 
6 | """ 7 | 8 | def __init__(self): 9 | self.dataset = {} 10 | 11 | def update(self, features, cameras, hash_ids): 12 | """Update dataset with features for a specific hash_id 13 | Args: 14 | features: List[M_i x L-dimensional np.float32 array] 15 | cameras: List[M_i np.float32 vetor] 16 | hash_ids: List[ids] of length M_i 17 | """ 18 | raise NotImplementedError 19 | 20 | def get_all_ids(self): 21 | return ['{:04d}'.format(abs(k) % 10000) for k, v in self.dataset.items()] 22 | 23 | def get_current_table_size(self): 24 | return len(self.dataset) 25 | 26 | def check_if_in_table(self, new_id): 27 | return new_id in self.dataset 28 | 29 | # search all persons in one frame 30 | def retrieval(self, features, cameras, tracked_ids): 31 | """Computes and returns closest entity based on features 32 | Args: 33 | features: List[M_i x L-dimensional np.float32 array] 34 | cameras: List[M_i np.float32 vetor] 35 | tracked_ids: List of ids of unknown length, confirmed ids by tracker. ReID should ignore these ids. 36 | Returns: 37 | hash_ids(list): list of ids, id could be none 38 | """ 39 | raise NotImplementedError 40 | 41 | def remove(self, hash_id): 42 | """Deletes entity with hash_id and all of it's features from the dataset 43 | Args: 44 | hash_id(string): unique string identifying the specific person 45 | """ 46 | raise NotImplementedError 47 | -------------------------------------------------------------------------------- /demo/face/reid/reid_table/reid_utils.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MatchFlags(Enum): 5 | MATCHED = 0 6 | BADFEATURE = 1 7 | NOTCONVINCED = 2 8 | FIRSTTIME = 3 9 | EMPTYDICT = 4 10 | UNREGISTER = 5 11 | NODETECTION = 6 12 | 13 | 14 | class HEADFLAGS(Enum): 15 | S0 = 0 16 | S1 = 1 17 | S2 = 2 18 | S3 = 3 19 | S4 = 4 20 | S5 = 5 21 | S6 = 6 22 | 23 | 24 | # Data template 25 | class Detection: 26 | def __init__(self, ind, score, camera, feature, landmarks, bbox): 27 | self.target_id = ind 28 | self.face_score = score 29 | self.features = feature 30 | self.camera_id = camera 31 | self.landmarks = landmarks 32 | self.bbox = bbox 33 | 34 | def set_new_id(self, new_id): 35 | self.target_id = new_id 36 | 37 | def get_id(self): 38 | return self.target_id 39 | 40 | def get_face_score(self): 41 | return self.face_score 42 | 43 | 44 | def assign_head_status(yaw): 45 | # if abs(yaw) > 25: 46 | # head_status = HEADFLAGS.S2 47 | # elif abs(yaw) > 15: 48 | # head_status = HEADFLAGS.S1 49 | # else: 50 | # head_status = HEADFLAGS.S0 51 | if abs(yaw) > 30: 52 | head_status = HEADFLAGS.S6 53 | elif abs(yaw) > 25: 54 | head_status = HEADFLAGS.S5 55 | elif abs(yaw) > 20: 56 | head_status = HEADFLAGS.S4 57 | elif abs(yaw) > 15: 58 | head_status = HEADFLAGS.S3 59 | elif abs(yaw) > 10: 60 | head_status = HEADFLAGS.S2 61 | elif abs(yaw) > 5: 62 | head_status = HEADFLAGS.S1 63 | else: 64 | head_status = HEADFLAGS.S0 65 | return head_status 66 | -------------------------------------------------------------------------------- /demo/face/utils/BFM_UV.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/BFM_UV.mat -------------------------------------------------------------------------------- /demo/face/utils/cv_plot.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | """ 3 | @func: draw landmark & mesh on image. 
4 | @source: YadiraF/PRNet/utils/cv_plot.py 5 | """ 6 | import cv2 7 | import numpy as np 8 | 9 | end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1 10 | 11 | 12 | def plot_kpt(image, kpt): 13 | ''' Draw 68 key points 14 | Args: 15 | image: the input image 16 | kpt: (68, 3). 17 | ''' 18 | image = image.copy() 19 | kpt = np.round(kpt).astype(np.int32) 20 | for i in range(kpt.shape[0]): 21 | st = kpt[i, :2] 22 | image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2) 23 | if i in end_list: 24 | continue 25 | ed = kpt[i + 1, :2] 26 | image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1) 27 | return image 28 | 29 | 30 | def plot_vertices(image, vertices): 31 | image = image.copy() 32 | vertices = np.round(vertices).astype(np.int32) 33 | for i in range(0, vertices.shape[0], 2): 34 | st = vertices[i, :2] 35 | image = cv2.circle(image, (st[0], st[1]), 1, (255, 0, 0), -1) 36 | return image 37 | 38 | 39 | def plot_pose_box(image, P, kpt, color=(0, 255, 0), line_width=2): 40 | ''' Draw a 3D box as annotation of pose. Ref:https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py 41 | Args: 42 | image: the input image 43 | P: (3, 4). Affine Camera Matrix. 44 | kpt: (68, 3). 45 | ''' 46 | image = image.copy() 47 | 48 | point_3d = [] 49 | rear_size = 90 50 | rear_depth = 0 51 | point_3d.append((-rear_size, -rear_size, rear_depth)) 52 | point_3d.append((-rear_size, rear_size, rear_depth)) 53 | point_3d.append((rear_size, rear_size, rear_depth)) 54 | point_3d.append((rear_size, -rear_size, rear_depth)) 55 | point_3d.append((-rear_size, -rear_size, rear_depth)) 56 | 57 | front_size = 105 58 | front_depth = 110 59 | point_3d.append((-front_size, -front_size, front_depth)) 60 | point_3d.append((-front_size, front_size, front_depth)) 61 | point_3d.append((front_size, front_size, front_depth)) 62 | point_3d.append((front_size, -front_size, front_depth)) 63 | point_3d.append((-front_size, -front_size, front_depth)) 64 | point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3) 65 | 66 | # Map to 2d image points 67 | point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4 68 | point_2d = point_3d_homo.dot(P.T)[:, :2] 69 | point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(kpt[:27, :2], 0) 70 | point_2d = np.int32(point_2d.reshape(-1, 2)) 71 | 72 | # Draw all the lines 73 | cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA) 74 | cv2.line(image, tuple(point_2d[1]), tuple( 75 | point_2d[6]), color, line_width, cv2.LINE_AA) 76 | cv2.line(image, tuple(point_2d[2]), tuple( 77 | point_2d[7]), color, line_width, cv2.LINE_AA) 78 | cv2.line(image, tuple(point_2d[3]), tuple( 79 | point_2d[8]), color, line_width, cv2.LINE_AA) 80 | 81 | return image 82 | -------------------------------------------------------------------------------- /demo/face/utils/estimate_pose.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | 3 | from math import asin, atan2, cos, sin 4 | 5 | import numpy as np 6 | 7 | 8 | def isRotationMatrix(R): 9 | ''' checks if a matrix is a valid rotation matrix(whether orthogonal or not) 10 | ''' 11 | Rt = np.transpose(R) 12 | shouldBeIdentity = np.dot(Rt, R) 13 | I = np.identity(3, dtype=R.dtype) 14 | n = np.linalg.norm(I - shouldBeIdentity) 15 | return n < 1e-6 16 | 17 | 18 | def matrix2angle(R): 19 | ''' compute three Euler angles from a Rotation Matrix. 
Ref: http://www.gregslabaugh.net/publications/euler.pdf 20 | Args: 21 | R: (3,3). rotation matrix 22 | Returns: 23 | x: yaw 24 | y: pitch 25 | z: roll 26 | ''' 27 | # assert(isRotationMatrix(R)) 28 | 29 | if R[2, 0] != 1 or R[2, 0] != -1: 30 | x = asin(R[2, 0]) 31 | y = atan2(R[2, 1] / cos(x), R[2, 2] / cos(x)) 32 | z = atan2(R[1, 0] / cos(x), R[0, 0] / cos(x)) 33 | 34 | else: # Gimbal lock 35 | z = 0 # can be anything 36 | if R[2, 0] == -1: 37 | x = np.pi / 2 38 | y = z + atan2(R[0, 1], R[0, 2]) 39 | else: 40 | x = -np.pi / 2 41 | y = -z + atan2(-R[0, 1], -R[0, 2]) 42 | 43 | return x, y, z 44 | 45 | 46 | def P2sRt(P): 47 | ''' decompositing camera matrix P. 48 | Args: 49 | P: (3, 4). Affine Camera Matrix. 50 | Returns: 51 | s: scale factor. 52 | R: (3, 3). rotation matrix. 53 | t2d: (2,). 2d translation. 54 | ''' 55 | t2d = P[:2, 3] 56 | R1 = P[0:1, :3] 57 | R2 = P[1:2, :3] 58 | s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 59 | r1 = R1 / np.linalg.norm(R1) 60 | r2 = R2 / np.linalg.norm(R2) 61 | r3 = np.cross(r1, r2) 62 | 63 | R = np.concatenate((r1, r2, r3), 0) 64 | return s, R, t2d 65 | 66 | 67 | def compute_similarity_transform(points_static, points_to_transform): 68 | # http://nghiaho.com/?page_id=671 69 | p0 = np.copy(points_static).T 70 | p1 = np.copy(points_to_transform).T 71 | 72 | t0 = -np.mean(p0, axis=1).reshape(3, 1) 73 | t1 = -np.mean(p1, axis=1).reshape(3, 1) 74 | t_final = t1 - t0 75 | 76 | p0c = p0 + t0 77 | p1c = p1 + t1 78 | 79 | covariance_matrix = p0c.dot(p1c.T) 80 | U, S, V = np.linalg.svd(covariance_matrix) 81 | R = U.dot(V) 82 | if np.linalg.det(R) < 0: 83 | R[:, 2] *= -1 84 | 85 | rms_d0 = np.sqrt(np.mean(np.linalg.norm(p0c, axis=0) ** 2)) 86 | rms_d1 = np.sqrt(np.mean(np.linalg.norm(p1c, axis=0) ** 2)) 87 | 88 | s = (rms_d0 / rms_d1) 89 | P = np.c_[s * np.eye(3).dot(R), t_final] 90 | return P 91 | 92 | 93 | def estimate_pose(vertices): 94 | canonical_vertices = np.load('face/utils/uv_data/canonical_vertices.npy') 95 | P = compute_similarity_transform(vertices, canonical_vertices) 96 | _, R, _ = P2sRt(P) # decompose affine matrix to s, R, t 97 | pose = matrix2angle(R) 98 | 99 | return P, pose 100 | -------------------------------------------------------------------------------- /demo/face/utils/generate_posmap_300WLP.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | ''' 3 | Generate uv position map of 300W_LP. 4 | ''' 5 | import argparse 6 | import os 7 | import sys 8 | from time import time 9 | 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import scipy.io as sio 13 | import skimage.transform 14 | from skimage import io 15 | 16 | import face3d 17 | from face3d import mesh 18 | from face3d.morphable_model import MorphabelModel 19 | 20 | sys.path.append('..') 21 | 22 | 23 | def process_uv(uv_coords, uv_h=256, uv_w=256): 24 | uv_coords[:, 0] = uv_coords[:, 0] * (uv_w - 1) 25 | uv_coords[:, 1] = uv_coords[:, 1] * (uv_h - 1) 26 | uv_coords[:, 1] = uv_h - uv_coords[:, 1] - 1 27 | uv_coords = np.hstack((uv_coords, np.zeros((uv_coords.shape[0], 1)))) # add z 28 | return uv_coords 29 | 30 | 31 | def run_posmap_300W_LP(bfm, image_path, mat_path, save_folder, idx=0, uv_h=256, uv_w=256, image_h=256, image_w=256): 32 | # 1. load image and fitted parameters 33 | image_name = image_path.strip().split('/')[-1] 34 | image = io.imread(image_path) / 255. 
35 | [h, w, c] = image.shape 36 | 37 | info = sio.loadmat(mat_path) 38 | pose_para = info['Pose_Para'].T.astype(np.float32) 39 | shape_para = info['Shape_Para'].astype(np.float32) 40 | exp_para = info['Exp_Para'].astype(np.float32) 41 | 42 | # 2. generate mesh 43 | # generate shape 44 | vertices = bfm.generate_vertices(shape_para, exp_para) 45 | # transform mesh 46 | s = pose_para[-1, 0] 47 | angles = pose_para[:3, 0] 48 | t = pose_para[3:6, 0] 49 | transformed_vertices = bfm.transform_3ddfa(vertices, s, angles, t) 50 | projected_vertices = transformed_vertices.copy() # using stantard camera & orth projection as in 3DDFA 51 | image_vertices = projected_vertices.copy() 52 | image_vertices[:, 1] = h - image_vertices[:, 1] - 1 53 | 54 | # 3. crop image with key points 55 | kpt = image_vertices[bfm.kpt_ind, :].astype(np.int32) 56 | left = np.min(kpt[:, 0]) 57 | right = np.max(kpt[:, 0]) 58 | top = np.min(kpt[:, 1]) 59 | bottom = np.max(kpt[:, 1]) 60 | center = np.array([right - (right - left) / 2.0, 61 | bottom - (bottom - top) / 2.0]) 62 | old_size = (right - left + bottom - top) / 2 63 | size = int(old_size * 1.5) 64 | # random pertube. you can change the numbers 65 | marg = old_size * 0.1 66 | t_x = np.random.rand() * marg * 2 - marg 67 | t_y = np.random.rand() * marg * 2 - marg 68 | center[0] = center[0] + t_x 69 | center[1] = center[1] + t_y 70 | size = size * (np.random.rand() * 0.2 + 0.9) 71 | 72 | # crop and record the transform parameters 73 | src_pts = np.array([[center[0] - size / 2, center[1] - size / 2], [center[0] - size / 2, center[1] + size / 2], 74 | [center[0] + size / 2, center[1] - size / 2]]) 75 | DST_PTS = np.array([[0, 0], [0, image_h - 1], [image_w - 1, 0]]) 76 | tform = skimage.transform.estimate_transform('similarity', src_pts, DST_PTS) 77 | cropped_image = skimage.transform.warp(image, tform.inverse, output_shape=(image_h, image_w)) 78 | 79 | # transform face position(image vertices) along with 2d facial image 80 | position = image_vertices.copy() 81 | position[:, 2] = 1 82 | position = np.dot(position, tform.params.T) 83 | position[:, 2] = image_vertices[:, 2] * tform.params[0, 0] # scale z 84 | position[:, 2] = position[:, 2] - np.min(position[:, 2]) # translate z 85 | 86 | # 4. uv position map: render position in uv space 87 | uv_position_map = mesh.render.render_colors(uv_coords, bfm.full_triangles, position, uv_h, uv_w, c=3) 88 | 89 | # 5. 
save files 90 | if not os.path.exists(os.path.join(save_folder, str(idx) + '/')): 91 | os.mkdir(os.path.join(save_folder, str(idx) + '/')) 92 | 93 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'original.jpg'), np.squeeze(cropped_image)) 94 | np.save('{}/{}/{}'.format(save_folder, idx, image_name.replace('jpg', 'npy')), uv_position_map) 95 | io.imsave('{}/{}/{}'.format(save_folder, idx, 'uv_posmap.jpg'), 96 | (uv_position_map) / max(image_h, image_w)) # only for show 97 | 98 | # --verify 99 | # import cv2 100 | # uv_texture_map_rec = cv2.remap(cropped_image, uv_position_map[:,:,:2].astype(np.float32), None, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT,borderValue=(0)) 101 | # io.imsave('{}/{}'.format(save_folder, image_name.replace('.jpg', '_tex.jpg')), np.squeeze(uv_texture_map_rec)) 102 | 103 | 104 | def generate_batch_sample(input_dir, save_folder='./300WLP'): 105 | if not os.path.exists(save_folder): 106 | os.mkdir(save_folder) 107 | # set para 108 | uv_h = uv_w = 256 109 | 110 | # load uv coords 111 | global uv_coords 112 | uv_coords = face3d.morphable_model.load.load_uv_coords('BFM/BFM_UV.mat') # 113 | uv_coords = process_uv(uv_coords, uv_h, uv_w) 114 | 115 | # load bfm 116 | bfm = MorphabelModel('BFM/BFM.mat') 117 | 118 | # Batch generating uv_map Dataset 119 | """ 120 | @date: 2019/07/19 121 | Train Dataset: 122 | AFW. 10413. 123 | HELEN. 75351. 124 | LFPW. 33111. 125 | Test Dataset: 126 | IBUG. 3571. 127 | 128 | """ 129 | base = 0 130 | 131 | for idx, item in enumerate(os.listdir(input_dir)): 132 | if 'jpg' in item: 133 | ab_path = os.path.join(input_dir, item) 134 | img_path = ab_path 135 | mat_path = ab_path.replace('jpg', 'mat') 136 | 137 | run_posmap_300W_LP(bfm, img_path, mat_path, save_folder, idx + base) 138 | print("Number {} uv_pos_map was generated!".format(idx)) 139 | 140 | 141 | if __name__ == '__main__': 142 | parser = argparse.ArgumentParser() 143 | parser.add_argument("--save_dir", help="specify output uv_map directory.") 144 | parser.add_argument("--input_dir", help="specify input origin mat & image directory.") 145 | args = parser.parse_args() 146 | 147 | generate_batch_sample(save_folder=args.save_dir, 148 | input_dir=args.input_dir) 149 | -------------------------------------------------------------------------------- /demo/face/utils/render_app.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .render import vis_of_vertices, render_texture 3 | from scipy import ndimage 4 | 5 | def get_visibility(vertices, triangles, h, w): 6 | triangles = triangles.T 7 | vertices_vis = vis_of_vertices(vertices.T, triangles, h, w) 8 | vertices_vis = vertices_vis.astype(bool) 9 | for k in range(2): 10 | tri_vis = vertices_vis[triangles[0,:]] | vertices_vis[triangles[1,:]] | vertices_vis[triangles[2,:]] 11 | ind = triangles[:, tri_vis] 12 | vertices_vis[ind] = True 13 | # for k in range(2): 14 | # tri_vis = vertices_vis[triangles[0,:]] & vertices_vis[triangles[1,:]] & vertices_vis[triangles[2,:]] 15 | # ind = triangles[:, tri_vis] 16 | # vertices_vis[ind] = True 17 | vertices_vis = vertices_vis.astype(np.float32) #1 for visible and 0 for non-visible 18 | return vertices_vis 19 | 20 | def get_uv_mask(vertices_vis, triangles, uv_coords, h, w, resolution): 21 | triangles = triangles.T 22 | vertices_vis = vertices_vis.astype(np.float32) 23 | uv_mask = render_texture(uv_coords.T, vertices_vis[np.newaxis, :], triangles, resolution, resolution, 1) 24 | uv_mask = np.squeeze(uv_mask > 0) 25 | uv_mask = 
ndimage.binary_closing(uv_mask) 26 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 27 | uv_mask = ndimage.binary_closing(uv_mask) 28 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 29 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 30 | uv_mask = ndimage.binary_erosion(uv_mask, structure = np.ones((4,4))) 31 | uv_mask = uv_mask.astype(np.float32) 32 | 33 | return np.squeeze(uv_mask) 34 | 35 | def get_depth_image(vertices, triangles, h, w, isShow = False): 36 | z = vertices[:, 2:] 37 | if isShow: 38 | z = z/max(z) 39 | depth_image = render_texture(vertices.T, z.T, triangles.T, h, w, 1) 40 | return np.squeeze(depth_image) 41 | -------------------------------------------------------------------------------- /demo/face/utils/rotate_vertices.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def frontalize(vertices): 5 | canonical_vertices = np.load('utils/uv_data/canonical_vertices.npy') 6 | 7 | vertices_homo = np.hstack((vertices, np.ones([vertices.shape[0], 1]))) # n x 4 8 | P = np.linalg.lstsq(vertices_homo, canonical_vertices)[0].T # Affine matrix. 3 x 4 9 | front_vertices = vertices_homo.dot(P.T) 10 | 11 | return front_vertices 12 | -------------------------------------------------------------------------------- /demo/face/utils/uv_data/canonical_vertices.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/canonical_vertices.npy -------------------------------------------------------------------------------- /demo/face/utils/uv_data/uv_kpt_ind.txt: -------------------------------------------------------------------------------- 1 | 1.500000000000000000e+01 2.200000000000000000e+01 2.600000000000000000e+01 3.200000000000000000e+01 4.500000000000000000e+01 6.700000000000000000e+01 9.100000000000000000e+01 1.120000000000000000e+02 1.280000000000000000e+02 1.430000000000000000e+02 1.640000000000000000e+02 1.880000000000000000e+02 2.100000000000000000e+02 2.230000000000000000e+02 2.290000000000000000e+02 2.330000000000000000e+02 2.400000000000000000e+02 5.800000000000000000e+01 7.100000000000000000e+01 8.500000000000000000e+01 9.700000000000000000e+01 1.060000000000000000e+02 1.490000000000000000e+02 1.580000000000000000e+02 1.700000000000000000e+02 1.840000000000000000e+02 1.970000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.280000000000000000e+02 1.170000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.380000000000000000e+02 7.800000000000000000e+01 8.600000000000000000e+01 9.500000000000000000e+01 1.020000000000000000e+02 9.600000000000000000e+01 8.700000000000000000e+01 1.530000000000000000e+02 1.600000000000000000e+02 1.690000000000000000e+02 1.770000000000000000e+02 1.680000000000000000e+02 1.590000000000000000e+02 1.080000000000000000e+02 1.160000000000000000e+02 1.240000000000000000e+02 1.280000000000000000e+02 1.310000000000000000e+02 1.390000000000000000e+02 1.460000000000000000e+02 1.370000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02 1.180000000000000000e+02 1.100000000000000000e+02 1.220000000000000000e+02 1.280000000000000000e+02 1.330000000000000000e+02 1.450000000000000000e+02 1.320000000000000000e+02 1.280000000000000000e+02 1.230000000000000000e+02 
2 | 9.600000000000000000e+01 1.180000000000000000e+02 1.410000000000000000e+02 1.650000000000000000e+02 1.830000000000000000e+02 1.900000000000000000e+02 1.880000000000000000e+02 1.870000000000000000e+02 1.930000000000000000e+02 1.870000000000000000e+02 1.880000000000000000e+02 1.900000000000000000e+02 1.830000000000000000e+02 1.650000000000000000e+02 1.410000000000000000e+02 1.180000000000000000e+02 9.600000000000000000e+01 4.900000000000000000e+01 4.200000000000000000e+01 3.900000000000000000e+01 4.000000000000000000e+01 4.200000000000000000e+01 4.200000000000000000e+01 4.000000000000000000e+01 3.900000000000000000e+01 4.200000000000000000e+01 4.900000000000000000e+01 5.900000000000000000e+01 7.300000000000000000e+01 8.600000000000000000e+01 9.600000000000000000e+01 1.110000000000000000e+02 1.130000000000000000e+02 1.150000000000000000e+02 1.130000000000000000e+02 1.110000000000000000e+02 6.700000000000000000e+01 6.000000000000000000e+01 6.100000000000000000e+01 6.500000000000000000e+01 6.800000000000000000e+01 6.900000000000000000e+01 6.500000000000000000e+01 6.100000000000000000e+01 6.000000000000000000e+01 6.700000000000000000e+01 6.900000000000000000e+01 6.800000000000000000e+01 1.420000000000000000e+02 1.310000000000000000e+02 1.270000000000000000e+02 1.280000000000000000e+02 1.270000000000000000e+02 1.310000000000000000e+02 1.420000000000000000e+02 1.480000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.500000000000000000e+02 1.480000000000000000e+02 1.410000000000000000e+02 1.350000000000000000e+02 1.340000000000000000e+02 1.350000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02 1.420000000000000000e+02 1.430000000000000000e+02 3 | -------------------------------------------------------------------------------- /demo/face/utils/uv_data/uv_weight_mask_gdh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/face/utils/uv_data/uv_weight_mask_gdh.png -------------------------------------------------------------------------------- /demo/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/result.png -------------------------------------------------------------------------------- /demo/tensorrt_model.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | 3 | import tensorrt as trt 4 | import torch 5 | 6 | 7 | def torch_dtype_to_trt(dtype): 8 | if dtype == torch.int8: 9 | return trt.int8 10 | elif dtype == torch.int32: 11 | return trt.int32 12 | elif dtype == torch.float16: 13 | return trt.float16 14 | elif dtype == torch.float32: 15 | return trt.float32 16 | else: 17 | raise TypeError('%s is not supported by tensorrt' % dtype) 18 | 19 | 20 | def torch_dtype_from_trt(dtype): 21 | if dtype == trt.int8: 22 | return torch.int8 23 | elif dtype == trt.int32: 24 | return torch.int32 25 | elif dtype == trt.float16: 26 | return torch.float16 27 | elif dtype == trt.float32: 28 | return torch.float32 29 | else: 30 | raise TypeError('%s is not supported by torch' % dtype) 31 | 32 | 33 | def torch_device_to_trt(device): 34 | if device.type == torch.device('cuda').type: 35 | return trt.TensorLocation.DEVICE 36 | elif device.type == torch.device('cpu').type: 37 | return trt.TensorLocation.HOST 38 | else: 39 | return TypeError('%s is not 
supported by tensorrt' % device) 40 | 41 | 42 | def torch_device_from_trt(device): 43 | if device == trt.TensorLocation.DEVICE: 44 | return torch.device('cuda') 45 | elif device == trt.TensorLocation.HOST: 46 | return torch.device('cpu') 47 | else: 48 | return TypeError('%s is not supported by torch' % device) 49 | 50 | 51 | class TRTModel(object): 52 | 53 | def __init__(self, engine_path, input_names=None, output_names=None, final_shapes=None): 54 | 55 | # load engine 56 | self.logger = trt.Logger() 57 | self.runtime = trt.Runtime(self.logger) 58 | with open(engine_path, 'rb') as f: 59 | self.engine = self.runtime.deserialize_cuda_engine(f.read()) 60 | self.context = self.engine.create_execution_context() 61 | 62 | if input_names is None: 63 | self.input_names = self._trt_input_names() 64 | else: 65 | self.input_names = input_names 66 | 67 | if output_names is None: 68 | self.output_names = self._trt_output_names() 69 | else: 70 | self.output_names = output_names 71 | 72 | self.final_shapes = final_shapes 73 | 74 | def _input_binding_indices(self): 75 | return [i for i in range(self.engine.num_bindings) if self.engine.binding_is_input(i)] 76 | 77 | def _output_binding_indices(self): 78 | return [i for i in range(self.engine.num_bindings) if not self.engine.binding_is_input(i)] 79 | 80 | def _trt_input_names(self): 81 | return [self.engine.get_binding_name(i) for i in self._input_binding_indices()] 82 | 83 | def _trt_output_names(self): 84 | return [self.engine.get_binding_name(i) for i in self._output_binding_indices()] 85 | 86 | def create_output_buffers(self, batch_size): 87 | outputs = [None] * len(self.output_names) 88 | for i, output_name in enumerate(self.output_names): 89 | idx = self.engine.get_binding_index(output_name) 90 | dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) 91 | if self.final_shapes is not None: 92 | shape = (batch_size, ) + self.final_shapes[i] 93 | else: 94 | shape = (batch_size, ) + tuple(self.engine.get_binding_shape(idx)) 95 | device = torch_device_from_trt(self.engine.get_location(idx)) 96 | output = torch.empty(size=shape, dtype=dtype, device=device) 97 | outputs[i] = output 98 | return outputs 99 | 100 | def execute(self, *inputs): 101 | batch_size = inputs[0].shape[0] 102 | 103 | bindings = [None] * (len(self.input_names) + len(self.output_names)) 104 | 105 | # map input bindings 106 | inputs_torch = [None] * len(self.input_names) 107 | for i, name in enumerate(self.input_names): 108 | idx = self.engine.get_binding_index(name) 109 | 110 | # convert to appropriate format 111 | inputs_torch[i] = torch.from_numpy(inputs[i]) 112 | inputs_torch[i] = inputs_torch[i].to(torch_device_from_trt(self.engine.get_location(idx))) 113 | inputs_torch[i] = inputs_torch[i].type(torch_dtype_from_trt(self.engine.get_binding_dtype(idx))) 114 | 115 | bindings[idx] = int(inputs_torch[i].data_ptr()) 116 | 117 | output_buffers = self.create_output_buffers(batch_size) 118 | 119 | # map output bindings 120 | for i, name in enumerate(self.output_names): 121 | idx = self.engine.get_binding_index(name) 122 | bindings[idx] = int(output_buffers[i].data_ptr()) 123 | 124 | self.context.execute(batch_size, bindings) 125 | 126 | outputs = [buffer for buffer in output_buffers] 127 | 128 | return outputs 129 | 130 | def __call__(self, *inputs): 131 | return self.execute(*inputs) 132 | -------------------------------------------------------------------------------- /demo/tracking/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/__init__.py -------------------------------------------------------------------------------- /demo/tracking/deep_sort.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | 5 | from .feature_extractor import Extractor 6 | from .sort.detection import Detection 7 | from .sort.nn_matching import NearestNeighborDistanceMetric 8 | from .sort.preprocessing import non_max_suppression 9 | from .sort.tracker import Tracker 10 | 11 | 12 | class DeepSort(object): 13 | def __init__(self, model_path): 14 | self.min_confidence = 0.3 15 | self.nms_max_overlap = 1.0 16 | 17 | self.extractor = Extractor(model_path, use_cuda=True) 18 | 19 | max_cosine_distance = 0.2 20 | nn_budget = 100 21 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 22 | self.tracker = Tracker(metric) 23 | 24 | def update(self, bbox_xywh, confidences, ori_img): 25 | self.height, self.width = ori_img.shape[:2] 26 | 27 | 28 | # generate detections 29 | try : 30 | features = self._get_features(bbox_xywh, ori_img) 31 | except : 32 | print('a') 33 | detections = [Detection(bbox_xywh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence] 34 | 35 | 36 | # run on non-maximum supression 37 | boxes = np.array([d.tlwh for d in detections]) 38 | scores = np.array([d.confidence for d in detections]) 39 | indices = non_max_suppression( boxes, self.nms_max_overlap, scores) 40 | detections = [detections[i] for i in indices] 41 | 42 | 43 | # update tracker 44 | self.tracker.predict() 45 | self.tracker.update(detections) 46 | 47 | 48 | # output bbox identities 49 | outputs = [] 50 | for track in self.tracker.tracks: 51 | if not track.is_confirmed() or track.time_since_update > 1: 52 | continue 53 | box = track.to_tlwh() 54 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box) 55 | track_id = track.track_id 56 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 57 | if len(outputs) > 0: 58 | outputs = np.stack(outputs,axis=0) 59 | 60 | 61 | return outputs 62 | 63 | 64 | 65 | # for centernet (x1,x2 w,h -> x1,y1,x2,y2) 66 | def _xywh_to_xyxy_centernet(self, bbox_xywh): 67 | x1,y1,w,h = bbox_xywh 68 | x1 = max(x1,0) 69 | y1 = max(y1,0) 70 | x2 = min(int(x1+w),self.width-1) 71 | y2 = min(int(y1+h),self.height-1) 72 | return int(x1),int(y1),x2,y2 73 | 74 | # for yolo (centerx,centerx, w,h -> x1,y1,x2,y2) 75 | def _xywh_to_xyxy_yolo(self, bbox_xywh): 76 | x,y,w,h = bbox_xywh 77 | x1 = max(int(x-w/2),0) 78 | x2 = min(int(x+w/2),self.width-1) 79 | y1 = max(int(y-h/2),0) 80 | y2 = min(int(y+h/2),self.height-1) 81 | return x1,y1,x2,y2 82 | 83 | def _get_features(self, bbox_xywh, ori_img): 84 | features = [] 85 | for box in bbox_xywh: 86 | x1,y1,x2,y2 = self._xywh_to_xyxy_centernet(box) 87 | im = ori_img[y1:y2,x1:x2] 88 | feature = self.extractor(im)[0] 89 | features.append(feature) 90 | if len(features): 91 | features = np.stack(features, axis=0) 92 | else: 93 | features = np.array([]) 94 | return features 95 | 96 | if __name__ == '__main__': 97 | pass 98 | -------------------------------------------------------------------------------- /demo/tracking/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torchvision.transforms as transforms 5 | 6 | from .model import Net 7 | 8 | 9 | class 
Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | print("Loading weights from {}... Done!".format(model_path)) 16 | self.net.to(self.device) 17 | self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 18 | 19 | def __call__(self, img): 20 | assert isinstance(img, np.ndarray), "type error" 21 | img = img.astype(np.float)#/255. 22 | img = cv2.resize(img, (64,128)) 23 | img = torch.from_numpy(img).float().permute(2,0,1) 24 | img = self.norm(img).unsqueeze(0) 25 | with torch.no_grad(): 26 | img = img.to(self.device) 27 | feature = self.net(img) 28 | return feature.cpu().numpy() 29 | 30 | 31 | if __name__ == '__main__': 32 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 33 | extr = Extractor("checkpoint/ckpt.t7") 34 | feature = extr(img) 35 | print(feature.shape) 36 | -------------------------------------------------------------------------------- /demo/tracking/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out,is_downsample=False): 8 | super(BasicBlock,self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 12 | else: 13 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(c_out) 15 | self.relu = nn.ReLU(True) 16 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(c_out) 18 | if is_downsample: 19 | self.downsample = nn.Sequential( 20 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 21 | nn.BatchNorm2d(c_out) 22 | ) 23 | elif c_in != c_out: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 26 | nn.BatchNorm2d(c_out) 27 | ) 28 | self.is_downsample = True 29 | 30 | def forward(self,x): 31 | y = self.conv1(x) 32 | y = self.bn1(y) 33 | y = self.relu(y) 34 | y = self.conv2(y) 35 | y = self.bn2(y) 36 | if self.is_downsample: 37 | x = self.downsample(x) 38 | return F.relu(x.add(y),True) 39 | 40 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 41 | blocks = [] 42 | for i in range(repeat_times): 43 | if i ==0: 44 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 45 | else: 46 | blocks += [BasicBlock(c_out,c_out),] 47 | return nn.Sequential(*blocks) 48 | 49 | class Net(nn.Module): 50 | def __init__(self, num_classes=751 ,reid=False): 51 | super(Net,self).__init__() 52 | # 3 128 64 53 | self.conv = nn.Sequential( 54 | nn.Conv2d(3,64,3,stride=1,padding=1), 55 | nn.BatchNorm2d(64), 56 | nn.ReLU(inplace=True), 57 | # nn.Conv2d(32,32,3,stride=1,padding=1), 58 | # nn.BatchNorm2d(32), 59 | # nn.ReLU(inplace=True), 60 | nn.MaxPool2d(3,2,padding=1), 61 | ) 62 | # 32 64 32 63 | self.layer1 = make_layers(64,64,2,False) 64 | # 32 64 32 65 | self.layer2 = make_layers(64,128,2,True) 66 | # 64 32 16 67 | self.layer3 = make_layers(128,256,2,True) 68 | # 128 16 8 69 | self.layer4 = make_layers(256,512,2,True) 70 | # 256 8 4 71 | self.avgpool = nn.AvgPool2d((8,4),1) 72 | # 256 1 1 73 | self.reid = reid 74 | self.classifier = nn.Sequential( 75 | nn.Linear(512, 256), 76 | nn.BatchNorm1d(256), 77 | 
nn.ReLU(inplace=True), 78 | nn.Dropout(), 79 | nn.Linear(256, num_classes), 80 | ) 81 | 82 | def forward(self, x): 83 | x = self.conv(x) 84 | x = self.layer1(x) 85 | x = self.layer2(x) 86 | x = self.layer3(x) 87 | x = self.layer4(x) 88 | x = self.avgpool(x) 89 | x = x.view(x.size(0),-1) 90 | # B x 128 91 | if self.reid: 92 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 93 | return x 94 | # classifier 95 | x = self.classifier(x) 96 | return x 97 | 98 | 99 | if __name__ == '__main__': 100 | net = Net() 101 | x = torch.randn(4,3,128,64) 102 | y = net(x) 103 | import ipdb; ipdb.set_trace() 104 | -------------------------------------------------------------------------------- /demo/tracking/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/demo/tracking/sort/__init__.py -------------------------------------------------------------------------------- /demo/tracking/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /demo/tracking/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | 6 | from . import linear_assignment 7 | 8 | 9 | def iou(bbox, candidates): 10 | """Computer intersection over union. 11 | 12 | Parameters 13 | ---------- 14 | bbox : ndarray 15 | A bounding box in format `(top left x, top left y, width, height)`. 16 | candidates : ndarray 17 | A matrix of candidate bounding boxes (one per row) in the same format 18 | as `bbox`. 19 | 20 | Returns 21 | ------- 22 | ndarray 23 | The intersection over union in [0, 1] between the `bbox` and each 24 | candidate. A higher score means a larger fraction of the `bbox` is 25 | occluded by the candidate. 
26 | 27 | """ 28 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 29 | candidates_tl = candidates[:, :2] 30 | candidates_br = candidates[:, :2] + candidates[:, 2:] 31 | 32 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 33 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 34 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 35 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 36 | wh = np.maximum(0., br - tl) 37 | 38 | area_intersection = wh.prod(axis=1) 39 | area_bbox = bbox[2:].prod() 40 | area_candidates = candidates[:, 2:].prod(axis=1) 41 | return area_intersection / (area_bbox + area_candidates - area_intersection) 42 | 43 | 44 | def iou_cost(tracks, detections, track_indices=None, 45 | detection_indices=None): 46 | """An intersection over union distance metric. 47 | 48 | Parameters 49 | ---------- 50 | tracks : List[deep_sort.track.Track] 51 | A list of tracks. 52 | detections : List[deep_sort.detection.Detection] 53 | A list of detections. 54 | track_indices : Optional[List[int]] 55 | A list of indices to tracks that should be matched. Defaults to 56 | all `tracks`. 57 | detection_indices : Optional[List[int]] 58 | A list of indices to detections that should be matched. Defaults 59 | to all `detections`. 60 | 61 | Returns 62 | ------- 63 | ndarray 64 | Returns a cost matrix of shape 65 | len(track_indices), len(detection_indices) where entry (i, j) is 66 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 67 | 68 | """ 69 | if track_indices is None: 70 | track_indices = np.arange(len(tracks)) 71 | if detection_indices is None: 72 | detection_indices = np.arange(len(detections)) 73 | 74 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 75 | for row, track_idx in enumerate(track_indices): 76 | if tracks[track_idx].time_since_update > 1: 77 | cost_matrix[row, :] = linear_assignment.INFTY_COST 78 | continue 79 | 80 | bbox = tracks[track_idx].to_tlwh() 81 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 82 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 83 | return cost_matrix 84 | -------------------------------------------------------------------------------- /demo/tracking/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /demo/tracking/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurrence. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list.
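
    Lifecycle summary (as implemented below): a new track starts in the
    `Tentative` state, is promoted to `Confirmed` once it has been matched
    `n_init` times in a row, and is marked `Deleted` either when it is missed
    while still tentative or when `time_since_update` exceeds `max_age`.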
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, miny, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def predict(self, kf): 113 | """Propagate the state distribution to the current time step using a 114 | Kalman filter prediction step. 115 | 116 | Parameters 117 | ---------- 118 | kf : kalman_filter.KalmanFilter 119 | The Kalman filter. 120 | 121 | """ 122 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 123 | self.age += 1 124 | self.time_since_update += 1 125 | 126 | def update(self, kf, detection): 127 | """Perform Kalman filter measurement update step and update the feature 128 | cache. 129 | 130 | Parameters 131 | ---------- 132 | kf : kalman_filter.KalmanFilter 133 | The Kalman filter. 134 | detection : Detection 135 | The associated detection. 136 | 137 | """ 138 | self.mean, self.covariance = kf.update( 139 | self.mean, self.covariance, detection.to_xyah()) 140 | self.features.append(detection.feature) 141 | 142 | self.hits += 1 143 | self.time_since_update = 0 144 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 145 | self.state = TrackState.Confirmed 146 | 147 | def mark_missed(self): 148 | """Mark this track as missed (no association at the current time step). 149 | """ 150 | if self.state == TrackState.Tentative: 151 | self.state = TrackState.Deleted 152 | elif self.time_since_update > self._max_age: 153 | self.state = TrackState.Deleted 154 | 155 | def is_tentative(self): 156 | """Returns True if this track is tentative (unconfirmed). 157 | """ 158 | return self.state == TrackState.Tentative 159 | 160 | def is_confirmed(self): 161 | """Returns True if this track is confirmed.""" 162 | return self.state == TrackState.Confirmed 163 | 164 | def is_deleted(self): 165 | """Returns True if this track is dead and should be deleted.""" 166 | return self.state == TrackState.Deleted 167 | -------------------------------------------------------------------------------- /demo/tracking/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | 6 | from . import iou_matching, kalman_filter, linear_assignment 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 
20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features. 
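        # (Sketch of the two-stage association: the matching cascade below pairs
        #  confirmed tracks with detections using the appearance metric, gated by
        #  the Kalman state via gate_cost_matrix; detections left unmatched, the
        #  unconfirmed tracks, and tracks missed for exactly one frame then go
        #  through the IoU-based matching that follows.)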
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | self.tracks.append(Track( 136 | mean, covariance, self._next_id, self.n_init, self.max_age, 137 | detection.feature)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /demo/tracking/util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 | (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | ''' 19 | draw box of an id 20 | ''' 21 | x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # set color and label text 23 | color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | label = '{} {}'.format(cls_name, identity) 25 | # box text and bar 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | return img 31 | 32 | 33 | def draw_bboxes(img, bbox, identities=None, offset=(0,0)): 34 | for i,box in enumerate(bbox): 35 | x1,y1,x2,y2 = [int(i) for i in box] 36 | x1 += offset[0] 37 | x2 += offset[0] 38 | y1 += offset[1] 39 | y2 += offset[1] 40 | # box text and bar 41 | id = int(identities[i]) if identities is not 
None else 0 42 | color = COLORS_10[id%len(COLORS_10)] 43 | label = '{} {}'.format("object", id) 44 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 45 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 46 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 47 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 48 | return img 49 | 50 | def softmax(x): 51 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 52 | x_exp = np.exp(x*5) 53 | return x_exp/x_exp.sum() 54 | 55 | def softmin(x): 56 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 57 | x_exp = np.exp(-x) 58 | return x_exp/x_exp.sum() 59 | 60 | 61 | 62 | if __name__ == '__main__': 63 | x = np.arange(10)/10. 64 | x = np.array([0.5,0.5,0.5,0.6,1.]) 65 | y = softmax(x) 66 | z = softmin(x) 67 | import ipdb; ipdb.set_trace() 68 | -------------------------------------------------------------------------------- /experiments/darknet53_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'darknet53' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/darknet53' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 4 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'darknet' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | INTERMEDIATE_CHANNEL: 256 29 | HEAD_CONV: 256 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | 60 | DATASET: 61 | DATASET: 'coco_hp' 62 | TRAIN_SET: 'train' 63 | TEST_SET: 'valid' 64 | TRAIN_IMAGE_DIR: 'images/train2017' 65 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 66 | VAL_IMAGE_DIR: 'images/val2017' 67 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 68 | 69 | # training data augmentation 70 | MEAN: [0.408, 0.447, 0.470] 71 | STD: [0.289, 0.274, 0.278] 72 | SHIFT: 0.1 73 | SCALE: 0.4 74 | ROTATE: 0. 75 | # for pose 76 | AUG_ROT: 0. 
77 | FLIP: 0.5 78 | NO_COLOR_AUG: false 79 | 80 | ROT_FACTOR: 30 81 | SCALE_MIN: 0.5 82 | SCALE_MAX: 1.1 83 | IMAGE_SIZE: 512 84 | RANDOM_CROP: true 85 | 86 | TRAIN: 87 | OPTIMIZER: 'adam' 88 | DISTRIBUTE: true 89 | LOCAL_RANK: 0 90 | HIDE_DATA_TIME: false 91 | SAVE_ALL_MODEL: false 92 | RESUME: false 93 | LR_FACTOR: 0.1 94 | LR_STEP: [270, 300] 95 | EPOCHS: 320 96 | NUM_ITERS: -1 97 | LR: 1.875e-4 98 | BATCH_SIZE: 48 99 | MASTER_BATCH_SIZE: 12 100 | 101 | MOMENTUM: 0.9 102 | WD: 0.0001 103 | NESTEROV: false 104 | GAMMA1: 0.99 105 | GAMMA2: 0.0 106 | 107 | # 'apply and reset gradients every n batches' 108 | STRIDE_APPLY: 1 109 | 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/darknet53/model_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: true 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 100 126 | NMS: false 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: true 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.5 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/dla_34_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'sgd_lr6e3' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/dla34_lr6e3' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'dla_34' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 64 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'adam' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.8125e-3 99 | BATCH_SIZE: 72 100 | MASTER_BATCH_SIZE: 18 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/dla34_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: true 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 100 126 | NMS: true 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: false 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.4 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/efficientdet_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_efficientdet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/efficientdet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | INIT_WEIGHTS: false 23 | PRETRAINED: '' 24 | CENTER_THRESH: 0.1 25 | NUM_CLASSES: 1 26 | NAME: 'efficientdet' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 64 30 | INTERMEDIATE_CHANNEL: 172 31 | DOWN_RATIO: 4 32 | NUM_STACKS: 1 33 | INPUT_RES: 512 34 | OUTPUT_RES: 128 35 | INPUT_H: 512 36 | INPUT_W: 512 37 | PAD: 31 38 | NUM_KEYPOINTS: 17 39 | TAG_PER_JOINT: true 40 | TARGET_TYPE: 'gaussian' 41 | SIGMA: 2 42 | 43 | LOSS: 44 | METRIC: 'loss' 45 | MSE_LOSS: false 46 | REG_LOSS: 'l1' 47 | USE_OHKM: false 48 | TOPK: 8 49 | USE_TARGET_WEIGHT: true 50 | USE_DIFFERENT_JOINTS_WEIGHT: false 51 | HP_WEIGHT: 1. 52 | HM_HP_WEIGHT: 1. 53 | DENSE_HP: false 54 | HM_HP: true 55 | REG_BBOX: true 56 | WH_WEIGHT: 0.1 57 | REG_OFFSET: true 58 | OFF_WEIGHT: 1. 59 | REG_HP_OFFSET: true 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'adam' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.1875e-4 99 | BATCH_SIZE: 56 100 | MASTER_BATCH_SIZE: 14 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | CHECKPOINT: '' 111 | SHUFFLE: true 112 | VAL_INTERVALS: 1 113 | TRAINVAL: false 114 | 115 | TEST: 116 | # Test Model Epoch 117 | MODEL_PATH: '/home/tensorboy/data/centerpose/efficientdet/model_best.pth' 118 | TASK: 'multi_pose' 119 | FLIP_TEST: false 120 | 121 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 122 | MODEL_FILE: '' 123 | TEST_SCALES: [1] 124 | IMAGE_THRE: 0.1 125 | TOPK: 32 126 | NMS: false 127 | NMS_THRE: 0.5 128 | NOT_PREFETCH_TEST: false 129 | FIX_RES: true 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.5 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /experiments/ghost_net.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'ghostnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/ghostnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'ghostnet' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 160 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 2.5e-4 99 | BATCH_SIZE: 64 100 | MASTER_BATCH_SIZE: 16 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/hardnet_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'hardnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/hardnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: true 22 | PRETRAINED: '/data/pretrained_models/imagenet/hardnet_petite_base.pth' 23 | CENTER_THRESH: 0.1 24 | INTERMEDIATE_CHANNEL: 48 25 | NUM_CLASSES: 1 26 | NAME: 'hardnet' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 256 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 
78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 5.e-4 99 | BATCH_SIZE: 128 100 | MASTER_BATCH_SIZE: 32 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/data/centerpose/hardnet/model_best.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/hrnet_w32_512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_hrnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PIN_MEMORY: true 13 | RANK: 0 14 | PRINT_FREQ: 100 15 | CUDNN: 16 | BENCHMARK: true 17 | DETERMINISTIC: false 18 | ENABLED: true 19 | DATASET: 20 | DATASET: 'coco_hp' 21 | TRAIN_SET: 'train' 22 | TEST_SET: 'valid' 23 | TRAIN_IMAGE_DIR: 'images/train2017' 24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 25 | VAL_IMAGE_DIR: 'images/val2017' 26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 27 | 28 | # training data augmentation 29 | MEAN: [0.408, 0.447, 0.470] 30 | STD: [0.289, 0.274, 0.278] 31 | SHIFT: 0.1 32 | SCALE: 0.4 33 | ROTATE: 0. 34 | # for pose 35 | AUG_ROT: 0. 36 | FLIP: 0.5 37 | NO_COLOR_AUG: false 38 | 39 | ROT_FACTOR: 30 40 | SCALE_MIN: 0.5 41 | SCALE_MAX: 1.1 42 | IMAGE_SIZE: 512 43 | RANDOM_CROP: true 44 | 45 | LOSS: 46 | METRIC: 'loss' 47 | MSE_LOSS: false 48 | REG_LOSS: 'l1' 49 | USE_OHKM: false 50 | TOPK: 8 51 | USE_TARGET_WEIGHT: true 52 | USE_DIFFERENT_JOINTS_WEIGHT: false 53 | HP_WEIGHT: 1. 54 | HM_HP_WEIGHT: 1. 55 | DENSE_HP: false 56 | HM_HP: true 57 | REG_BBOX: true 58 | WH_WEIGHT: 0.1 59 | REG_OFFSET: true 60 | OFF_WEIGHT: 1. 61 | REG_HP_OFFSET: true 62 | HM_HP_WEIGHT: 1. 
63 | MODEL: 64 | HEADS_NAME: 'keypoint' 65 | INTERMEDIATE_CHANNEL: 32 66 | CENTER_THRESH: 0.1 67 | NUM_CLASSES: 1 68 | NAME: 'hrnet' 69 | DOWN_RATIO: 4 70 | NUM_STACKS: 1 71 | INPUT_RES: 512 72 | OUTPUT_RES: 128 73 | INPUT_H: 512 74 | INPUT_W: 512 75 | PAD: 31 76 | NUM_KEYPOINTS: 17 77 | SIGMA: 2 78 | HEAD_CONV: 64 79 | EXTRA: 80 | FINAL_CONV_KERNEL: 1 81 | PRETRAINED_LAYERS: ['*'] 82 | STEM_INPLANES: 64 83 | STAGE2: 84 | NUM_MODULES: 1 85 | NUM_BRANCHES: 2 86 | BLOCK: BASIC 87 | NUM_BLOCKS: 88 | - 4 89 | - 4 90 | NUM_CHANNELS: 91 | - 32 92 | - 64 93 | FUSE_METHOD: SUM 94 | STAGE3: 95 | NUM_MODULES: 4 96 | NUM_BRANCHES: 3 97 | BLOCK: BASIC 98 | NUM_BLOCKS: 99 | - 4 100 | - 4 101 | - 4 102 | NUM_CHANNELS: 103 | - 32 104 | - 64 105 | - 128 106 | FUSE_METHOD: SUM 107 | STAGE4: 108 | NUM_MODULES: 3 109 | NUM_BRANCHES: 4 110 | BLOCK: BASIC 111 | NUM_BLOCKS: 112 | - 4 113 | - 4 114 | - 4 115 | - 4 116 | NUM_CHANNELS: 117 | - 32 118 | - 64 119 | - 128 120 | - 256 121 | FUSE_METHOD: SUM 122 | DECONV: 123 | NUM_DECONVS: 0 124 | NUM_CHANNELS: 125 | - 32 126 | KERNEL_SIZE: 127 | - 4 128 | NUM_BASIC_BLOCKS: 4 129 | CAT_OUTPUT: 130 | - True 131 | INIT_WEIGHTS: true 132 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 133 | TAG_PER_JOINT: true 134 | TEST: 135 | # Test Model Epoch 136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth' 137 | TASK: 'multi_pose' 138 | FLIP_TEST: true 139 | FIX_RES: false 140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 141 | MODEL_FILE: '' 142 | TEST_SCALES: [1,2] 143 | IMAGE_THRE: 0.1 144 | TOPK: 100 145 | NMS: false 146 | NMS_THRE: 0.5 147 | NOT_PREFETCH_TEST: false 148 | 149 | KEEP_RES: false 150 | 151 | SOFT_NMS: false 152 | OKS_THRE: 0.5 153 | VIS_THRESH: 0.3 154 | KEYPOINT_THRESH: 0.2 155 | NUM_MIN_KPT: 4 156 | THRESH_HUMAN: 0.4 157 | 158 | EVAL_ORACLE_HM: false 159 | EVAL_ORACLE_WH: false 160 | EVAL_ORACLE_OFFSET: false 161 | EVAL_ORACLE_KPS: false 162 | EVAL_ORACLE_HMHP: false 163 | EVAL_ORACLE_HP_OFFSET: false 164 | EVAL_ORACLE_DEP: false 165 | TRAIN: 166 | DISTRIBUTE: true 167 | OPTIMIZER: 'sgd' 168 | LOCAL_RANK: 0 169 | HIDE_DATA_TIME: false 170 | SAVE_ALL_MODEL: false 171 | RESUME: false 172 | LR_FACTOR: 0.1 173 | LR_STEP: [270, 300] 174 | EPOCHS: 320 175 | NUM_ITERS: -1 176 | LR: 1.71875e-4 177 | BATCH_SIZE: 44 178 | MASTER_BATCH_SIZE: 11 179 | 180 | MOMENTUM: 0.9 181 | WD: 0.0001 182 | NESTEROV: false 183 | GAMMA1: 0.99 184 | GAMMA2: 0.0 185 | 186 | # 'apply and reset gradients every n batches' 187 | STRIDE_APPLY: 1 188 | 189 | CHECKPOINT: '' 190 | SHUFFLE: true 191 | VAL_INTERVALS: 1 192 | TRAINVAL: false 193 | -------------------------------------------------------------------------------- /experiments/hrnet_w48_512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_hrnet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/hrnet' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PIN_MEMORY: true 13 | RANK: 0 14 | PRINT_FREQ: 100 15 | CUDNN: 16 | BENCHMARK: true 17 | DETERMINISTIC: false 18 | ENABLED: true 19 | DATASET: 20 | DATASET: 'coco_hp' 21 | TRAIN_SET: 'train' 22 | TEST_SET: 'valid' 23 | TRAIN_IMAGE_DIR: 'images/train2017' 24 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 25 | VAL_IMAGE_DIR: 'images/val2017' 26 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 27 | 28 | # training data 
augmentation 29 | MEAN: [0.408, 0.447, 0.470] 30 | STD: [0.289, 0.274, 0.278] 31 | SHIFT: 0.1 32 | SCALE: 0.4 33 | ROTATE: 0. 34 | # for pose 35 | AUG_ROT: 0. 36 | FLIP: 0.5 37 | NO_COLOR_AUG: false 38 | 39 | ROT_FACTOR: 30 40 | SCALE_MIN: 0.5 41 | SCALE_MAX: 1.1 42 | IMAGE_SIZE: 512 43 | RANDOM_CROP: true 44 | 45 | LOSS: 46 | METRIC: 'loss' 47 | MSE_LOSS: false 48 | REG_LOSS: 'l1' 49 | USE_OHKM: false 50 | TOPK: 8 51 | USE_TARGET_WEIGHT: true 52 | USE_DIFFERENT_JOINTS_WEIGHT: false 53 | HP_WEIGHT: 1. 54 | HM_HP_WEIGHT: 1. 55 | DENSE_HP: false 56 | HM_HP: true 57 | REG_BBOX: true 58 | WH_WEIGHT: 0.1 59 | REG_OFFSET: true 60 | OFF_WEIGHT: 1. 61 | REG_HP_OFFSET: true 62 | HM_HP_WEIGHT: 1. 63 | MODEL: 64 | HEADS_NAME: 'keypoint' 65 | INTERMEDIATE_CHANNEL: 48 66 | CENTER_THRESH: 0.1 67 | NUM_CLASSES: 1 68 | NAME: 'hrnet' 69 | DOWN_RATIO: 4 70 | NUM_STACKS: 1 71 | INPUT_RES: 512 72 | OUTPUT_RES: 128 73 | INPUT_H: 512 74 | INPUT_W: 512 75 | PAD: 31 76 | NUM_KEYPOINTS: 17 77 | SIGMA: 2 78 | HEAD_CONV: 64 79 | EXTRA: 80 | FINAL_CONV_KERNEL: 1 81 | PRETRAINED_LAYERS: ['*'] 82 | STEM_INPLANES: 64 83 | STAGE2: 84 | NUM_MODULES: 1 85 | NUM_BRANCHES: 2 86 | BLOCK: BASIC 87 | NUM_BLOCKS: 88 | - 4 89 | - 4 90 | NUM_CHANNELS: 91 | - 48 92 | - 96 93 | FUSE_METHOD: SUM 94 | STAGE3: 95 | NUM_MODULES: 4 96 | NUM_BRANCHES: 3 97 | BLOCK: BASIC 98 | NUM_BLOCKS: 99 | - 4 100 | - 4 101 | - 4 102 | NUM_CHANNELS: 103 | - 48 104 | - 96 105 | - 192 106 | FUSE_METHOD: SUM 107 | STAGE4: 108 | NUM_MODULES: 3 109 | NUM_BRANCHES: 4 110 | BLOCK: BASIC 111 | NUM_BLOCKS: 112 | - 4 113 | - 4 114 | - 4 115 | - 4 116 | NUM_CHANNELS: 117 | - 48 118 | - 96 119 | - 192 120 | - 384 121 | FUSE_METHOD: SUM 122 | DECONV: 123 | NUM_DECONVS: 1 124 | NUM_CHANNELS: 125 | - 48 126 | KERNEL_SIZE: 127 | - 4 128 | NUM_BASIC_BLOCKS: 4 129 | CAT_OUTPUT: 130 | - True 131 | INIT_WEIGHTS: true 132 | PRETRAINED: '/home/tensorboy/data/pretrained_models/imagenet/hrnet_w48-8ef0771d.pth' 133 | TAG_PER_JOINT: true 134 | TEST: 135 | # Test Model Epoch 136 | MODEL_PATH: '/home/tensorboy/data/centerpose/hrnet/model_best.pth' 137 | TASK: 'multi_pose' 138 | FLIP_TEST: true 139 | FIX_RES: false 140 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 141 | MODEL_FILE: '' 142 | TEST_SCALES: [1,2] 143 | IMAGE_THRE: 0.1 144 | TOPK: 100 145 | NMS: false 146 | NMS_THRE: 0.5 147 | NOT_PREFETCH_TEST: false 148 | 149 | KEEP_RES: false 150 | 151 | SOFT_NMS: false 152 | OKS_THRE: 0.5 153 | VIS_THRESH: 0.3 154 | KEYPOINT_THRESH: 0.2 155 | NUM_MIN_KPT: 4 156 | THRESH_HUMAN: 0.4 157 | 158 | EVAL_ORACLE_HM: false 159 | EVAL_ORACLE_WH: false 160 | EVAL_ORACLE_OFFSET: false 161 | EVAL_ORACLE_KPS: false 162 | EVAL_ORACLE_HMHP: false 163 | EVAL_ORACLE_HP_OFFSET: false 164 | EVAL_ORACLE_DEP: false 165 | TRAIN: 166 | DISTRIBUTE: true 167 | OPTIMIZER: 'adam' 168 | LOCAL_RANK: 0 169 | HIDE_DATA_TIME: false 170 | SAVE_ALL_MODEL: false 171 | RESUME: false 172 | LR_FACTOR: 0.1 173 | LR_STEP: [270, 300] 174 | EPOCHS: 320 175 | NUM_ITERS: -1 176 | LR: 1.25e-4 177 | BATCH_SIZE: 32 178 | MASTER_BATCH_SIZE: 8 179 | 180 | MOMENTUM: 0.9 181 | WD: 0.0001 182 | NESTEROV: false 183 | GAMMA1: 0.99 184 | GAMMA2: 0.0 185 | 186 | # 'apply and reset gradients every n batches' 187 | STRIDE_APPLY: 1 188 | 189 | CHECKPOINT: '' 190 | SHUFFLE: true 191 | VAL_INTERVALS: 1 192 | TRAINVAL: false 193 | -------------------------------------------------------------------------------- /experiments/mobilenetv2_512x512.yaml: -------------------------------------------------------------------------------- 1 | 
SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'coco_pose_mobilenetv2' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/data/centerpose/mobilenetv2' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'mobilenetv2' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 24 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 5.e-4 99 | BATCH_SIZE: 128 100 | MASTER_BATCH_SIZE: 32 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/mobilenetv3_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 
'coco_pose_mobilenet' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/mobilenetv3' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | INIT_WEIGHTS: false 22 | PRETRAINED: '/data/pretrained_models/imagenet/hrnet_w32-36af842e.pth' 23 | CENTER_THRESH: 0.1 24 | NUM_CLASSES: 1 25 | NAME: 'mobilenetv3' 26 | HEADS_NAME: 'keypoint' 27 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 28 | HEAD_CONV: 256 29 | INTERMEDIATE_CHANNEL: 24 30 | DOWN_RATIO: 4 31 | NUM_STACKS: 1 32 | INPUT_RES: 512 33 | OUTPUT_RES: 128 34 | INPUT_H: 512 35 | INPUT_W: 512 36 | PAD: 31 37 | NUM_KEYPOINTS: 17 38 | TAG_PER_JOINT: true 39 | TARGET_TYPE: 'gaussian' 40 | SIGMA: 2 41 | 42 | LOSS: 43 | METRIC: 'loss' 44 | MSE_LOSS: false 45 | REG_LOSS: 'l1' 46 | USE_OHKM: false 47 | TOPK: 8 48 | USE_TARGET_WEIGHT: true 49 | USE_DIFFERENT_JOINTS_WEIGHT: false 50 | HP_WEIGHT: 1. 51 | HM_HP_WEIGHT: 1. 52 | DENSE_HP: false 53 | HM_HP: true 54 | REG_BBOX: true 55 | WH_WEIGHT: 0.1 56 | REG_OFFSET: true 57 | OFF_WEIGHT: 1. 58 | REG_HP_OFFSET: true 59 | HM_HP_WEIGHT: 1. 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | OPTIMIZER: 'adam' 89 | DISTRIBUTE: true 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [270, 300] 96 | EPOCHS: 320 97 | NUM_ITERS: -1 98 | LR: 3.359375e-4 99 | BATCH_SIZE: 86 100 | MASTER_BATCH_SIZE: 20 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/model_zoo/mobilenetV3_1x.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: false 131 | KEEP_RES: false 132 | 133 | SOFT_NMS: false 134 | OKS_THRE: 0.5 135 | VIS_THRESH: 0.3 136 | KEYPOINT_THRESH: 0.2 137 | NUM_MIN_KPT: 4 138 | THRESH_HUMAN: 0.4 139 | 140 | EVAL_ORACLE_HM: false 141 | EVAL_ORACLE_WH: false 142 | EVAL_ORACLE_OFFSET: false 143 | EVAL_ORACLE_KPS: false 144 | EVAL_ORACLE_HMHP: false 145 | EVAL_ORACLE_HP_OFFSET: false 146 | EVAL_ORACLE_DEP: false 147 | -------------------------------------------------------------------------------- /experiments/res_50_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/data' 3 | EXP_ID: 'coco_pose_res_50' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 
317 7 | OUTPUT_DIR: '/data/centerpose/res50_lre2' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 | PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | TASK : 'multi_pose' 17 | 18 | CUDNN: 19 | BENCHMARK: true 20 | 21 | MODEL: 22 | INIT_WEIGHTS: false 23 | PRETRAINED: '' 24 | CENTER_THRESH: 0.1 25 | NUM_CLASSES: 1 26 | NAME: 'res_50' 27 | HEADS_NAME: 'keypoint' 28 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 29 | HEAD_CONV: 64 30 | INTERMEDIATE_CHANNEL: 256 31 | DOWN_RATIO: 4 32 | NUM_STACKS: 1 33 | INPUT_RES: 512 34 | OUTPUT_RES: 128 35 | INPUT_H: 512 36 | INPUT_W: 512 37 | PAD: 31 38 | NUM_KEYPOINTS: 17 39 | TAG_PER_JOINT: true 40 | TARGET_TYPE: 'gaussian' 41 | SIGMA: 2 42 | 43 | LOSS: 44 | METRIC: 'loss' 45 | MSE_LOSS: false 46 | REG_LOSS: 'l1' 47 | USE_OHKM: false 48 | TOPK: 8 49 | USE_TARGET_WEIGHT: true 50 | USE_DIFFERENT_JOINTS_WEIGHT: false 51 | HP_WEIGHT: 1. 52 | HM_HP_WEIGHT: 1. 53 | DENSE_HP: false 54 | HM_HP: true 55 | REG_BBOX: true 56 | WH_WEIGHT: 0.1 57 | REG_OFFSET: true 58 | OFF_WEIGHT: 1. 59 | REG_HP_OFFSET: true 60 | 61 | DATASET: 62 | DATASET: 'coco_hp' 63 | TRAIN_SET: 'train' 64 | TEST_SET: 'valid' 65 | TRAIN_IMAGE_DIR: 'images/train2017' 66 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 67 | VAL_IMAGE_DIR: 'images/val2017' 68 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 69 | 70 | # training data augmentation 71 | MEAN: [0.408, 0.447, 0.470] 72 | STD: [0.289, 0.274, 0.278] 73 | SHIFT: 0.1 74 | SCALE: 0.4 75 | ROTATE: 0. 76 | # for pose 77 | AUG_ROT: 0. 78 | FLIP: 0.5 79 | NO_COLOR_AUG: false 80 | 81 | ROT_FACTOR: 30 82 | SCALE_MIN: 0.5 83 | SCALE_MAX: 1.1 84 | IMAGE_SIZE: 512 85 | RANDOM_CROP: true 86 | 87 | TRAIN: 88 | DISTRIBUTE: true 89 | OPTIMIZER: 'sgd' 90 | LOCAL_RANK: 0 91 | HIDE_DATA_TIME: false 92 | SAVE_ALL_MODEL: false 93 | RESUME: false 94 | LR_FACTOR: 0.1 95 | LR_STEP: [800, 900] 96 | EPOCHS: 1000 97 | NUM_ITERS: -1 98 | LR: 7.e-3 99 | BATCH_SIZE: 56 100 | MASTER_BATCH_SIZE: 14 101 | 102 | MOMENTUM: 0.9 103 | WD: 0.0001 104 | NESTEROV: false 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | 108 | # 'apply and reset gradients every n batches' 109 | STRIDE_APPLY: 1 110 | 111 | CHECKPOINT: '' 112 | SHUFFLE: true 113 | VAL_INTERVALS: 1 114 | TRAINVAL: false 115 | 116 | TEST: 117 | # Test Model Epoch 118 | MODEL_PATH: '/home/tensorboy/data/centerpose/res50/model_best.pth' 119 | TASK: 'multi_pose' 120 | FLIP_TEST: true 121 | 122 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 123 | MODEL_FILE: '' 124 | TEST_SCALES: [1] 125 | IMAGE_THRE: 0.1 126 | TOPK: 100 127 | NMS: false 128 | NMS_THRE: 0.5 129 | NOT_PREFETCH_TEST: false 130 | FIX_RES: true 131 | 132 | SOFT_NMS: false 133 | OKS_THRE: 0.5 134 | VIS_THRESH: 0.3 135 | KEYPOINT_THRESH: 0.2 136 | NUM_MIN_KPT: 4 137 | THRESH_HUMAN: 0.5 138 | 139 | EVAL_ORACLE_HM: false 140 | EVAL_ORACLE_WH: false 141 | EVAL_ORACLE_OFFSET: false 142 | EVAL_ORACLE_KPS: false 143 | EVAL_ORACLE_HMHP: false 144 | EVAL_ORACLE_HP_OFFSET: false 145 | EVAL_ORACLE_DEP: false 146 | -------------------------------------------------------------------------------- /experiments/shufflenetV2_512x512.yaml: -------------------------------------------------------------------------------- 1 | SAMPLE_METHOD: 'coco_hp' 2 | DATA_DIR: '/home/tensorboy/data' 3 | EXP_ID: 'coco_pose_shufflenetv2' 4 | DEBUG: 0 5 | DEBUG_THEME: 'white' 6 | SEED: 317 7 | OUTPUT_DIR: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd' 8 | LOG_DIR: '' 9 | EXPERIMENT_NAME: '' 10 | GPUS: [0, 1, 2, 3] 11 | WORKERS: 8 12 
| PRINT_FREQ: 0 13 | PIN_MEMORY: true 14 | RANK: 0 15 | SAVE_RESULTS: true 16 | 17 | CUDNN: 18 | BENCHMARK: true 19 | 20 | MODEL: 21 | CENTER_THRESH: 0.1 22 | NUM_CLASSES: 1 23 | NAME: 'shufflenetV2' 24 | HEADS_NAME: 'keypoint' 25 | HEADS_NUM: [1, 2, 34, 2, 17, 2] 26 | HEAD_CONV: 256 27 | INTERMEDIATE_CHANNEL: 256 28 | DOWN_RATIO: 4 29 | NUM_STACKS: 1 30 | INPUT_RES: 512 31 | OUTPUT_RES: 128 32 | INPUT_H: 512 33 | INPUT_W: 512 34 | PAD: 31 35 | NUM_KEYPOINTS: 17 36 | TAG_PER_JOINT: true 37 | TARGET_TYPE: 'gaussian' 38 | SIGMA: 2 39 | 40 | LOSS: 41 | METRIC: 'loss' 42 | MSE_LOSS: false 43 | REG_LOSS: 'l1' 44 | USE_OHKM: false 45 | TOPK: 8 46 | USE_TARGET_WEIGHT: true 47 | USE_DIFFERENT_JOINTS_WEIGHT: false 48 | HP_WEIGHT: 1. 49 | HM_HP_WEIGHT: 1. 50 | DENSE_HP: false 51 | HM_HP: true 52 | REG_BBOX: true 53 | WH_WEIGHT: 0.1 54 | REG_OFFSET: true 55 | OFF_WEIGHT: 1. 56 | REG_HP_OFFSET: true 57 | HM_HP_WEIGHT: 1. 58 | 59 | DATASET: 60 | DATASET: 'coco_hp' 61 | TRAIN_SET: 'train' 62 | TEST_SET: 'valid' 63 | TRAIN_IMAGE_DIR: 'images/train2017' 64 | TRAIN_ANNOTATIONS: ['person_keypoints_train2017.json'] 65 | VAL_IMAGE_DIR: 'images/val2017' 66 | VAL_ANNOTATIONS: 'person_keypoints_val2017.json' 67 | 68 | # training data augmentation 69 | MEAN: [0.408, 0.447, 0.470] 70 | STD: [0.289, 0.274, 0.278] 71 | SHIFT: 0.1 72 | SCALE: 0.4 73 | ROTATE: 0. 74 | # for pose 75 | AUG_ROT: 0. 76 | FLIP: 0.5 77 | NO_COLOR_AUG: false 78 | 79 | ROT_FACTOR: 30 80 | SCALE_MIN: 0.5 81 | SCALE_MAX: 1.1 82 | IMAGE_SIZE: 512 83 | RANDOM_CROP: true 84 | 85 | TRAIN: 86 | OPTIMIZER: 'adam' 87 | DISTRIBUTE: true 88 | LOCAL_RANK: 0 89 | HIDE_DATA_TIME: false 90 | SAVE_ALL_MODEL: false 91 | RESUME: false 92 | LR_FACTOR: 0.1 93 | LR_STEP: [270, 300] 94 | EPOCHS: 320 95 | NUM_ITERS: -1 96 | LR: 4.6875e-4 97 | BATCH_SIZE: 120 98 | MASTER_BATCH_SIZE: 30 99 | 100 | MOMENTUM: 0.9 101 | WD: 0.0001 102 | NESTEROV: false 103 | GAMMA1: 0.99 104 | GAMMA2: 0.0 105 | 106 | # 'apply and reset gradients every n batches' 107 | STRIDE_APPLY: 1 108 | 109 | CHECKPOINT: '' 110 | SHUFFLE: true 111 | VAL_INTERVALS: 1 112 | TRAINVAL: false 113 | 114 | TEST: 115 | # Test Model Epoch 116 | MODEL_PATH: '/home/tensorboy/data/centerpose/shufflenet_3x_sgd/model_best.pth' 117 | TASK: 'multi_pose' 118 | FLIP_TEST: true 119 | 120 | DEMO_FILE: '../images/33823288584_1d21cf0a26_k.jpg' 121 | MODEL_FILE: '' 122 | TEST_SCALES: [1] 123 | IMAGE_THRE: 0.1 124 | TOPK: 100 125 | NMS: false 126 | NMS_THRE: 0.5 127 | NOT_PREFETCH_TEST: false 128 | FIX_RES: false 129 | KEEP_RES: false 130 | 131 | SOFT_NMS: false 132 | OKS_THRE: 0.5 133 | VIS_THRESH: 0.3 134 | KEYPOINT_THRESH: 0.2 135 | NUM_MIN_KPT: 4 136 | THRESH_HUMAN: 0.4 137 | 138 | EVAL_ORACLE_HM: false 139 | EVAL_ORACLE_WH: false 140 | EVAL_ORACLE_OFFSET: false 141 | EVAL_ORACLE_KPS: false 142 | EVAL_ORACLE_HMHP: false 143 | EVAL_ORACLE_HP_OFFSET: false 144 | EVAL_ORACLE_DEP: false 145 | -------------------------------------------------------------------------------- /images/image1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/images/image1.jpeg -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import _C as cfg 2 | from .default import update_config 3 | -------------------------------------------------------------------------------- 
/lib/config/default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from yacs.config import CfgNode as CN 4 | 5 | _C = CN() 6 | 7 | _C.TASK = 'multi_pose' 8 | _C.SAMPLE_METHOD = 'coco_hp' 9 | _C.DATA_DIR = '/data' 10 | _C.EXP_ID = 'default' 11 | _C.DEBUG = 0 12 | _C.DEBUG_THEME = 'white' 13 | _C.TEST = False 14 | _C.SEED = 317 15 | _C.SAVE_RESULTS = False 16 | 17 | _C.OUTPUT_DIR = '' 18 | _C.LOG_DIR = '' 19 | _C.EXPERIMENT_NAME = '' 20 | _C.GPUS = [0, 1, 2, 3] 21 | _C.WORKERS = 4 22 | _C.PRINT_FREQ = 20 23 | _C.PIN_MEMORY = True 24 | _C.RANK = 0 25 | 26 | # Cudnn related params 27 | _C.CUDNN = CN() 28 | _C.CUDNN.ENABLED = True 29 | _C.CUDNN.BENCHMARK = True 30 | _C.CUDNN.DETERMINISTIC = False 31 | 32 | # common params for NETWORK 33 | _C.MODEL = CN() 34 | _C.MODEL.PRETRAINED = '' 35 | _C.MODEL.INIT_WEIGHTS = False 36 | _C.MODEL.NAME = 'res_50' 37 | # 0 for no conv layer, -1 for defaults setting, 64 for resnets and 256 for dla 38 | _C.MODEL.HEAD_CONV = 64 39 | _C.MODEL.INTERMEDIATE_CHANNEL = 64 40 | _C.MODEL.NUM_STACKS = 1 41 | _C.MODEL.HEADS_NAME = 'keypoint' 42 | _C.MODEL.HEADS_NUM = [1, 2, 34, 2, 17, 2] 43 | _C.MODEL.DOWN_RATIO = 4 44 | _C.MODEL.INPUT_RES = 512 45 | _C.MODEL.OUTPUT_RES = 128 46 | _C.MODEL.INPUT_H = 512 47 | _C.MODEL.INPUT_W = 512 48 | _C.MODEL.PAD = 31 49 | _C.MODEL.NUM_CLASSES = 1 50 | _C.MODEL.NUM_KEYPOINTS = 17 51 | _C.MODEL.TAG_PER_JOINT = True 52 | _C.MODEL.TARGET_TYPE = 'gaussian' 53 | _C.MODEL.SIGMA = 2 54 | _C.MODEL.CENTER_THRESH = 0.1 55 | _C.MODEL.EXTRA = CN(new_allowed=True) 56 | 57 | _C.LOSS = CN() 58 | _C.LOSS.METRIC = 'loss' 59 | _C.LOSS.MSE_LOSS = False 60 | _C.LOSS.USE_OHKM = False 61 | _C.LOSS.TOPK = 8 62 | _C.LOSS.USE_TARGET_WEIGHT = True 63 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 64 | 65 | # multi pose 66 | _C.LOSS.HP_WEIGHT = 1. 67 | _C.LOSS.HM_WEIGHT = 1. 68 | _C.LOSS.REG_LOSS = 'l1' 69 | _C.LOSS.HM_HP_WEIGHT = 1. 70 | _C.LOSS.DENSE_HP = False 71 | _C.LOSS.HM_HP = True 72 | _C.LOSS.REG_HP_OFFSET = True 73 | _C.LOSS.REG_BBOX = True 74 | _C.LOSS.WH_WEIGHT = 0.1 75 | _C.LOSS.REG_OFFSET = True 76 | _C.LOSS.OFF_WEIGHT = 1. 77 | 78 | 79 | # DATASET related params 80 | _C.DATASET = CN() 81 | _C.DATASET.DATASET = 'coco_hp' 82 | _C.DATASET.TRAIN_SET = 'train' 83 | _C.DATASET.TEST_SET = 'valid' 84 | _C.DATASET.TRAIN_IMAGE_DIR = 'images/train2017' 85 | _C.DATASET.TRAIN_ANNOTATIONS = ['person_keypoints_train2017.json'] 86 | _C.DATASET.VAL_IMAGE_DIR = 'images/val2017' 87 | _C.DATASET.VAL_ANNOTATIONS = 'person_keypoints_val2017.json' 88 | # training data augmentation 89 | _C.DATASET.MEAN = [0.408, 0.447, 0.470] 90 | _C.DATASET.STD = [0.289, 0.274, 0.278] 91 | _C.DATASET.RANDOM_CROP = True 92 | _C.DATASET.SHIFT = 0.1 93 | _C.DATASET.SCALE = 0.4 94 | _C.DATASET.ROTATE = 0. 95 | # for pose 96 | _C.DATASET.AUG_ROT = 0. 
97 | _C.DATASET.FLIP = 0.5 98 | _C.DATASET.NO_COLOR_AUG = False 99 | _C.DATASET.ROT_FACTOR = 30 100 | _C.DATASET.SCALE_MIN = 0.5 101 | _C.DATASET.SCALE_MAX = 1.1 102 | _C.DATASET.IMAGE_SIZE = 512 103 | 104 | # train 105 | _C.TRAIN = CN() 106 | 107 | _C.TRAIN.DISTRIBUTE = True 108 | _C.TRAIN.LOCAL_RANK = 0 109 | _C.TRAIN.HIDE_DATA_TIME = False 110 | _C.TRAIN.SAVE_ALL_MODEL = False 111 | _C.TRAIN.RESUME = False 112 | _C.TRAIN.LR_FACTOR = 0.1 113 | _C.TRAIN.LR_STEP = [90, 120] 114 | _C.TRAIN.EPOCHS = 140 115 | _C.TRAIN.NUM_ITERS = -1 116 | _C.TRAIN.LR = 1.25e-4 117 | _C.TRAIN.BATCH_SIZE = 32 118 | _C.TRAIN.MASTER_BATCH_SIZE = -1 119 | 120 | 121 | _C.TRAIN.OPTIMIZER = 'adam' 122 | _C.TRAIN.MOMENTUM = 0.9 123 | _C.TRAIN.WD = 0.0001 124 | _C.TRAIN.NESTEROV = False 125 | _C.TRAIN.GAMMA1 = 0.99 126 | _C.TRAIN.GAMMA2 = 0.0 127 | 128 | 129 | # 'apply and reset gradients every n batches' 130 | _C.TRAIN.STRIDE_APPLY = 1 131 | 132 | _C.TRAIN.RESUME = False 133 | _C.TRAIN.CHECKPOINT = '' 134 | _C.TRAIN.SHUFFLE = True 135 | _C.TRAIN.VAL_INTERVALS = 5 136 | _C.TRAIN.TRAINVAL = False 137 | 138 | # testing 139 | _C.TEST = CN() 140 | # size of images for each device 141 | _C.TEST.BATCH_SIZE_PER_GPU = 32 142 | # Test Model Epoch 143 | _C.TEST.FLIP_TEST = False 144 | _C.TEST.TASK = 'multi_pose' 145 | _C.TEST.MODEL_PATH = '' 146 | _C.TEST.DEMO_FILE = '' 147 | _C.TEST.MODEL_FILE = '' 148 | _C.TEST.TEST_SCALES = [1] 149 | _C.TEST.IMAGE_THRE = 0.1 150 | _C.TEST.TOPK = 100 151 | _C.TEST.NMS = False 152 | _C.TEST.NMS_THRE = 0.5 153 | _C.TEST.NOT_PREFETCH_TEST = False 154 | _C.TEST.FIX_RES = True 155 | _C.TEST.KEEP_RES = False 156 | 157 | _C.TEST.SOFT_NMS = False 158 | _C.TEST.OKS_THRE = 0.5 159 | _C.TEST.VIS_THRESH = 0.3 160 | _C.TEST.KEYPOINT_THRESH = 0.2 161 | _C.TEST.NUM_MIN_KPT = 4 162 | _C.TEST.THRESH_HUMAN = 0.4 163 | 164 | _C.TEST.EVAL_ORACLE_HM = False 165 | _C.TEST.EVAL_ORACLE_WH = False 166 | _C.TEST.EVAL_ORACLE_OFFSET = False 167 | _C.TEST.EVAL_ORACLE_KPS = False 168 | _C.TEST.EVAL_ORACLE_HMHP = False 169 | _C.TEST.EVAL_ORACLE_HP_OFFSET = False 170 | _C.TEST.EVAL_ORACLE_DEP = False 171 | 172 | 173 | def update_config(cfg, args_cfg): 174 | 175 | cfg.defrost() 176 | cfg.merge_from_file(args_cfg) 177 | cfg.freeze() 178 | 179 | 180 | if __name__ == '__main__': 181 | import sys 182 | 183 | with open(sys.argv[1], 'w') as f: 184 | print(_C, file=f) 185 | -------------------------------------------------------------------------------- /lib/datasets/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import json 4 | import os 5 | import time 6 | 7 | import numpy as np 8 | import pycocotools.coco as coco 9 | import torch.utils.data as data 10 | from pycocotools.cocoeval import COCOeval 11 | 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 1 15 | num_joints = 17 16 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 17 | [11, 12], [13, 14], [15, 16]] 18 | 19 | def __init__(self, cfg, split): 20 | super(COCOHP, self).__init__() 21 | 22 | self.data_dir = os.path.join(cfg.DATA_DIR, 'coco') 23 | self.img_dir = os.path.join(self.data_dir, 'images', '{}2017'.format(split)) 24 | if split == 'test': 25 | self.annot_path = os.path.join( 26 | self.data_dir, 'annotations', 27 | 'image_info_test-dev2017.json').format(split) 28 | else: 29 | self.annot_path = os.path.join( 30 | self.data_dir, 'annotations', 31 | 'person_keypoints_{}2017.json').format(split) 32 | self.max_objs = 32 33 | self._valid_ids = 
[1] 34 | self.class_name = ['__background__', 'person'] 35 | self._data_rng = np.random.RandomState(123) 36 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 37 | dtype=np.float32) 38 | self._eig_vec = np.array([ 39 | [-0.58752847, -0.69563484, 0.41340352], 40 | [-0.5832747, 0.00994535, -0.81221408], 41 | [-0.56089297, 0.71832671, 0.41158938] 42 | ], dtype=np.float32) 43 | self.split = split 44 | self.cfg = cfg 45 | 46 | print('==> initializing coco 2017 {} data.'.format(split)) 47 | self.coco = coco.COCO(self.annot_path) 48 | images = self.coco.getImgIds() 49 | catIds = self.coco.getCatIds(self.class_name[-1]) 50 | assert catIds == self._valid_ids 51 | self.images = self.coco.getImgIds(images,catIds) 52 | self.num_samples = len(self.images) 53 | 54 | print('Loaded {} {} samples'.format(split, self.num_samples)) 55 | 56 | def _to_float(self, x): 57 | return float("{:.2f}".format(x)) 58 | 59 | def convert_eval_format(self, all_bboxes): 60 | detections = [] 61 | for image_id in all_bboxes: 62 | category_id = 1 63 | for dets in all_bboxes[image_id][category_id]: 64 | bbox = dets[:4] 65 | bbox[2] -= bbox[0] 66 | bbox[3] -= bbox[1] 67 | score = dets[4] 68 | keypoint_prob = np.array(np.array(dets[39:56])>0.1).astype(np.int32).reshape(17,1) 69 | keypoints = np.array(dets[5:39], dtype=np.float32).reshape(-1, 2) 70 | bbox_out = list(map(self._to_float, bbox)) 71 | keypoints_pred = np.concatenate([ 72 | keypoints, keypoint_prob], axis=1).reshape(51).tolist() 73 | keypoints_pred = list(map(self._to_float, keypoints_pred)) 74 | 75 | detection = { 76 | "image_id": int(image_id), 77 | "category_id": int(category_id), 78 | "bbox": bbox_out, 79 | "score": float("{:.2f}".format(score)), 80 | "keypoints": keypoints_pred 81 | } 82 | detections.append(detection) 83 | return detections 84 | 85 | def __len__(self): 86 | return self.num_samples 87 | 88 | def save_results(self, results, save_dir): 89 | json.dump(self.convert_eval_format(results), 90 | open('{}/results.json'.format(save_dir), 'w')) 91 | 92 | 93 | def run_eval(self, results, save_dir): 94 | #self.save_results(results, save_dir) 95 | #seconds = time.time() 96 | #local_time = time.ctime(seconds).replace(' ', '_').replace(':','_') 97 | #coco_dets = self.coco.loadRes('{}/{}_results.json'.format(save_dir, local_time)) 98 | coco_dets = self.coco.loadRes(self.convert_eval_format(results)) 99 | #coco_eval = COCOeval(self.coco, coco_dets, "bbox") 100 | #coco_eval.evaluate() 101 | #coco_eval.accumulate() 102 | 103 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 104 | coco_eval.evaluate() 105 | coco_eval.accumulate() 106 | coco_eval.summarize() 107 | return coco_eval.stats[0] 108 | -------------------------------------------------------------------------------- /lib/datasets/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | COCO_PERSON_SKELETON = [ 4 | (16, 14), (14, 12), (17, 15), (15, 13), (12, 13), (6, 12), (7, 13), 5 | (6, 7), (6, 8), (7, 9), (8, 10), (9, 11), (2, 3), (1, 2), (1, 3), 6 | (2, 4), (3, 5), (4, 6), (5, 7), 7 | ] 8 | 9 | COCO_KEYPOINTS = [ 10 | 'nose', # 1 11 | 'left_eye', # 2 12 | 'right_eye', # 3 13 | 'left_ear', # 4 14 | 'right_ear', # 5 15 | 'left_shoulder', # 6 16 | 'right_shoulder', # 7 17 | 'left_elbow', # 8 18 | 'right_elbow', # 9 19 | 'left_wrist', # 10 20 | 'right_wrist', # 11 21 | 'left_hip', # 12 22 | 'right_hip', # 13 23 | 'left_knee', # 14 24 | 'right_knee', # 15 25 | 'left_ankle', # 16 26 | 'right_ankle', # 17 27 | ] 28 | 29 | 30 | 
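# NOTE: the pairs in COCO_PERSON_SKELETON above are 1-based indices into
# COCO_KEYPOINTS, following the COCO annotation convention. For example,
# (16, 14) is the left_ankle/left_knee limb:
#
#     limbs = [(COCO_KEYPOINTS[a - 1], COCO_KEYPOINTS[b - 1])
#              for a, b in COCO_PERSON_SKELETON]
#     limbs[0]  # ('left_ankle', 'left_knee')
#
# The 0-based flip_idx pairs hard-coded in datasets/coco_hp.py and in the
# multi_pose detector mirror the left/right joints that HFLIP below lists by name.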
HFLIP = { 31 | 'left_eye': 'right_eye', 32 | 'right_eye': 'left_eye', 33 | 'left_ear': 'right_ear', 34 | 'right_ear': 'left_ear', 35 | 'left_shoulder': 'right_shoulder', 36 | 'right_shoulder': 'left_shoulder', 37 | 'left_elbow': 'right_elbow', 38 | 'right_elbow': 'left_elbow', 39 | 'left_wrist': 'right_wrist', 40 | 'right_wrist': 'left_wrist', 41 | 'left_hip': 'right_hip', 42 | 'right_hip': 'left_hip', 43 | 'left_knee': 'right_knee', 44 | 'right_knee': 'left_knee', 45 | 'left_ankle': 'right_ankle', 46 | 'right_ankle': 'left_ankle', 47 | } 48 | -------------------------------------------------------------------------------- /lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from .coco_hp import COCOHP 4 | from .multi_pose import MultiPoseDataset 5 | 6 | dataset_factory = { 7 | 'coco_hp': COCOHP, 8 | } 9 | 10 | _sample_factory = { 11 | 'multi_pose': MultiPoseDataset, 12 | } 13 | 14 | 15 | def get_dataset(dataset, task): 16 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 17 | pass 18 | return Dataset 19 | -------------------------------------------------------------------------------- /lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from progress.bar import Bar 9 | 10 | from models.model import create_model, load_model 11 | from utils.debugger import Debugger 12 | from utils.image import get_affine_transform 13 | 14 | 15 | class BaseDetector(object): 16 | def __init__(self, cfg): 17 | 18 | print('Creating model...') 19 | self.model = create_model(cfg.MODEL.NAME, cfg.MODEL.HEAD_CONV, cfg) 20 | self.model = load_model(self.model, cfg.TEST.MODEL_PATH) 21 | self.model = self.model.to(torch.device('cuda')) 22 | self.model.eval() 23 | 24 | self.mean = np.array(cfg.DATASET.MEAN, dtype=np.float32).reshape(1, 1, 3) 25 | self.std = np.array(cfg.DATASET.STD, dtype=np.float32).reshape(1, 1, 3) 26 | self.max_per_image = 100 27 | self.num_classes = cfg.MODEL.NUM_CLASSES 28 | self.scales = cfg.TEST.TEST_SCALES 29 | self.cfg = cfg 30 | self.pause = True 31 | 32 | def pre_process(self, image, scale, meta=None): 33 | height, width = image.shape[0:2] 34 | 35 | new_height = int(height * scale) 36 | new_width = int(width * scale) 37 | if self.cfg.TEST.FIX_RES: 38 | inp_height, inp_width = self.cfg.MODEL.INPUT_H, self.cfg.MODEL.INPUT_W 39 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 40 | s = max(height, width) * 1.0 41 | else: 42 | inp_height = (new_height | self.cfg.MODEL.PAD) + 1 43 | inp_width = (new_width | self.cfg.MODEL.PAD) + 1 44 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 45 | s = np.array([inp_width, inp_height], dtype=np.float32) 46 | 47 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 48 | resized_image = cv2.resize(image, (new_width, new_height)) 49 | inp_image = cv2.warpAffine( 50 | resized_image, trans_input, (inp_width, inp_height), 51 | flags=cv2.INTER_LINEAR) 52 | 53 | inp_image = ((inp_image / 255. 
- self.mean) / self.std).astype(np.float32) 54 | 55 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 56 | if self.cfg.TEST.FLIP_TEST: 57 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 58 | images = torch.from_numpy(images) 59 | meta = {'c': c, 's': s, 60 | 'out_height': inp_height // self.cfg.MODEL.DOWN_RATIO, 61 | 'out_width': inp_width // self.cfg.MODEL.DOWN_RATIO} 62 | return images, meta 63 | 64 | def process(self, images, return_time=False): 65 | raise NotImplementedError 66 | 67 | def post_process(self, dets, meta, scale=1): 68 | raise NotImplementedError 69 | 70 | def merge_outputs(self, detections): 71 | raise NotImplementedError 72 | 73 | def debug(self, debugger, images, dets, output, scale=1): 74 | raise NotImplementedError 75 | 76 | def show_results(self, debugger, image, results): 77 | raise NotImplementedError 78 | 79 | def run(self, image_or_path_or_tensor, meta=None): 80 | load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0 81 | merge_time, tot_time = 0, 0 82 | debugger = Debugger((self.cfg.DEBUG==3), theme=self.cfg.DEBUG_THEME, 83 | num_classes=self.cfg.MODEL.NUM_CLASSES, dataset=self.cfg.SAMPLE_METHOD, down_ratio=self.cfg.MODEL.DOWN_RATIO) 84 | start_time = time.time() 85 | pre_processed = False 86 | if isinstance(image_or_path_or_tensor, np.ndarray): 87 | image = image_or_path_or_tensor 88 | elif type(image_or_path_or_tensor) == type (''): 89 | image = cv2.imread(image_or_path_or_tensor) 90 | else: 91 | image = image_or_path_or_tensor['image'][0].numpy() 92 | pre_processed_images = image_or_path_or_tensor 93 | pre_processed = True 94 | 95 | loaded_time = time.time() 96 | load_time += (loaded_time - start_time) 97 | 98 | detections = [] 99 | for scale in self.scales: 100 | scale_start_time = time.time() 101 | if not pre_processed: 102 | images, meta = self.pre_process(image, scale, meta) 103 | else: 104 | images = pre_processed_images['images'][scale][0] 105 | meta = pre_processed_images['meta'][scale] 106 | meta = {k: v.numpy()[0] for k, v in meta.items()} 107 | images = images.to(torch.device('cuda')) 108 | torch.cuda.synchronize() 109 | pre_process_time = time.time() 110 | pre_time += pre_process_time - scale_start_time 111 | 112 | output, dets, forward_time = self.process(images, return_time=True) 113 | 114 | torch.cuda.synchronize() 115 | net_time += forward_time - pre_process_time 116 | decode_time = time.time() 117 | dec_time += decode_time - forward_time 118 | 119 | if self.cfg.DEBUG >= 2: 120 | self.debug(debugger, images, dets, output, scale) 121 | 122 | dets= self.post_process(dets, meta, scale) 123 | torch.cuda.synchronize() 124 | post_process_time = time.time() 125 | post_time += post_process_time - decode_time 126 | 127 | detections.append(dets) 128 | 129 | results = self.merge_outputs(detections) 130 | torch.cuda.synchronize() 131 | end_time = time.time() 132 | merge_time += end_time - post_process_time 133 | tot_time += end_time - start_time 134 | 135 | if self.cfg.DEBUG >= 1: 136 | self.show_results(debugger, image, results) 137 | 138 | return {'results': {1:results}, 'tot': tot_time, 'load': load_time, 139 | 'pre': pre_time, 'net': net_time, 'dec': dec_time, 140 | 'post': post_time, 'merge': merge_time} 141 | -------------------------------------------------------------------------------- /lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from 
.multi_pose import MultiPoseDetector 4 | 5 | detector_factory = { 6 | 'multi_pose': MultiPoseDetector, 7 | } 8 | -------------------------------------------------------------------------------- /lib/detectors/multi_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from progress.bar import Bar 9 | 10 | from models.decode import multi_pose_decode 11 | from models.utils import flip_lr, flip_lr_off, flip_tensor 12 | from utils.debugger import Debugger 13 | from utils.image import get_affine_transform 14 | from utils.post_process import multi_pose_post_process 15 | from .base_detector import BaseDetector 16 | 17 | try: 18 | from external.nms import soft_nms_39 19 | except: 20 | print('NMS not imported! If you need it,' 21 | ' do \n cd $CenterNet_ROOT/src/lib/external \n make') 22 | 23 | 24 | class MultiPoseDetector(BaseDetector): 25 | def __init__(self, cfg): 26 | super(MultiPoseDetector, self).__init__(cfg) 27 | self.flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] 28 | 29 | def process(self, images, return_time=False): 30 | with torch.no_grad(): 31 | torch.cuda.synchronize() 32 | outputs = self.model(images) 33 | hm, wh, hps, reg, hm_hp, hp_offset = outputs 34 | 35 | hm = hm.sigmoid_() 36 | if self.cfg.LOSS.HM_HP and not self.cfg.LOSS.MSE_LOSS: 37 | hm_hp = hm_hp.sigmoid_() 38 | 39 | reg = reg if self.cfg.LOSS.REG_OFFSET else None 40 | hm_hp = hm_hp if self.cfg.LOSS.HM_HP else None 41 | hp_offset = hp_offset if self.cfg.LOSS.REG_HP_OFFSET else None 42 | torch.cuda.synchronize() 43 | forward_time = time.time() 44 | 45 | if self.cfg.TEST.FLIP_TEST: 46 | hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2 47 | wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2 48 | hps = (hps[0:1] + 49 | flip_lr_off(hps[1:2], self.flip_idx)) / 2 50 | hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \ 51 | if hm_hp is not None else None 52 | reg = reg[0:1] if reg is not None else None 53 | hp_offset = hp_offset[0:1] if hp_offset is not None else None 54 | 55 | dets = multi_pose_decode(hm, wh, hps, reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.cfg.TEST.TOPK) 56 | 57 | if return_time: 58 | return outputs, dets, forward_time 59 | else: 60 | return outputs, dets 61 | 62 | def post_process(self, dets, meta, scale=1): 63 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 64 | dets= multi_pose_post_process( 65 | dets.copy(), [meta['c']], [meta['s']], 66 | meta['out_height'], meta['out_width']) 67 | for j in range(1, self.num_classes + 1): 68 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 56) 69 | dets[0][j][:, :4] /= scale 70 | dets[0][j][:, 5:39] /= scale 71 | return dets[0] 72 | 73 | def merge_outputs(self, detections): 74 | results = np.concatenate( 75 | [detection[1] for detection in detections], axis=0).astype(np.float32) 76 | if self.cfg.TEST.NMS or len(self.cfg.TEST.TEST_SCALES) > 1: 77 | soft_nms_39(results, Nt=0.5, method=2) 78 | results = results.tolist() 79 | return results 80 | 81 | 82 | def debug(self, debugger, images, dets, output, scale=1): 83 | dets = dets.detach().cpu().numpy().copy() 84 | dets[:, :, :4] *= self.cfg.MODEL.DOWN_RATIO 85 | dets[:, :, 5:39] *= self.cfg.MODEL.DOWN_RATIO 86 | img = images[0].detach().cpu().numpy().transpose(1, 2, 0) 87 | img = np.clip((( 88 | img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8) 89 | pred = 
debugger.gen_colormap(output['hm'][0].detach().cpu().numpy()) 90 | debugger.add_blend_img(img, pred, 'pred_hm') 91 | if self.LOSS.HM_HP: 92 | pred = debugger.gen_colormap_hp( 93 | output['hm_hp'][0].detach().cpu().numpy()) 94 | debugger.add_blend_img(img, pred, 'pred_hmhp') 95 | 96 | def show_results(self, debugger, image, results): 97 | debugger.add_img(image, img_id='multi_pose') 98 | for b_id, detection in enumerate(results): 99 | bbox = detection[:4] 100 | bbox_prob = detection[4] 101 | keypoints = detection[5:39] 102 | keypoints_prob = detection[39:] 103 | if bbox_prob > self.cfg.TEST.VIS_THRESH: 104 | debugger.add_coco_bbox(bbox, 0, bbox_prob, img_id='multi_pose') 105 | debugger.add_coco_hp(keypoints, keypoints_prob, img_id='multi_pose') 106 | 107 | debugger.show_all_imgs(pause=self.pause) 108 | -------------------------------------------------------------------------------- /lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/__init__.py -------------------------------------------------------------------------------- /lib/external/build/temp.linux-x86_64-3.6/nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/external/build/temp.linux-x86_64-3.6/nms.o -------------------------------------------------------------------------------- /lib/external/make.sh: -------------------------------------------------------------------------------- 1 | python setup.py build_ext --inplace 2 | -------------------------------------------------------------------------------- /lib/external/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | 4 | import numpy 5 | from Cython.Build import cythonize 6 | 7 | extensions = [ 8 | Extension( 9 | "nms", 10 | ["nms.pyx"], 11 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 12 | ) 13 | ] 14 | 15 | setup( 16 | name="coco", 17 | ext_modules=cythonize(extensions), 18 | include_dirs=[numpy.get_include()] 19 | ) 20 | -------------------------------------------------------------------------------- /lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 4 | import os 5 | import sys 6 | import time 7 | 8 | import torch 9 | 10 | USE_TENSORBOARD = False 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, cfg): 19 
| """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(cfg.OUTPUT_DIR): 21 | try: 22 | os.makedirs(cfg.OUTPUT_DIR) 23 | except: 24 | pass 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | file_name = os.path.join(cfg.OUTPUT_DIR, 'opt.txt') 28 | with open(file_name, 'wt') as opt_file: 29 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 30 | opt_file.write('==> cudnn version: {}\n'.format( 31 | torch.backends.cudnn.version())) 32 | opt_file.write('==> Cmd:\n') 33 | opt_file.write(str(sys.argv)) 34 | opt_file.write('\n==> Opt:\n') 35 | 36 | log_dir = cfg.OUTPUT_DIR + '/logs_{}'.format(time_str) 37 | if USE_TENSORBOARD: 38 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 39 | else: 40 | try: 41 | os.makedirs(os.path.dirname(log_dir)) 42 | except: 43 | pass 44 | try: 45 | os.makedirs(log_dir) 46 | except: 47 | pass 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(cfg.OUTPUT_DIR, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with PyTorch 1.0 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # modulated deformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | ### Note 42 | The master branch now targets PyTorch 1.0 (the new ATen API); you can switch back to PyTorch 0.4 with: 43 | ```bash 44 | git checkout pytorch_0.4 45 | ``` 46 | 47 | ### Known Issues 48 | 49 | - [x] Gradient check w.r.t. offset (solved) 50 | - [ ] Backward is not reentrant (minor) 51 | 52 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 53 | 54 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes. 55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it because of some 56 | non-differentiable points? 57 | 58 | Update: all gradient checks pass with double precision. 59 | 60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 61 | float, `<1e-15` for double), 62 | so it may not be a serious problem (?) 63 | 64 | Please post an issue or PR if you have any comments.
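For context on how this repository consumes the op: `DCN` keeps the same input/output contract as `nn.Conv2d` for matching hyper-parameters, which is what lets the DCN-based backbones (e.g. `resnet_dcn.py`, `pose_dla_dcn.py`, `shufflenetv2_dcn.py`) swap it in for a regular convolution. A minimal shape check, illustrative only and assuming the extension has been built and a GPU is available:

```python
import torch
import torch.nn as nn
from dcn_v2 import DCN

x = torch.randn(2, 64, 128, 128).cuda()
plain = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1).cuda()
deform = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda()
assert plain(x).shape == deform(x).shape == torch.Size([2, 64, 128, 128])
```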
65 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/DCNv2/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import find_packages, setup 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | requirements = ["torch", "torchvision"] 11 | 12 | def get_extensions(): 13 | this_dir = os.path.dirname(os.path.abspath(__file__)) 14 | extensions_dir = os.path.join(this_dir, "src") 15 | 16 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 17 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 18 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 19 | 20 | sources = main_file + source_cpu 21 | extension = CppExtension 22 | extra_compile_args = {"cxx": []} 23 | define_macros = [] 24 | 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | extension = CUDAExtension 27 | sources += source_cuda 28 | define_macros += [("WITH_CUDA", None)] 29 | extra_compile_args["nvcc"] = [ 30 | "-DCUDA_HAS_FP16=1", 31 | "-D__CUDA_NO_HALF_OPERATORS__", 32 | "-D__CUDA_NO_HALF_CONVERSIONS__", 33 | "-D__CUDA_NO_HALF2_OPERATORS__", 34 | ] 35 | else: 36 | raise NotImplementedError('Cuda is not availabel') 37 | 38 | sources = [os.path.join(extensions_dir, s) for s in sources] 39 | include_dirs = [extensions_dir] 40 | ext_modules = [ 41 | extension( 42 | "_ext", 43 | sources, 44 | include_dirs=include_dirs, 45 | define_macros=define_macros, 46 | extra_compile_args=extra_compile_args, 47 | ) 48 | ] 49 | return ext_modules 50 | 51 | setup( 52 | name="DCNv2", 53 | version="0.1", 54 | author="charlesshang", 55 | url="https://github.com/charlesshang/DCNv2", 56 | description="deformable convolutional networks", 57 | packages=find_packages(exclude=("configs", "tests",)), 58 | # install_requires=requirements, 59 | ext_modules=get_extensions(), 60 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 61 | ) 62 | -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | 7 | at::Tensor 8 | dcn_v2_cpu_forward(const at::Tensor &input, 9 | const at::Tensor &weight, 10 | const at::Tensor &bias, 11 | const at::Tensor &offset, 12 | const at::Tensor &mask, 13 | const int kernel_h, 14 | const int kernel_w, 15 | const int stride_h, 16 | const int stride_w, 17 | const int pad_h, 18 | const int pad_w, 19 | const int dilation_h, 20 | const int dilation_w, 21 | const int deformable_group) 22 | { 23 | AT_ERROR("Not implement on cpu"); 24 | } 25 | 26 | std::vector 27 | dcn_v2_cpu_backward(const at::Tensor &input, 28 | const at::Tensor &weight, 29 | const 
at::Tensor &bias, 30 | const at::Tensor &offset, 31 | const at::Tensor &mask, 32 | const at::Tensor &grad_output, 33 | int kernel_h, int kernel_w, 34 | int stride_h, int stride_w, 35 | int pad_h, int pad_w, 36 | int dilation_h, int dilation_w, 37 | int deformable_group) 38 | { 39 | AT_ERROR("Not implement on cpu"); 40 | } 41 | 42 | std::tuple 43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 44 | const at::Tensor &bbox, 45 | const at::Tensor &trans, 46 | const int no_trans, 47 | const float spatial_scale, 48 | const int output_dim, 49 | const int group_size, 50 | const int pooled_size, 51 | const int part_size, 52 | const int sample_per_part, 53 | const float trans_std) 54 | { 55 | AT_ERROR("Not implement on cpu"); 56 | } 57 | 58 | std::tuple 59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 60 | const at::Tensor &input, 61 | const at::Tensor &bbox, 62 | const at::Tensor &trans, 63 | const at::Tensor &top_count, 64 | const int no_trans, 65 | const float spatial_scale, 66 | const int output_dim, 67 | const int group_size, 68 | const int pooled_size, 69 | const int part_size, 70 | const int sample_per_part, 71 | const float trans_std) 72 | { 73 | AT_ERROR("Not implement on cpu"); 74 | } -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 
58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 
52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector 42 | dcn_v2_backward(const at::Tensor &input, 43 | const at::Tensor &weight, 44 | const at::Tensor &bias, 45 | const at::Tensor &offset, 46 | const at::Tensor &mask, 47 | const at::Tensor &grad_output, 48 | int kernel_h, int kernel_w, 49 | int stride_h, int stride_w, 50 | int pad_h, int pad_w, 51 | int dilation_h, int dilation_w, 52 | int deformable_group) 53 | { 54 | if (input.type().is_cuda()) 55 | { 56 | #ifdef WITH_CUDA 57 | return dcn_v2_cuda_backward(input, 58 | weight, 59 | bias, 60 | offset, 61 | mask, 62 | grad_output, 63 | kernel_h, kernel_w, 64 | stride_h, stride_w, 65 | pad_h, pad_w, 66 | dilation_h, dilation_w, 67 | deformable_group); 68 | #else 69 | AT_ERROR("Not compiled with GPU support"); 70 | #endif 71 | } 72 | AT_ERROR("Not implemented on the CPU"); 73 | } 74 | 75 | std::tuple 76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 77 | const at::Tensor &bbox, 78 | const at::Tensor &trans, 79 | const int no_trans, 80 | const float spatial_scale, 81 | const int output_dim, 82 | const int group_size, 83 | const int pooled_size, 84 | const int part_size, 85 | const int sample_per_part, 86 | const float trans_std) 87 | { 88 | if (input.type().is_cuda()) 89 | { 90 | #ifdef WITH_CUDA 91 | return dcn_v2_psroi_pooling_cuda_forward(input, 92 | bbox, 93 | trans, 94 | no_trans, 95 | spatial_scale, 96 | output_dim, 97 | group_size, 98 | pooled_size, 99 | part_size, 100 | sample_per_part, 101 | trans_std); 102 | #else 103 | AT_ERROR("Not compiled with GPU support"); 104 | #endif 105 | } 106 | AT_ERROR("Not implemented on the CPU"); 107 | } 108 | 109 | std::tuple 110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 111 | const at::Tensor &input, 112 | const at::Tensor &bbox, 113 | const at::Tensor &trans, 114 | const at::Tensor &top_count, 115 | const int no_trans, 116 | const float spatial_scale, 117 | const int output_dim, 118 | const int group_size, 119 | const int pooled_size, 120 | const int part_size, 121 | const int sample_per_part, 122 | const float trans_std) 123 | { 124 | if (input.type().is_cuda()) 125 | { 126 | #ifdef WITH_CUDA 127 | return 
dcn_v2_psroi_pooling_cuda_backward(out_grad, 128 | input, 129 | bbox, 130 | trans, 131 | top_count, 132 | no_trans, 133 | spatial_scale, 134 | output_dim, 135 | group_size, 136 | pooled_size, 137 | part_size, 138 | sample_per_part, 139 | trans_std); 140 | #else 141 | AT_ERROR("Not compiled with GPU support"); 142 | #endif 143 | } 144 | AT_ERROR("Not implemented on the CPU"); 145 | } -------------------------------------------------------------------------------- /lib/models/backbones/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /lib/models/backbones/Utitled Document: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/Utitled Document -------------------------------------------------------------------------------- /lib/models/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/darknet.py: -------------------------------------------------------------------------------- 1 | import math 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class BasicBlock(nn.Module): 10 | def __init__(self, inplanes, planes): 11 | super(BasicBlock, self).__init__() 12 | self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1, 13 | stride=1, padding=0, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes[0]) 15 | self.relu1 = nn.LeakyReLU(0.1) 16 | self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3, 17 | stride=1, padding=1, bias=False) 18 | self.bn2 = nn.BatchNorm2d(planes[1]) 19 | self.relu2 = nn.LeakyReLU(0.1) 20 | 21 | def forward(self, x): 22 | residual = x 23 | 24 | out = self.conv1(x) 25 | out = self.bn1(out) 26 | out = self.relu1(out) 27 | 28 | out = self.conv2(out) 29 | out = self.bn2(out) 30 | out = self.relu2(out) 31 | 32 | out += residual 33 | return out 34 | 35 | 36 | class DarkNet(nn.Module): 37 | def __init__(self, layers): 38 | super(DarkNet, self).__init__() 39 | self.inplanes = 32 40 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) 41 | self.bn1 = nn.BatchNorm2d(self.inplanes) 42 | self.relu1 = nn.LeakyReLU(0.1) 43 | 44 | self.layer1 = self._make_layer([32, 64], layers[0]) 45 | self.layer2 = self._make_layer([64, 128], layers[1]) 46 | self.layer3 = self._make_layer([128, 256], layers[2]) 47 | #self.layer4 = self._make_layer([256, 512], layers[3]) 48 | #self.layer5 = self._make_layer([512, 1024], layers[4]) 49 | 50 | self.layers_out_filters = [64, 128, 256] 51 | 52 | for m in self.modules(): 53 | if isinstance(m, nn.BatchNorm2d): 54 | m.weight.data.fill_(1) 55 | 
m.bias.data.zero_() 56 | 57 | def _make_layer(self, planes, blocks): 58 | layers = [] 59 | # downsample 60 | layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3, 61 | stride=2, padding=1, bias=False))) 62 | layers.append(("ds_bn", nn.BatchNorm2d(planes[1]))) 63 | layers.append(("ds_relu", nn.LeakyReLU(0.1))) 64 | # blocks 65 | self.inplanes = planes[1] 66 | for i in range(0, blocks): 67 | layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes))) 68 | return nn.Sequential(OrderedDict(layers)) 69 | 70 | def forward(self, x): 71 | x = self.conv1(x) 72 | x = self.bn1(x) 73 | x = self.relu1(x) 74 | 75 | x = self.layer1(x) 76 | x = self.layer2(x) 77 | x = self.layer3(x) 78 | x = F.interpolate(x, size=(128, 128), 79 | mode="bilinear", align_corners=True) 80 | 81 | return x 82 | 83 | 84 | def darknet21(cfg,is_train=True, **kwargs): 85 | model = DarkNet([1, 1, 2, 2, 1]) 86 | if is_train and cfg.BACKBONE.INIT_WEIGHTS: 87 | if isinstance(cfg.BACKBONE.PRETRAINED, str): 88 | model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED)) 89 | else: 90 | raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED)) 91 | return model 92 | 93 | def darknet53(num_layers, cfg): 94 | model = DarkNet([1, 2, 8]) 95 | #if is_train and cfg.BACKBONE.INIT_WEIGHTS: 96 | # if isinstance(cfg.BACKBONE.PRETRAINED, str): 97 | # model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED)) 98 | # else: 99 | # raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED)) 100 | return model 101 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .efficientdet import EfficientDet 2 | 3 | 4 | def get_efficientdet(num_layers, cfg): 5 | model = EfficientDet(intermediate_channels=cfg.MODEL.INTERMEDIATE_CHANNEL) 6 | return model 7 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/efficientdet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | from .efficientnet import EfficientNet 5 | from .bifpn import BIFPN 6 | from .retinahead import RetinaHead 7 | from torchvision.ops import nms 8 | import torch.nn.functional as F 9 | 10 | MODEL_MAP = { 11 | 'efficientdet-d0': 'efficientnet-b0', 12 | 'efficientdet-d1': 'efficientnet-b1', 13 | 'efficientdet-d2': 'efficientnet-b2', 14 | 'efficientdet-d3': 'efficientnet-b3', 15 | 'efficientdet-d4': 'efficientnet-b4', 16 | 'efficientdet-d5': 'efficientnet-b5', 17 | 'efficientdet-d6': 'efficientnet-b6', 18 | 'efficientdet-d7': 'efficientnet-b6', 19 | } 20 | class EfficientDet(nn.Module): 21 | def __init__(self, 22 | intermediate_channels, 23 | network = 'efficientdet-d0', 24 | D_bifpn=3, 25 | W_bifpn=32, 26 | D_class=3, 27 | scale_ratios = [0.5, 1, 2, 4, 8,16,32], 28 | ): 29 | super(EfficientDet, self).__init__() 30 | self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network]) 31 | self.neck = BIFPN(in_channels=self.backbone.get_list_features(), 32 | out_channels=W_bifpn, 33 | stack=D_bifpn, 34 | num_outs=7) 35 | self.bbox_head = RetinaHead(num_classes = intermediate_channels, 36 | in_channels = W_bifpn) 37 | 38 | self.scale_ratios = scale_ratios 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | n = m.kernel_size[0] * m.kernel_size[1] * 
m.out_channels 42 | m.weight.data.normal_(0, math.sqrt(2. / n)) 43 | elif isinstance(m, nn.BatchNorm2d): 44 | m.weight.data.fill_(1) 45 | m.bias.data.zero_() 46 | self.freeze_bn() 47 | 48 | def forward(self, inputs): 49 | x = self.extract_feat(inputs) 50 | outs = self.bbox_head(x) 51 | 52 | return outs[0][1] 53 | 54 | def freeze_bn(self): 55 | '''Freeze BatchNorm layers.''' 56 | for layer in self.modules(): 57 | if isinstance(layer, nn.BatchNorm2d): 58 | layer.eval() 59 | def extract_feat(self, img): 60 | """ 61 | Directly extract features from the backbone+neck 62 | """ 63 | x = self.backbone(img) 64 | x = self.neck(x) 65 | return x 66 | 67 | -------------------------------------------------------------------------------- /lib/models/backbones/efficientdet/retinahead.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | from .conv_module import ConvModule, bias_init_with_prob, normal_init 5 | from six.moves import map, zip 6 | 7 | def multi_apply(func, *args, **kwargs): 8 | pfunc = partial(func, **kwargs) if kwargs else func 9 | map_results = map(pfunc, *args) 10 | return tuple(map(list, zip(*map_results))) 11 | 12 | class RetinaHead(nn.Module): 13 | """ 14 | An anchor-based head used in [1]_. 15 | The head contains two subnetworks. The first classifies anchor boxes and 16 | the second regresses deltas for the anchors. 17 | References: 18 | .. [1] https://arxiv.org/pdf/1708.02002.pdf 19 | Example: 20 | >>> import torch 21 | >>> self = RetinaHead(11, 7) 22 | >>> x = torch.rand(1, 7, 32, 32) 23 | >>> cls_score, bbox_pred = self.forward_single(x) 24 | >>> # Each anchor predicts a score for each class except background 25 | >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors 26 | >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors 27 | >>> assert cls_per_anchor == (self.num_classes - 1) 28 | >>> assert box_per_anchor == 4 29 | """ 30 | 31 | def __init__(self, 32 | num_classes, 33 | in_channels, 34 | feat_channels=64, 35 | stacked_convs=4, 36 | octave_base_scale=4, 37 | scales_per_octave=3, 38 | conv_cfg=None, 39 | norm_cfg=None, 40 | **kwargs): 41 | super(RetinaHead, self).__init__() 42 | self.in_channels = in_channels 43 | self.num_classes = num_classes 44 | self.feat_channels = feat_channels 45 | self.stacked_convs = stacked_convs 46 | self.octave_base_scale = octave_base_scale 47 | self.scales_per_octave = scales_per_octave 48 | self.conv_cfg = conv_cfg 49 | self.norm_cfg = norm_cfg 50 | octave_scales = np.array( 51 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 52 | self.cls_out_channels = num_classes 53 | self._init_layers() 54 | def _init_layers(self): 55 | self.relu = nn.ReLU(inplace=True) 56 | self.cls_convs = nn.ModuleList() 57 | #self.reg_convs = nn.ModuleList() 58 | for i in range(self.stacked_convs): 59 | chn = self.in_channels if i == 0 else self.feat_channels 60 | self.cls_convs.append( 61 | ConvModule( 62 | chn, 63 | self.feat_channels, 64 | 3, 65 | stride=1, 66 | padding=1, 67 | conv_cfg=self.conv_cfg, 68 | norm_cfg=self.norm_cfg)) 69 | self.retina_cls = nn.Conv2d( 70 | self.feat_channels, 71 | self.cls_out_channels, 72 | 3, 73 | padding=1) 74 | #self.output_act = nn.Sigmoid() 75 | 76 | def init_weights(self): 77 | for m in self.cls_convs: 78 | normal_init(m.conv, std=0.01) 79 | for m in self.reg_convs: 80 | normal_init(m.conv, std=0.01) 81 | bias_cls = bias_init_with_prob(0.01) 82 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 83 | 
#normal_init(self.retina_reg, std=0.01) 84 | 85 | def forward_single(self, x): 86 | cls_feat = x 87 | #reg_feat = x 88 | for cls_conv in self.cls_convs: 89 | cls_feat = cls_conv(cls_feat) 90 | #for reg_conv in self.reg_convs: 91 | # reg_feat = reg_conv(reg_feat) 92 | 93 | cls_score = self.retina_cls(cls_feat) 94 | # out is B x C x W x H, with C = n_classes + n_anchors 95 | #cls_score = cls_score.permute(0, 2, 3, 1) 96 | #batch_size, width, height, channels = cls_score.shape 97 | #cls_score = cls_score.view(batch_size, width, height, self.num_anchors, self.num_classes) 98 | #cls_score = cls_score.contiguous().view(x.size(0), -1, self.num_classes) 99 | 100 | 101 | #bbox_pred = self.retina_reg(reg_feat) 102 | #bbox_pred = bbox_pred.permute(0, 2, 3, 1) 103 | #bbox_pred = bbox_pred.contiguous().view(bbox_pred.size(0), -1, 4) 104 | return [cls_score] 105 | def forward(self, feats): 106 | return multi_apply(self.forward_single, feats) 107 | -------------------------------------------------------------------------------- /lib/models/backbones/mobilenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/backbones/mobilenet/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones/test_mode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from darknet import darknet53 4 | from hardnet import hardnet 5 | 6 | model = hardnet(19).cuda() 7 | inputs = torch.randn((1,3,512,512)).cuda() 8 | 9 | outs = model(inputs) 10 | 11 | print(outs.shape) 12 | 13 | 14 | 15 | model = darknet53(0,1,2).cuda() 16 | 17 | inputs = torch.randn((1,3,512,512)).cuda() 18 | 19 | outs = model(inputs) 20 | 21 | print(outs.shape) 22 | -------------------------------------------------------------------------------- /lib/models/heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/__init__.py -------------------------------------------------------------------------------- /lib/models/heads/keypoint.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class KeypointHead(nn.Module): 10 | 11 | def __init__(self, intermediate_channel, head_conv): 12 | super(KeypointHead, self).__init__() 13 | 14 | self.hm = nn.Sequential( 15 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 16 | nn.ReLU(inplace=True), 17 | nn.Conv2d(head_conv, 1, kernel_size=1, stride=1, padding=0)) 18 | self.wh = nn.Sequential( 19 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 22 | self.hps = nn.Sequential( 23 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(head_conv, 34, kernel_size=1, stride=1, padding=0)) 26 | self.reg = nn.Sequential( 27 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 28 | nn.ReLU(inplace=True), 29 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 30 | self.hm_hp = 
nn.Sequential( 31 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 32 | nn.ReLU(inplace=True), 33 | nn.Conv2d(head_conv, 17, kernel_size=1, stride=1, padding=0)) 34 | self.hp_offset = nn.Sequential( 35 | nn.Conv2d(intermediate_channel, head_conv, kernel_size=3, padding=1, bias=True), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(head_conv, 2, kernel_size=1, stride=1, padding=0)) 38 | self.init_weights() 39 | 40 | def forward(self, x): 41 | 42 | return [self.hm(x), self.wh(x), self.hps(x), self.reg(x), self.hm_hp(x), self.hp_offset(x)] 43 | 44 | def init_weights(self): 45 | self.hm[-1].bias.data.fill_(-2.19) 46 | self.hm_hp[-1].bias.data.fill_(-2.19) 47 | self.fill_fc_weights(self.wh) 48 | self.fill_fc_weights(self.hps) 49 | self.fill_fc_weights(self.reg) 50 | self.fill_fc_weights(self.hp_offset) 51 | 52 | def fill_fc_weights(self, layers): 53 | for m in layers.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | nn.init.normal_(m.weight, std=0.001) 56 | if m.bias is not None: 57 | nn.init.constant_(m.bias, 0) 58 | -------------------------------------------------------------------------------- /lib/models/heads/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/models/heads/mask.py -------------------------------------------------------------------------------- /lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torchvision.models as models 8 | 9 | from .backbones.darknet import darknet53 10 | from .backbones.dlav0 import get_pose_net as get_dlav0 11 | from .backbones.hardnet import get_hard_net 12 | from .backbones.large_hourglass import get_large_hourglass_net 13 | from .backbones.mobilenet.mobilenetv3 import get_mobile_pose_netv3 14 | from .backbones.mobilenet.mobilenetv2 import get_mobile_pose_netv2 15 | from .backbones.msra_resnet import get_resnet 16 | from .backbones.pose_dla_dcn import get_pose_net as get_dla_dcn 17 | from .backbones.pose_higher_hrnet import get_hrpose_net 18 | from .backbones.resnet_dcn import get_pose_net as get_pose_net_dcn 19 | from .backbones.shufflenetv2_dcn import get_shufflev2_net 20 | from .backbones.ghost_net import get_ghost_net 21 | from .backbones.efficientdet import get_efficientdet 22 | from .heads.keypoint import KeypointHead 23 | 24 | _backbone_factory = { 25 | 'res': get_resnet, # default Resnet with deconv 26 | 'dlav0': get_dlav0, # default DLAup 27 | 'dla': get_dla_dcn, 28 | 'resdcn': get_pose_net_dcn, 29 | 'hourglass': get_large_hourglass_net, 30 | 'mobilenetv3': get_mobile_pose_netv3, 31 | 'mobilenetv2': get_mobile_pose_netv2, 32 | 'shufflenetV2': get_shufflev2_net, 33 | 'hrnet': get_hrpose_net, 34 | 'hardnet': get_hard_net, 35 | 'darknet': darknet53, 36 | 'ghostnet': get_ghost_net, 37 | 'efficientdet':get_efficientdet, 38 | } 39 | 40 | _head_factory = { 41 | 'keypoint': KeypointHead 42 | } 43 | 44 | class BackBoneWithHead(nn.Module): 45 | 46 | def __init__(self, arch, head_conv, cfg): 47 | super(BackBoneWithHead, self).__init__() 48 | 49 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 50 | arch = arch[:arch.find('_')] if '_' in arch else arch 51 | backbone = _backbone_factory[arch] 52 | self.backbone_model = backbone(num_layers=num_layers, cfg = cfg) 53 | 54 | head = 
_head_factory[cfg.MODEL.HEADS_NAME] 55 | self.head_model = head(cfg.MODEL.INTERMEDIATE_CHANNEL, cfg.MODEL.HEAD_CONV) 56 | 57 | def forward(self, x): 58 | x = self.backbone_model(x) 59 | return self.head_model(x) 60 | 61 | 62 | 63 | def create_model(arch, head_conv, cfg): 64 | 65 | return BackBoneWithHead(arch, head_conv, cfg) 66 | 67 | def load_model(model, model_path, optimizer=None, resume=False, 68 | lr=None, lr_step=None): 69 | start_epoch = 0 70 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 71 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 72 | state_dict_ = checkpoint['state_dict'] 73 | state_dict = {} 74 | 75 | # convert data_parallal to model 76 | for k in state_dict_: 77 | if k.startswith('module') and not k.startswith('module_list'): 78 | state_dict[k[7:]] = state_dict_[k] 79 | else: 80 | state_dict[k] = state_dict_[k] 81 | model_state_dict = model.state_dict() 82 | 83 | # check loaded parameters and created model parameters 84 | msg = 'If you see this, your model does not fully load the ' + \ 85 | 'pre-trained weight. Please make sure ' + \ 86 | 'you have correctly specified --arch xxx ' + \ 87 | 'or set the correct --num_classes for your own dataset.' 88 | for k in state_dict: 89 | if k in model_state_dict: 90 | if state_dict[k].shape != model_state_dict[k].shape: 91 | print('Skip loading parameter {}, required shape{}, '\ 92 | 'loaded shape{}. {}'.format( 93 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 94 | state_dict[k] = model_state_dict[k] 95 | else: 96 | print('Drop parameter {}.'.format(k) + msg) 97 | for k in model_state_dict: 98 | if not (k in state_dict): 99 | print('No param {}.'.format(k) + msg) 100 | state_dict[k] = model_state_dict[k] 101 | model.load_state_dict(state_dict, strict=False) 102 | 103 | # resume optimizer parameters 104 | if optimizer is not None and resume: 105 | if 'optimizer' in checkpoint: 106 | optimizer.load_state_dict(checkpoint['optimizer']) 107 | start_epoch = checkpoint['epoch'] 108 | start_lr = lr 109 | for step in lr_step: 110 | if start_epoch >= step: 111 | start_lr *= 0.1 112 | for param_group in optimizer.param_groups: 113 | param_group['lr'] = start_lr 114 | print('Resumed optimizer with start lr', start_lr) 115 | else: 116 | print('No optimizer parameters in checkpoint.') 117 | if optimizer is not None: 118 | return model, optimizer, start_epoch 119 | else: 120 | return model 121 | 122 | def save_model(path, epoch, model, optimizer=None): 123 | if isinstance(model, torch.nn.DataParallel): 124 | state_dict = model.module.state_dict() 125 | else: 126 | state_dict = model.state_dict() 127 | data = {'epoch': epoch, 128 | 'state_dict': state_dict} 129 | if not (optimizer is None): 130 | data['optimizer'] = optimizer.state_dict() 131 | torch.save(data, path) 132 | -------------------------------------------------------------------------------- /lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | def _sigmoid(x): 8 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 9 | return y 10 | 11 | def _gather_feat(feat, ind, mask=None): 12 | dim = feat.size(2) 13 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 14 | feat = feat.gather(1, ind) 15 | if mask is not None: 16 | mask = mask.unsqueeze(2).expand_as(feat) 17 | feat = feat[mask] 18 | feat = feat.view(-1, dim) 19 | return feat 20 | 
21 | def _transpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def flip_tensor(x): 28 | return torch.flip(x, [3]) 29 | 30 | def flip_lr(x, flip_idx): 31 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 32 | shape = tmp.shape 33 | for e in flip_idx: 34 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 35 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 36 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 37 | 38 | def flip_lr_off(x, flip_idx): 39 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 40 | shape = tmp.shape 41 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 42 | tmp.shape[2], tmp.shape[3]) 43 | tmp[:, :, 0, :, :] *= -1 44 | for e in flip_idx: 45 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 46 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 47 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 48 | -------------------------------------------------------------------------------- /lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import time 4 | 5 | import torch 6 | import torch.nn as nn 7 | from progress.bar import Bar 8 | 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class BaseTrainer(object): 13 | def __init__( 14 | self, cfg, local_rank, model, optimizer=None): 15 | self.cfg = cfg 16 | self.optimizer = optimizer 17 | self.loss_stats, self.loss = self._get_losses(cfg, local_rank) 18 | self.model = model 19 | self.local_rank = local_rank 20 | 21 | def set_device(self, gpus, chunk_sizes, device): 22 | 23 | if self.cfg.TRAIN.DISTRIBUTE: 24 | self.model = self.model.to(device) 25 | self.model = nn.parallel.DistributedDataParallel(self.model, find_unused_parameters=True, 26 | device_ids=[self.local_rank, ], 27 | output_device=self.local_rank) 28 | else: 29 | self.model = nn.DataParallel(self.model).to(device) 30 | self.loss.to(device) 31 | for state in self.optimizer.state.values(): 32 | for k, v in state.items(): 33 | if isinstance(v, torch.Tensor): 34 | state[k] = v.to(device=device, non_blocking=True) 35 | 36 | def run_epoch(self, phase, epoch, data_loader): 37 | 38 | model = self.model 39 | if phase == 'train': 40 | self.model.train() 41 | else: 42 | if len(self.cfg.GPUS) > 1: 43 | model = model.module 44 | model.eval() 45 | torch.cuda.empty_cache() 46 | 47 | cfg = self.cfg 48 | results = {} 49 | data_time, batch_time = AverageMeter(), AverageMeter() 50 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 51 | num_iters = len(data_loader) 52 | bar = Bar('{}/{}'.format(cfg.TASK, cfg.EXP_ID), max=num_iters) 53 | end = time.time() 54 | for iter_id, batch in enumerate(data_loader): 55 | if iter_id >= num_iters: 56 | break 57 | data_time.update(time.time() - end) 58 | 59 | for k in batch: 60 | if k != 'meta': 61 | batch[k] = batch[k].to(device=torch.device('cuda:%d'%self.local_rank), non_blocking=True) 62 | 63 | outputs = model(batch['input']) 64 | loss, loss_stats = self.loss(outputs, batch) 65 | 66 | loss = loss.mean() 67 | if phase == 'train': 68 | self.optimizer.zero_grad() 69 | loss.backward() 70 | self.optimizer.step() 71 | batch_time.update(time.time() - end) 72 | end = time.time() 73 | 74 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 75 | epoch, iter_id, num_iters, phase=phase, 76 | total=bar.elapsed_td, eta=bar.eta_td) 77 | for l 
in avg_loss_stats: 78 | avg_loss_stats[l].update( 79 | loss_stats[l].mean().item(), batch['input'].size(0)) 80 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 81 | if not cfg.TRAIN.HIDE_DATA_TIME: 82 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 83 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 84 | if cfg.PRINT_FREQ > 0: 85 | if iter_id % cfg.PRINT_FREQ == 0: 86 | print('{}/{}| {}'.format(cfg.TASK, cfg.EXP_ID, Bar.suffix)) 87 | else: 88 | bar.next() 89 | 90 | if cfg.DEBUG > 0: 91 | self.debug(batch, outputs, iter_id) 92 | 93 | if phase == 'val': 94 | self.save_result(outputs, batch, results) 95 | del outputs, loss, loss_stats 96 | 97 | bar.finish() 98 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 99 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 100 | 101 | return ret, results 102 | 103 | def debug(self, batch, output, iter_id): 104 | raise NotImplementedError 105 | 106 | def save_result(self, output, batch, results): 107 | raise NotImplementedError 108 | 109 | def _get_losses(self, cfg): 110 | raise NotImplementedError 111 | 112 | def val(self, epoch, data_loader): 113 | return self.run_epoch('val', epoch, data_loader) 114 | 115 | def train(self, epoch, data_loader): 116 | return self.run_epoch('train', epoch, data_loader) 117 | -------------------------------------------------------------------------------- /lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | from .multi_pose import MultiPoseTrainer 4 | 5 | 6 | train_factory = { 7 | 'multi_pose': MultiPoseTrainer, 8 | } 9 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numba 4 | import numpy as np 5 | 6 | 7 | @numba.jit(nopython=True, nogil=True) 8 | def gen_oracle_map(feat, ind, w, h): 9 | # feat: B x maxN x featDim 10 | # ind: B x maxN 11 | batch_size = feat.shape[0] 12 | max_objs = feat.shape[1] 13 | feat_dim = feat.shape[2] 14 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 15 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 16 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 17 | for i in range(batch_size): 18 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 19 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 20 | head, tail = 0, 0 21 | for j in range(max_objs): 22 | if ind[i][j] > 0: 23 | x, y = ind[i][j] % w, ind[i][j] // w 24 | out[i, :, y, x] = feat[i][j] 25 | vis[i, y, x] = 1 26 | queue_ind[tail] = x, y 27 | queue_feat[tail] = feat[i][j] 28 | tail += 1 29 | while tail - head > 0: 30 | x, y = queue_ind[head] 31 | f = queue_feat[head] 32 | head += 1 33 | for (dx, dy) in ds: 34 | xx, yy = x + dx, y + dy 35 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 36 | out[i, :, yy, xx] = f 37 | vis[i, yy, xx] = 1 38 | queue_ind[tail] = xx, yy 39 | queue_feat[tail] = f 40 | tail += 1 41 | return out 42 | 
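A minimal usage sketch for `gen_oracle_map`, with dummy NumPy inputs shaped as the comments above describe (`feat`: B x maxN x featDim, `ind`: B x maxN of flattened `y * w + x` positions). The shapes and the import path are illustrative assumptions, taking `lib/` to be on `PYTHONPATH` as `_init_paths.py` arranges.

~~~
import numpy as np
from utils.oracle_utils import gen_oracle_map  # assumes lib/ is on PYTHONPATH

B, max_objs, feat_dim, h, w = 1, 2, 3, 8, 8
feat = np.random.randn(B, max_objs, feat_dim).astype(np.float32)
# flattened y * w + x positions of two annotated objects (entries <= 0 are skipped)
ind = np.array([[1 * w + 2, 5 * w + 4]], dtype=np.int64)

out = gen_oracle_map(feat, ind, w, h)  # B x featDim x h x w
print(out.shape)  # (1, 3, 8, 8)
~~~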
-------------------------------------------------------------------------------- /lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | from .image import transform_preds 6 | 7 | 8 | def multi_pose_post_process(dets, c, s, h, w): 9 | # dets: batch x max_dets x 40 10 | # return list of 39 in image coord 11 | ret = [] 12 | for i in range(dets.shape[0]): 13 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 14 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 15 | top_preds = np.concatenate( 16 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 17 | pts.reshape(-1, 34), dets[i, :, 39:56]], axis=1).astype(np.float32).tolist() 18 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 19 | return ret 20 | 21 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value""" 8 | def __init__(self): 9 | self.reset() 10 | 11 | def reset(self): 12 | self.val = 0 13 | self.avg = 0 14 | self.sum = 0 15 | self.count = 0 16 | 17 | def update(self, val, n=1): 18 | self.val = val 19 | self.sum += val * n 20 | self.count += n 21 | if self.count > 0: 22 | self.avg = self.sum / self.count 23 | -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation and training, you will need to setup dataset. 4 | 5 | 6 | ### COCO 7 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 8 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 9 | - Place the data (or create symlinks) to make the data folder like: 10 | 11 | ~~~ 12 | ${CenterNet_ROOT} 13 | |-- data 14 | `-- |-- coco 15 | `-- |-- annotations 16 | | |-- instances_train2017.json 17 | | |-- instances_val2017.json 18 | | |-- person_keypoints_train2017.json 19 | | |-- person_keypoints_val2017.json 20 | | |-- image_info_test-dev2017.json 21 | |---|-- train2017 22 | |---|-- val2017 23 | `---|-- test2017 24 | ~~~ 25 | -------------------------------------------------------------------------------- /readme/DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Develop 2 | 3 | This document provides tutorials to develop CenterNet. `lib/src/opts` lists a few more options that the current version supports. 4 | 5 | ## New dataset 6 | Basically there are three steps: 7 | 8 | - Convert the dataset annotation to [COCO format](http://cocodataset.org/#format-data). Please refer to [src/tools/convert_kitti_to_coco.py](../src/tools/convert_kitti_to_coco.py) for an example to convert kitti format to coco format. 9 | - Create a dataset intilization file in `src/lib/datasets/dataset`. In most cases you can just copy `src/lib/datasets/dataset/coco.py` to your dataset name and change the category information, and annotation path. 
10 | - Import your dataset at `src/lib/datasets/dataset_factory`. 11 | 12 | ## New task 13 | 14 | You will need to add files to `src/lib/datasets/sample/`, `src/lib/datasets/trains/`, and `src/lib/datasets/detectors/`, which specify the data generation during training, the training targets, and the testing, respectively. 15 | 16 | ## New architecture 17 | 18 | - Add your model file to `src/lib/models/networks/`. The model should accept a dict `heads` of `{name: channels}`, which specifies the name of each network output and its number of channels. Make sure your model returns a list (one entry per stage; a single-stage model should return a list with a single element). Each element of the list is a dict containing the same keys as `heads`. 19 | - Add your model in `model_factory` of `src/lib/models/model.py`. -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterNet. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 8 | 9 | ### COCO 10 | 11 | To evaluate COCO object detection with DLA, 12 | run 13 | 14 | ~~~ 15 | python test.py ctdet --exp_id coco_dla --keep_res --load_model ../models/ctdet_coco_dla_2x.pth 16 | ~~~ 17 | 18 | This will give an AP of `37.4` if set up correctly. `--keep_res` keeps the original image resolution; without it, the images are resized to `512 x 512`. You can add `--flip_test` and `--flip_test --test_scales 0.5,0.75,1,1.25,1.5` to the above command, for flip test and multi-scale test, respectively. The expected APs are `39.2` and `41.7`, respectively. 19 | 20 | To test with hourglass net, run 21 | 22 | ~~~ 23 | python test.py ctdet --exp_id coco_hg --arch hourglass --fix_res --load_model ../models/ctdet_coco_hg.pth 24 | ~~~ 25 | 26 | Similarly, to evaluate human pose estimation, run the following command for dla 27 | 28 | ~~~ 29 | python test.py multi_pose --exp_id dla --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 30 | ~~~ 31 | 32 | and the following for hourglass 33 | 34 | ~~~ 35 | python test.py multi_pose --exp_id hg --arch hourglass --keep_res --load_model ../models/multi_pose_dla_3x.pth --flip_test 36 | ~~~ 37 | 38 | The expected results can be found in the model zoo. 39 | 40 | ### Pascal 41 | 42 | To evaluate object detection on Pascal VOC (test2007), run 43 | 44 | ~~~ 45 | python test.py ctdet --exp_id dla --dataset pascal --load_model ../models/ctdet_pascal_dla.pth --flip_test 46 | ~~~ 47 | 48 | Note that we fix the resolution during testing. 49 | You can change to other network architectures and resolutions by specifying `--arch` and `--input_res 512`. 50 | 51 | ### KITTI 52 | 53 | To evaluate on the KITTI dataset, first compile the evaluation tool (from [here](https://github.com/prclibo/kitti_eval)): 54 | 55 | ~~~ 56 | cd CenterNet_ROOT/src/tools/kitti_eval 57 | g++ -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp -O3 58 | ~~~ 59 | 60 | Then run the evaluation with a pretrained model: 61 | 62 | ~~~ 63 | python test.py ddd --exp_id 3dop --dataset kitti --kitti_split 3dop --load_model ../models/ddd_3dop.pth 64 | ~~~ 65 | 66 | to evaluate the 3DOP split. 
For the subcnn split, change `--kitti_split` to `subcnn` and load the corresponding models. 67 | Note that test-time augmentation is not trivially applicable for 3D orientation. 68 | 69 | ## Training 70 | 71 | We have packed all the training scripts in the [experiments](../experiments) folder. 72 | The experiment names correspond to the model names in the [model zoo](MODEL_ZOO.md). 73 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 74 | If you don't have 8 GPUs, you can follow the [linear learning rate rule](https://arxiv.org/abs/1706.02677) to scale the learning rate with the batch size. 75 | For example, to train COCO object detection with dla on 2 GPUs, run 76 | 77 | ~~~ 78 | python main.py ctdet --exp_id coco_dla --batch_size 32 --master_batch 15 --lr 1.25e-4 --gpus 0,1 79 | ~~~ 80 | 81 | The default learning rate is `1.25e-4` for batch size `32` (on 2 GPUs). 82 | By default, PyTorch splits the total batch size evenly across the GPUs. 83 | `--master_batch` allows using a different batch size on the master GPU, which usually consumes more memory than the other GPUs. 84 | If you run out of GPU memory, using a slightly smaller batch size (e.g., `112` instead of `128`) with the same learning rate is fine. 85 | 86 | If training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest model with the same `exp_id`. 87 | 88 | Our HourglassNet model is finetuned from the pretrained [ExtremeNet model](https://drive.google.com/file/d/1omiOUjWCrFbTJREypuZaODu0bOlF_7Fg/view?usp=sharing) (from the [ExtremeNet repo](https://github.com/xingyizhou/ExtremeNet)). 89 | You will need to download the model, run `python convert_hourglass_weight.py` to convert the model format, and load the model for training (see the [script](../experiments/ctdet_coco_hg.sh)). 90 | -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 18.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.1. NVIDIA GPUs are needed for both training and testing. 5 | After installing Anaconda: 6 | 7 | 0. [Optional but recommended] create a new conda environment. 8 | 9 | ~~~ 10 | conda create --name CenterNet python=3.6 11 | ~~~ 12 | And activate the environment. 13 | 14 | ~~~ 15 | conda activate CenterNet 16 | ~~~ 17 | 18 | 1. Install PyTorch 1.1: 19 | 20 | ~~~ 21 | pip install torch==1.1 22 | ~~~ 23 | 24 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 25 | 26 | ~~~ 27 | # COCOAPI=/path/to/clone/cocoapi 28 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 29 | cd $COCOAPI/PythonAPI 30 | make 31 | python setup.py install --user 32 | ~~~ 33 | 34 | 3. Clone this repo: 35 | 36 | ~~~ 37 | CenterNet_ROOT=/path/to/clone/CenterNet 38 | git clone https://github.com/tensorboy/centerpose $CenterNet_ROOT 39 | ~~~ 40 | 41 | 42 | 4. Install the requirements: 43 | 44 | ~~~ 45 | pip install -r requirements.txt 46 | ~~~ 47 | 48 | 49 | 5. Compile the deformable convolution layers (from [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4)): 50 | 51 | ~~~ 52 | cd $CenterNet_ROOT/src/lib/models/networks/DCNv2 53 | ./make.sh 54 | ~~~ 55 | 6. [Optional, only required if you are using ExtremeNet or multi-scale testing] Compile NMS if you want to use multi-scale testing or test ExtremeNet. 
56 | 57 | ~~~ 58 | cd $CenterNet_ROOT/src/lib/external 59 | make 60 | ~~~ 61 | -------------------------------------------------------------------------------- /readme/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/demo.gif -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/fig2.png -------------------------------------------------------------------------------- /readme/multi_pose_screenshot_27.11.2019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/multi_pose_screenshot_27.11.2019.png -------------------------------------------------------------------------------- /readme/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorboy/centerpose/555d753cd82693476f91f78c53aa4147f5a83015/readme/performance.png -------------------------------------------------------------------------------- /readme/plot_speed_accuracy.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import pandas as pd 4 | import plotly.express as px 5 | import plotly.graph_objects as go 6 | 7 | sizeref = 2000 8 | 9 | # Dictionary with dataframes for each continent 10 | continent_names = ['DLA', 'Resnet', 'MobileNet', 'ShuffleNet', 'HigherResolution', 'HardNet'] 11 | continent_data = {} 12 | 13 | continent_data['DLA-34'] = {'map':[62.3], 'speed':[23], 'size':82.7/1.5} 14 | continent_data['Resnet50'] = {'map':[54.5], 'speed':[28], 'size':139.8/1.5} 15 | continent_data['MobileNetV3'] = {'map':[46.0], 'speed':[30], 'size':9.7/1.5} 16 | continent_data['ShuffleNetV2'] = {'map':[43.9], 'speed':[25], 'size':40./1.5} 17 | continent_data['HigherResolution'] = {'map':[63.8], 'speed':[16], 'size':115.2/1.5} 18 | continent_data['HardNet'] = {'map':[46.0], 'speed':[30], 'size':19.3/1.5} 19 | continent_data['Darknet53'] = {'map':[38.2], 'speed':[30], 'size':27.1/1.5} 20 | 21 | # Create figure 22 | fig = go.Figure() 23 | 24 | for continent_name, continent in continent_data.items(): 25 | fig.add_trace(go.Scatter( 26 | x=continent['speed'], y=continent['map'], 27 | name=continent_name, text='model performance', 28 | marker_size=continent['size'], 29 | )) 30 | # Tune marker appearance and layout 31 | fig.update_traces(mode='markers', marker=dict(sizemode='area', 32 | sizeref=sizeref, line_width=2)) 33 | 34 | fig.update_layout( 35 | title='mAP v.s. 
FPS', 36 | xaxis=dict( 37 | title='FPS (frames per second)', 38 | gridcolor='white', 39 | type='log', 40 | gridwidth=2, 41 | ), 42 | yaxis=dict( 43 | title='Mean Average Precision (mAP)', 44 | gridcolor='white', 45 | gridwidth=2, 46 | ), 47 | paper_bgcolor='rgb(243, 243, 243)', 48 | plot_bgcolor='rgb(243, 243, 243)', 49 | ) 50 | fig.show() 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | pycocotools 9 | yacs 10 | pthflops 11 | -------------------------------------------------------------------------------- /samples/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | 5 | def add_path(path): 6 | if path not in sys.path: 7 | sys.path.insert(0, path) 8 | 9 | this_dir = osp.dirname(__file__) 10 | 11 | # Add lib to PYTHONPATH 12 | lib_path = osp.join(this_dir, '..', 'lib') 13 | add_path(lib_path) 14 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import os 5 | 6 | import cv2 7 | 8 | import _init_paths 9 | from config import cfg, update_config 10 | from detectors.detector_factory import detector_factory 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Train keypoints network') 15 | # general 16 | parser.add_argument('--cfg', 17 | help='experiment configure file name', 18 | required=True, 19 | type=str) 20 | parser.add_argument('--TESTMODEL', 21 | help='model directory', 22 | type=str, 23 | default='') 24 | parser.add_argument('--DEMOFILE', 25 | help='source images or video', 26 | type=str, 27 | default='') 28 | parser.add_argument('--DEBUG', type=int, default=0, 29 | help='level of visualization.' 
30 | '1: only show the final detection results' 31 | '2: show the network output features' 32 | '3: use matplot to display' # useful when lunching training with ipython notebook 33 | '4: save all visualizations to disk') 34 | parser.add_argument('--NMS', 35 | help='whether to do NMS', 36 | type=bool, 37 | default=True) 38 | args = parser.parse_args() 39 | 40 | return args 41 | 42 | 43 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 44 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 45 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 46 | 47 | def demo(cfg): 48 | Detector = detector_factory[cfg.TEST.TASK] 49 | detector = Detector(cfg) 50 | 51 | if cfg.TEST.DEMO_FILE == 'webcam' or \ 52 | cfg.TEST.DEMO_FILE[cfg.TEST.DEMO_FILE.rfind('.') + 1:].lower() in video_ext: 53 | cam = cv2.VideoCapture(0 if cfg.TEST.DEMO_FILE == 'webcam' else cfg.TEST.DEMO_FILE) 54 | detector.pause = False 55 | while True: 56 | _, img = cam.read() 57 | cv2.imshow('input', img) 58 | ret = detector.run(img) 59 | time_str = '' 60 | for stat in time_stats: 61 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 62 | print(time_str) 63 | if cv2.waitKey(1) == 27: 64 | return # esc to quit 65 | else: 66 | if os.path.isdir(cfg.TEST.DEMO_FILE): 67 | image_names = [] 68 | ls = os.listdir(cfg.TEST.DEMO_FILE) 69 | for file_name in sorted(ls): 70 | ext = file_name[file_name.rfind('.') + 1:].lower() 71 | if ext in image_ext: 72 | image_names.append(os.path.join(cfg.TEST.DEMO_FILE, file_name)) 73 | else: 74 | image_names = [cfg.TEST.DEMO_FILE] 75 | 76 | for (image_name) in image_names: 77 | ret = detector.run(image_name) 78 | 79 | time_str = '' 80 | for stat in time_stats: 81 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 82 | print(time_str) 83 | if __name__ == '__main__': 84 | args = parse_args() 85 | update_config(cfg, args.cfg) 86 | cfg.defrost() 87 | cfg.TEST.MODEL_PATH = args.TESTMODEL 88 | cfg.TEST.DEMO_FILE = args.DEMOFILE 89 | cfg.TEST.NMS = args.NMS 90 | cfg.DEBUG = args.DEBUG 91 | cfg.freeze() 92 | demo(cfg) 93 | -------------------------------------------------------------------------------- /tools/evaluate.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import argparse 4 | import json 5 | import os 6 | import time 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | from progress.bar import Bar 12 | 13 | import _init_paths 14 | from config import cfg, update_config 15 | from datasets.dataset_factory import dataset_factory 16 | from detectors.detector_factory import detector_factory 17 | from external.nms import soft_nms 18 | from logger import Logger 19 | from utils.utils import AverageMeter 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Train keypoints network') 24 | # general 25 | parser.add_argument('--cfg', 26 | help='experiment configure file name', 27 | required=True, 28 | type=str) 29 | parser.add_argument('--NMS', 30 | help='whether to do NMS', 31 | type=bool, 32 | default=True) 33 | parser.add_argument('--TESTMODEL', 34 | help='model directory', 35 | type=str, 36 | default='') 37 | parser.add_argument('--DEBUG', type=int, default=0, 38 | help='level of visualization.' 
39 | '1: only show the final detection results' 40 | '2: show the network output features' 41 | '3: use matplot to display' # useful when lunching training with ipython notebook 42 | '4: save all visualizations to disk') 43 | args = parser.parse_args() 44 | 45 | return args 46 | 47 | 48 | def test(cfg): 49 | 50 | Dataset = dataset_factory[cfg.SAMPLE_METHOD] 51 | Logger(cfg) 52 | Detector = detector_factory[cfg.TEST.TASK] 53 | 54 | dataset = Dataset(cfg, 'val') 55 | detector = Detector(cfg) 56 | 57 | results = {} 58 | num_iters = len(dataset) 59 | bar = Bar('{}'.format(cfg.EXP_ID), max=num_iters) 60 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 61 | avg_time_stats = {t: AverageMeter() for t in time_stats} 62 | for ind in range(num_iters): 63 | img_id = dataset.images[ind] 64 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 65 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 66 | #img_path = '/home/tensorboy/data/coco/images/val2017/000000004134.jpg' 67 | ret = detector.run(img_path) 68 | 69 | results[img_id] = ret['results'] 70 | 71 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 72 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 73 | for t in avg_time_stats: 74 | avg_time_stats[t].update(ret[t]) 75 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 76 | bar.next() 77 | bar.finish() 78 | dataset.run_eval(results, cfg.OUTPUT_DIR) 79 | 80 | if __name__ == '__main__': 81 | args = parse_args() 82 | update_config(cfg, args.cfg) 83 | cfg.defrost() 84 | cfg.DEBUG = args.DEBUG 85 | cfg.TEST.MODEL_PATH = args.TESTMODEL 86 | cfg.TEST.NMS = args.NMS 87 | cfg.freeze() 88 | test(cfg) 89 | --------------------------------------------------------------------------------
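For quick reference, a minimal sketch of driving the detector directly from Python, mirroring the flow of `tools/demo.py` above; the config and checkpoint paths are placeholders, and it assumes the snippet is run from `tools/` so that `_init_paths` can add `lib/` to `sys.path`.

~~~
# run from tools/ so that _init_paths puts ../lib on sys.path
import _init_paths  # noqa: F401
from config import cfg, update_config
from detectors.detector_factory import detector_factory

update_config(cfg, '/path/to/experiment_config.yaml')  # placeholder config file
cfg.defrost()
cfg.TEST.MODEL_PATH = '/path/to/trained_model.pth'     # placeholder checkpoint
cfg.freeze()

detector = detector_factory[cfg.TEST.TASK](cfg)
ret = detector.run('/path/to/image.jpg')  # same entry point demo.py and evaluate.py use
print(ret['results'])                     # detections; per-stage timings are also in ret
~~~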