├── README.md ├── imgs └── demo1.jpg ├── main.cpp └── main.py /README.md: -------------------------------------------------------------------------------- 1 | # crowdcounting-p2pnet-opencv 2 | 使用OpenCV部署P2PNet人群检测和计数，包含C++和Python两种版本的实现 3 | 4 | 人群检测和计数是计算机视觉领域的一个热门研究课题，腾讯优图实验室在ICCV 2021发布了一篇论文 5 | 《Rethinking Counting and Localization in Crowds:A Purely Point-Based Framework》。 6 | 在运行了他们发布的程序之后，我编写了一套使用OpenCV部署的程序，依然是包含C++和Python两种版本的实现。 7 | 8 | onnx模型文件在百度云盘，链接：https://pan.baidu.com/s/1M3CG3QAPnsTTuOKcwVkjDg 9 | 提取码：pd8v 10 | -------------------------------------------------------------------------------- /imgs/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/crowdcounting-p2pnet-opencv/f642bc39b42377574752a636cb34f9a61129370d/imgs/demo1.jpg -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace cv; 9 | using namespace dnn; 10 | using namespace std; 11 | 12 | struct CrowdPoint 13 | { 14 | cv::Point pt; 15 | float prob; 16 | }; 17 | 18 | static void shift(int w, int h, int stride, vector anchor_points, vector& shifted_anchor_points) 19 | { 20 | vector x_, y_; 21 | for (int i = 0; i < w; i++) 22 | { 23 | float x = (i + 0.5) * stride; 24 | x_.push_back(x); 25 | } 26 | for (int i = 0; i < h; i++) 27 | { 28 | float y = (i + 0.5) * stride; 29 | y_.push_back(y); 30 | } 31 | 32 | vector shift_x((size_t)w * h, 0), shift_y((size_t)w * h, 0); 33 | for (int i = 0; i < h; i++) 34 | { 35 | for (int j = 0; j < w; j++) 36 | { 37 | shift_x[i * w + j] = x_[j]; 38 | } 39 | } 40 | for (int i = 0; i < h; i++) 41 | { 42 | for (int j = 0; j < w; j++) 43 | { 44 | shift_y[i * w + j] = y_[i]; 45 | } 46 | } 47 | 48 | vector shifts((size_t)w * h * 2, 0); 49 | for (int i = 0; i < w * h; i++) 50 
| { 51 | shifts[i * 2] = shift_x[i]; 52 | shifts[i * 2 + 1] = shift_y[i]; 53 | } 54 | 55 | shifted_anchor_points.resize((size_t)2 * w * h * anchor_points.size() / 2, 0); 56 | for (int i = 0; i < w * h; i++) 57 | { 58 | for (int j = 0; j < anchor_points.size() / 2; j++) 59 | { 60 | float x = anchor_points[j * 2] + shifts[i * 2]; 61 | float y = anchor_points[j * 2 + 1] + shifts[i * 2 + 1]; 62 | shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2] = x; 63 | shifted_anchor_points[i * anchor_points.size() / 2 * 2 + j * 2 + 1] = y; 64 | } 65 | } 66 | } 67 | static void generate_anchor_points(int stride, int row, int line, vector& anchor_points) 68 | { 69 | float row_step = (float)stride / row; 70 | float line_step = (float)stride / line; 71 | 72 | vector x_, y_; 73 | for (int i = 1; i < line + 1; i++) 74 | { 75 | float x = (i - 0.5) * line_step - stride / 2; 76 | x_.push_back(x); 77 | } 78 | for (int i = 1; i < row + 1; i++) 79 | { 80 | float y = (i - 0.5) * row_step - stride / 2; 81 | y_.push_back(y); 82 | } 83 | vector shift_x((size_t)row * line, 0), shift_y((size_t)row * line, 0); 84 | for (int i = 0; i < row; i++) 85 | { 86 | for (int j = 0; j < line; j++) 87 | { 88 | shift_x[i * line + j] = x_[j]; 89 | } 90 | } 91 | for (int i = 0; i < row; i++) 92 | { 93 | for (int j = 0; j < line; j++) 94 | { 95 | shift_y[i * line + j] = y_[i]; 96 | } 97 | } 98 | anchor_points.resize((size_t)row * line * 2, 0); 99 | for (int i = 0; i < row * line; i++) 100 | { 101 | float x = shift_x[i]; 102 | float y = shift_y[i]; 103 | anchor_points[i * 2] = x; 104 | anchor_points[i * 2 + 1] = y; 105 | } 106 | } 107 | static void generate_anchor_points(int img_w, int img_h, vector pyramid_levels, int row, int line, vector& all_anchor_points) 108 | { 109 | vector > image_shapes; 110 | vector strides; 111 | for (int i = 0; i < pyramid_levels.size(); i++) 112 | { 113 | int new_h = floor((img_h + pow(2, pyramid_levels[i]) - 1) / pow(2, pyramid_levels[i])); 114 | int new_w = floor((img_w + 
pow(2, pyramid_levels[i]) - 1) / pow(2, pyramid_levels[i])); 115 | image_shapes.push_back(make_pair(new_w, new_h)); 116 | strides.push_back(pow(2, pyramid_levels[i])); 117 | } 118 | 119 | all_anchor_points.clear(); 120 | for (int i = 0; i < pyramid_levels.size(); i++) 121 | { 122 | vector anchor_points; 123 | generate_anchor_points(pow(2, pyramid_levels[i]), row, line, anchor_points); 124 | vector shifted_anchor_points; 125 | shift(image_shapes[i].first, image_shapes[i].second, strides[i], anchor_points, shifted_anchor_points); 126 | all_anchor_points.insert(all_anchor_points.end(), shifted_anchor_points.begin(), shifted_anchor_points.end()); 127 | } 128 | } 129 | 130 | class P2PNet 131 | { 132 | public: 133 | P2PNet(const float confThreshold = 0.5) 134 | { 135 | this->confThreshold = confThreshold; 136 | this->net = readNet("SHTechA.onnx"); 137 | } 138 | void detect(Mat& frame); 139 | private: 140 | float confThreshold; 141 | Net net; 142 | Mat preprocess(Mat srcimgt); 143 | const float mean[3] = { 0.485, 0.456, 0.406 }; 144 | const float std[3] = { 0.229, 0.224, 0.225 }; 145 | vector output_names = { "pred_logits", "pred_points" }; 146 | }; 147 | 148 | 149 | Mat P2PNet::preprocess(Mat srcimg) 150 | { 151 | int srch = srcimg.rows, srcw = srcimg.cols; 152 | int new_width = srcw / 128 * 128; 153 | int new_height = srch / 128 * 128; 154 | Mat dstimg; 155 | cvtColor(srcimg, dstimg, cv::COLOR_BGR2RGB); 156 | resize(dstimg, dstimg, Size(new_width, new_height), INTER_AREA); 157 | dstimg.convertTo(dstimg, CV_32F); 158 | int i = 0, j = 0; 159 | for (i = 0; i < dstimg.rows; i++) 160 | { 161 | float* pdata = (float*)(dstimg.data + i * dstimg.step); 162 | for (j = 0; j < dstimg.cols; j++) 163 | { 164 | pdata[0] = (pdata[0] / 255.0 - this->mean[0]) / this->std[0]; 165 | pdata[1] = (pdata[1] / 255.0 - this->mean[1]) / this->std[1]; 166 | pdata[2] = (pdata[2] / 255.0 - this->mean[2]) / this->std[2]; 167 | pdata += 3; 168 | } 169 | } 170 | return dstimg; 171 | } 172 | 173 | void 
P2PNet::detect(Mat& frame) 174 | { 175 | const int width = frame.cols; 176 | const int height = frame.rows; 177 | Mat img = this->preprocess(frame); 178 | const int new_width = img.cols; 179 | const int new_height = img.rows; 180 | Mat blob = blobFromImage(img); 181 | this->net.setInput(blob); 182 | vector outs; 183 | //this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); 184 | this->net.forward(outs, output_names); 185 | 186 | vector pyramid_levels(1, 3); 187 | vector all_anchor_points; 188 | generate_anchor_points(img.cols, img.rows, pyramid_levels, 2, 2, all_anchor_points); 189 | const int num_proposal = outs[0].cols; 190 | int i = 0; 191 | float* pscore = (float*)outs[0].data; 192 | float* pcoord = (float*)outs[1].data; 193 | vector crowd_points; 194 | for (i = 0; i < num_proposal; i++) 195 | { 196 | if (pscore[i] > this->confThreshold) 197 | { 198 | float x = (pcoord[i] + all_anchor_points[i * 2]) / (float)new_width * (float)width; 199 | float y = (pcoord[i+1]+ all_anchor_points[i * 2 + 1]) / (float)new_height * (float)height; 200 | crowd_points.push_back({ Point(int(x), int(y)), pscore[i] }); 201 | } 202 | pcoord += 2; 203 | } 204 | cout << "have " << crowd_points.size() << " people" << endl; 205 | for (i = 0; i < crowd_points.size(); i++) 206 | { 207 | cv::circle(frame, crowd_points[i].pt, 2, cv::Scalar(0, 0, 255), -1, 8, 0); 208 | } 209 | } 210 | 211 | int main() 212 | { 213 | P2PNet mynet(0.5); 214 | string imgpath = "imgs/demo1.jpg"; 215 | Mat srcimg = imread(imgpath); 216 | mynet.detect(srcimg); 217 | 218 | static const string kWinName = "Deep learning object detection in OpenCV"; 219 | namedWindow(kWinName, WINDOW_NORMAL); 220 | imshow(kWinName, srcimg); 221 | waitKey(0); 222 | destroyAllWindows(); 223 | } -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | 5 | 
class AnchorPoints():
    """Generates the reference (anchor) points the network's offsets are relative to.

    Each pyramid level ``p`` tiles the image with cells of ``strides[i]`` pixels
    and places ``row * line`` evenly spaced points inside every cell.
    """

    def __init__(self, pyramid_levels=None, strides=None, row=3, line=3):
        """
        Args:
            pyramid_levels: list of pyramid levels; defaults to ``[3, 4, 5, 6, 7]``.
            strides: cell size in pixels for each level; defaults to
                ``2 ** level``. Must have one entry per pyramid level.
            row: number of anchor rows per cell.
            line: number of anchor columns per cell.

        Raises:
            ValueError: if ``strides`` and ``pyramid_levels`` differ in length.
        """
        super(AnchorPoints, self).__init__()

        if pyramid_levels is None:
            self.pyramid_levels = [3, 4, 5, 6, 7]
        else:
            self.pyramid_levels = pyramid_levels

        if strides is None:
            self.strides = [2 ** x for x in self.pyramid_levels]
        else:
            # Previously an explicit ``strides`` was silently dropped, leaving
            # ``self.strides`` undefined and making ``__call__`` fail.
            self.strides = list(strides)
        if len(self.strides) != len(self.pyramid_levels):
            raise ValueError('strides must have one entry per pyramid level')

        self.row = row
        self.line = line

    def generate_anchor_points(self, stride=16, row=3, line=3):
        """Return the ``(row * line, 2)`` array of (x, y) offsets inside one cell.

        Offsets are relative to the cell centre.
        """
        row_step = stride / row
        line_step = stride / line

        shift_x = (np.arange(1, line + 1) - 0.5) * line_step - stride / 2
        shift_y = (np.arange(1, row + 1) - 0.5) * row_step - stride / 2

        shift_x, shift_y = np.meshgrid(shift_x, shift_y)

        return np.vstack((shift_x.ravel(), shift_y.ravel())).transpose()

    def shift(self, shape, stride, anchor_points):
        """Shift the per-cell anchor offsets to every cell centre of a grid.

        Args:
            shape: grid size as ``(rows, cols)``.
            stride: cell size in pixels.
            anchor_points: ``(A, 2)`` array of per-cell (x, y) offsets.

        Returns:
            ``(K * A, 2)`` array, where ``K = rows * cols``; cells are ordered
            row by row and the ``A`` anchors of a cell are contiguous.
        """
        shift_x = (np.arange(0, shape[1]) + 0.5) * stride
        shift_y = (np.arange(0, shape[0]) + 0.5) * stride

        shift_x, shift_y = np.meshgrid(shift_x, shift_y)

        shifts = np.vstack((shift_x.ravel(), shift_y.ravel())).transpose()

        A = anchor_points.shape[0]
        K = shifts.shape[0]
        # (1, A, 2) + (K, 1, 2) broadcasts to (K, A, 2).
        all_anchor_points = anchor_points.reshape((1, A, 2)) + shifts.reshape((K, 1, 2))
        return all_anchor_points.reshape((K * A, 2))

    def __call__(self, image):
        """Return all anchor points for ``image`` as a ``(1, N, 2)`` float32 array.

        ``image`` is indexed as ``image.shape[2:]`` to obtain (height, width),
        i.e. it is expected to be a 4-D blob whose last two axes are spatial.
        """
        image_shape = np.array(image.shape[2:])
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # Seed with an empty array so an empty level list still yields (1, 0, 2).
        per_level = [np.zeros((0, 2), dtype=np.float32)]
        for idx, p in enumerate(self.pyramid_levels):
            anchor_points = self.generate_anchor_points(2 ** p, row=self.row, line=self.line)
            per_level.append(self.shift(image_shapes[idx], self.strides[idx], anchor_points))

        all_anchor_points = np.concatenate(per_level, axis=0)
        all_anchor_points = np.expand_dims(all_anchor_points, axis=0)
        return all_anchor_points.astype(np.float32)


class P2PNet():
    """P2PNet crowd localisation model run through ``cv2.dnn``."""

    def __init__(self, modelPath, confThreshold=0.5):
        """
        Args:
            modelPath: path of the ONNX model file.
            confThreshold: detections with a score not greater than this are dropped.
        """
        self.model = cv2.dnn.readNet(modelPath)
        self.inputNames = 'input'
        self.outputNames = ['pred_logits', 'pred_points']
        self.confThreshold = confThreshold
        # Per-channel normalisation constants, applied in RGB order.
        self.mean_ = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape((1, 1, 3))
        self.std_ = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape((1, 1, 3))
        self.anchor_points = AnchorPoints(pyramid_levels=[3, ], row=2, line=2)

    def detect(self, srcimg):
        """Detect people in a BGR image.

        Args:
            srcimg: BGR image as returned by ``cv2.imread``.

        Returns:
            ``(scores, points)``: the scores above the threshold and the matching
            ``(k, 2)`` array of (x, y) positions in ``srcimg`` pixel coordinates.

        Raises:
            ValueError: if ``srcimg`` is ``None`` or empty.
        """
        if srcimg is None or srcimg.size == 0:
            raise ValueError('detect() received an empty image')

        img = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
        height, width = img.shape[:2]
        # Round down to a multiple of 128, but never to 0: images smaller than
        # 128 px are enlarged instead of producing an empty resize target.
        new_width = max(width // 128, 1) * 128
        new_height = max(height // 128, 1) * 128
        img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
        img = (img.astype(np.float32) / 255.0 - self.mean_) / self.std_

        # Preprocess
        inputBlob = cv2.dnn.blobFromImage(img)
        # Forward
        self.model.setInput(inputBlob, self.inputNames)
        outputBlob = self.model.forward(self.outputNames)

        anchor_points = self.anchor_points(inputBlob)
        output_coord = outputBlob[1] + anchor_points
        keep = outputBlob[0] > self.confThreshold
        points = output_coord[keep]
        scores = outputBlob[0][keep]

        # Map from network-input coordinates back to the original image.
        ratioh, ratiow = srcimg.shape[0] / img.shape[0], srcimg.shape[1] / img.shape[1]
        points[:, 0] *= ratiow
        points[:, 1] *= ratioh
        return scores, points


if __name__ == '__main__':
    # This is the top-level parser, so the default -h/--help must stay enabled.
    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation')
    parser.add_argument('--imgpath', default='imgs/demo1.jpg', type=str,
                        help="image path")
    parser.add_argument('--onnx_path', default='SHTechA.onnx',
                        help='path where the onnx file saved')
    parser.add_argument('--conf_threshold', type=float, default=0.5,
                        help='Filter out points of confidence < conf_threshold.')
    args = parser.parse_args()

    srcimg = cv2.imread(args.imgpath)
    if srcimg is None:
        # cv2.imread signals failure by returning None rather than raising.
        parser.error('cannot read image: ' + args.imgpath)
    net = P2PNet(args.onnx_path, confThreshold=args.conf_threshold)
    scores, points = net.detect(srcimg)
    print('have', points.shape[0], 'people')
    for i in range(points.shape[0]):
        cv2.circle(srcimg, (int(points[i, 0]), int(points[i, 1])), 2, (0, 0, 255), -1)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()