├── .gitattributes
├── README.md
├── data output.xlsx
├── main.ipynb
├── mobilenet_ssd
│   ├── MobileNetSSD_deploy.caffemodel
│   └── MobileNetSSD_deploy.prototxt
├── mods
│   ├── __init__.py
│   ├── centroidtracker.py
│   └── trackableobject.py
├── people_counter.py
└── videos
    └── sample.mp4

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
"3.8.6" 529 | } 530 | }, 531 | "nbformat": 4, 532 | "nbformat_minor": 4 533 | } 534 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # People Counting OpenCV Object Detection 2 | People Counter Based on Pyimagesearch Source Code with modified for Export to Excel File 3 | 4 | https://www.pyimagesearch.com/2018/08/13/opencv-people-counter/ 5 | 6 | https://medium.com/better-programming/using-python-pandas-with-excel-d5082102ca27 7 | -------------------------------------------------------------------------------- /data output.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulankun1412/People-Counting-OpenCV-Object-Detection/e5949294f1975141a815b6ca2228d025b118a430/data output.xlsx -------------------------------------------------------------------------------- /main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Penggunaan untuk Terminal\n", 10 | "# To read and write back out to video:\n", 11 | "# python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \\\n", 12 | "#\t--model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --input videos/example_01.mp4 \\\n", 13 | "#\t--output output/output_01.avi\n", 14 | "#\n", 15 | "# To read from webcam and write back out to disk:\n", 16 | "# python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \\\n", 17 | "#\t--model mobilenet_ssd/MobileNetSSD_deploy.caffemodel \\\n", 18 | "#\t--output output/webcam_output.avi" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "from mods.centroidtracker import CentroidTracker\n", 28 | "from mods.trackableobject import TrackableObject\n", 29 | "from imutils.video import VideoStream\n", 30 | "from imutils.video import FPS\n", 31 | "import numpy as np\n", 32 | "import argparse\n", 33 | "import imutils\n", 34 | "import time\n", 35 | "import dlib\n", 36 | "import cv2" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# construct the argument parse and parse the arguments\n", 46 | "#ap = argparse.ArgumentParser()\n", 47 | "#ap.add_argument(\"-p\", \"--prototxt\", required=True,\n", 48 | "#\thelp=\"path to Caffe 'deploy' prototxt file\")\n", 49 | "#ap.add_argument(\"-m\", \"--model\", required=True,\n", 50 | "#\thelp=\"path to Caffe pre-trained model\")\n", 51 | "#ap.add_argument(\"-i\", \"--input\", type=str,\n", 52 | "#\thelp=\"path to optional input video file\")\n", 53 | "#ap.add_argument(\"-o\", \"--output\", type=str,\n", 54 | "#\thelp=\"path to optional output video file\")\n", 55 | "#ap.add_argument(\"-c\", \"--confidence\", type=float, default=0.4,\n", 56 | "#\thelp=\"minimum probability to filter weak detections\")\n", 57 | "#ap.add_argument(\"-s\", \"--skip-frames\", type=int, default=30,\n", 58 | "#\thelp=\"# of skip frames between detections\")\n", 59 | "#args = vars(ap.parse_args())" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# initialize the list of class labels MobileNet SSD was trained to\n", 69 | "# detect\n", 70 | 
"CLASSES = [\"background\", \"aeroplane\", \"bicycle\", \"bird\", \"boat\",\n", 71 | "\t\"bottle\", \"bus\", \"car\", \"cat\", \"chair\", \"cow\", \"diningtable\",\n", 72 | "\t\"dog\", \"horse\", \"motorbike\", \"person\", \"pottedplant\", \"sheep\",\n", 73 | "\t\"sofa\", \"train\", \"tvmonitor\"]\n", 74 | "\n", 75 | "prototxt = \"mobilenet_ssd/MobileNetSSD_deploy.prototxt\"\n", 76 | "model = \"mobilenet_ssd/MobileNetSSD_deploy.caffemodel\"\n", 77 | "\n", 78 | "input_file = \"videos/sample.mp4\"\n", 79 | "output_file = \"output/test.avi\"\n", 80 | "confidence_input = 0.4\n", 81 | "frame_skip = 30" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "import pandas as pd\n", 91 | "\n", 92 | "# dataframe Name and Age columns\n", 93 | "df = pd.DataFrame({'no_data': ['Null'],\n", 94 | " 'nama_CCTV': ['Null'],\n", 95 | " 'menit': ['Null'],\n", 96 | " 'jumlah_counting': ['Null']})\n", 97 | "\n", 98 | "# Create a Pandas Excel writer using XlsxWriter as the engine.\n", 99 | "writer = pd.ExcelWriter('data output.xlsx', engine='xlsxwriter')\n", 100 | "\n", 101 | "# Convert the dataframe to an XlsxWriter Excel object.\n", 102 | "df.to_excel(writer, sheet_name='Data Jumlah Peserta CFD', index=False)\n", 103 | "\n", 104 | "# Close the Pandas Excel writer and output the Excel file.\n", 105 | "writer.save()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "[INFO] loading model...\n", 118 | "[INFO] opening video file...\n", 119 | "[INFO] elapsed time: 916.63\n", 120 | "[INFO] approx. FPS: 1.96\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "from openpyxl import load_workbook\n", 126 | "\n", 127 | "# load our serialized model from disk\n", 128 | "print(\"[INFO] loading model...\")\n", 129 | "net = cv2.dnn.readNetFromCaffe(prototxt, model)\n", 130 | "\n", 131 | "# if a video path was not supplied, grab a reference to the webcam\n", 132 | "if not (input_file , False):\n", 133 | " print(\"[INFO] starting video stream...\")\n", 134 | " vs = VideoStream(src=0).start()\n", 135 | " time.sleep(2.0)\n", 136 | "\n", 137 | "# otherwise, grab a reference to the video file\n", 138 | "else:\n", 139 | " print(\"[INFO] opening video file...\")\n", 140 | " vs = cv2.VideoCapture(input_file)\n", 141 | "\n", 142 | "# initialize the video writer (we'll instantiate later if need be)\n", 143 | "writer = None\n", 144 | "\n", 145 | "# initialize the frame dimensions (we'll set them as soon as we read\n", 146 | "# the first frame from the video)\n", 147 | "W = None\n", 148 | "H = None\n", 149 | "\n", 150 | "# instantiate our centroid tracker, then initialize a list to store\n", 151 | "# each of our dlib correlation trackers, followed by a dictionary to\n", 152 | "# map each unique object ID to a TrackableObject\n", 153 | "ct = CentroidTracker(maxDisappeared=40, maxDistance=50)\n", 154 | "trackers = []\n", 155 | "trackableObjects = {}\n", 156 | "\n", 157 | "# initialize the total number of frames processed thus far, along\n", 158 | "# with the total number of objects that have moved either up or down\n", 159 | "totalFrames = 0\n", 160 | "totalDown = 0\n", 161 | "totalUp = 0\n", 162 | "totalSemua = 0\n", 163 | "\n", 164 | "# start the frames per second throughput estimator\n", 165 | "fps = FPS().start()\n", 166 | "\n", 167 | "# loop over frames from the video stream\n", 168 | "while 
True:\n", 169 | " # grab the next frame and handle if we are reading from either\n", 170 | " # VideoCapture or VideoStream\n", 171 | " frame = vs.read()\n", 172 | " frame = frame[1] if (input_file, False) else frame\n", 173 | "\n", 174 | " # if we are viewing a video and we did not grab a frame then we\n", 175 | " # have reached the end of the video\n", 176 | " if input_file is not None and frame is None:\n", 177 | " break\n", 178 | "\n", 179 | " # resize the frame to have a maximum width of 500 pixels (the\n", 180 | " # less data we have, the faster we can process it), then convert\n", 181 | " # the frame from BGR to RGB for dlib\n", 182 | " frame = imutils.resize(frame, width=1000)\n", 183 | " rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", 184 | "\n", 185 | " # if the frame dimensions are empty, set them\n", 186 | " if W is None or H is None:\n", 187 | " (H, W) = frame.shape[:2]\n", 188 | "\n", 189 | " # if we are supposed to be writing a video to disk, initialize\n", 190 | " # the writer\n", 191 | " if output_file is not None and writer is None:\n", 192 | " fourcc = cv2.VideoWriter_fourcc(*\"MJPG\")\n", 193 | " writer = cv2.VideoWriter(output_file, fourcc, 30,\n", 194 | " (W, H), True)\n", 195 | "\n", 196 | " # initialize the current status along with our list of bounding\n", 197 | " # box rectangles returned by either (1) our object detector or\n", 198 | " # (2) the correlation trackers\n", 199 | " status = \"Waiting\"\n", 200 | " rects = []\n", 201 | "\n", 202 | " # check to see if we should run a more computationally expensive\n", 203 | " # object detection method to aid our tracker\n", 204 | " if totalFrames % frame_skip == 0:\n", 205 | " # set the status and initialize our new set of object trackers\n", 206 | " status = \"Detecting\"\n", 207 | " trackers = []\n", 208 | "\n", 209 | " # convert the frame to a blob and pass the blob through the\n", 210 | " # network and obtain the detections\n", 211 | " blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)\n", 212 | " net.setInput(blob)\n", 213 | " detections = net.forward()\n", 214 | "\n", 215 | " # loop over the detections\n", 216 | " for i in np.arange(0, detections.shape[2]):\n", 217 | " # extract the confidence (i.e., probability) associated\n", 218 | " # with the prediction\n", 219 | " confidence = detections[0, 0, i, 2]\n", 220 | "\n", 221 | " # filter out weak detections by requiring a minimum\n", 222 | " # confidence\n", 223 | " if confidence > confidence_input:\n", 224 | " # extract the index of the class label from the\n", 225 | " # detections list\n", 226 | " idx = int(detections[0, 0, i, 1])\n", 227 | "\n", 228 | " # if the class label is not a person, ignore it\n", 229 | " if CLASSES[idx] != \"person\":\n", 230 | " continue\n", 231 | "\n", 232 | " # compute the (x, y)-coordinates of the bounding box\n", 233 | " # for the object\n", 234 | " box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])\n", 235 | " (startX, startY, endX, endY) = box.astype(\"int\")\n", 236 | "\n", 237 | " # construct a dlib rectangle object from the bounding\n", 238 | " # box coordinates and then start the dlib correlation\n", 239 | " # tracker\n", 240 | " tracker = dlib.correlation_tracker()\n", 241 | " rect = dlib.rectangle(startX, startY, endX, endY)\n", 242 | " tracker.start_track(rgb, rect)\n", 243 | "\n", 244 | " # add the tracker to our list of trackers so we can\n", 245 | " # utilize it during skip frames\n", 246 | " trackers.append(tracker)\n", 247 | "\n", 248 | " # otherwise, we should utilize our object *trackers* rather 
than\n", 249 | " # object *detectors* to obtain a higher frame processing throughput\n", 250 | " else:\n", 251 | " # loop over the trackers\n", 252 | " for tracker in trackers:\n", 253 | " # set the status of our system to be 'tracking' rather\n", 254 | " # than 'waiting' or 'detecting'\n", 255 | " status = \"Tracking\"\n", 256 | "\n", 257 | " # update the tracker and grab the updated position\n", 258 | " tracker.update(rgb)\n", 259 | " pos = tracker.get_position()\n", 260 | "\n", 261 | " # unpack the position object\n", 262 | " startX = int(pos.left())\n", 263 | " startY = int(pos.top())\n", 264 | " endX = int(pos.right())\n", 265 | " endY = int(pos.bottom())\n", 266 | "\n", 267 | " # add the bounding box coordinates to the rectangles list\n", 268 | " rects.append((startX, startY, endX, endY))\n", 269 | "\n", 270 | " # draw a horizontal line in the center of the frame -- once an\n", 271 | " # object crosses this line we will determine whether they were\n", 272 | " # moving 'up' or 'down'\n", 273 | " cv2.line(frame, (0, H // 1), (W, H // 10), (0, 255, 255), 2)\n", 274 | "\n", 275 | " # use the centroid tracker to associate the (1) old object\n", 276 | " # centroids with (2) the newly computed object centroids\n", 277 | " objects = ct.update(rects)\n", 278 | "\n", 279 | " # loop over the tracked objects\n", 280 | " for (objectID, centroid) in objects.items():\n", 281 | " # check to see if a trackable object exists for the current\n", 282 | " # object ID\n", 283 | " to = trackableObjects.get(objectID, None)\n", 284 | "\n", 285 | " # if there is no existing trackable object, create one\n", 286 | " if to is None:\n", 287 | " to = TrackableObject(objectID, centroid)\n", 288 | "\n", 289 | " # otherwise, there is a trackable object so we can utilize it\n", 290 | " # to determine direction\n", 291 | " else:\n", 292 | " # the difference between the y-coordinate of the *current*\n", 293 | " # centroid and the mean of *previous* centroids will tell\n", 294 | " # us in which direction the object is moving (negative for\n", 295 | " # 'up' and positive for 'down')\n", 296 | " y = [c[1] for c in to.centroids]\n", 297 | " direction = centroid[1] - np.mean(y)\n", 298 | " to.centroids.append(centroid)\n", 299 | "\n", 300 | " # check to see if the object has been counted or not\n", 301 | " if not to.counted:\n", 302 | " # if the direction is negative (indicating the object\n", 303 | " # is moving up) AND the centroid is above the center\n", 304 | " # line, count the object\n", 305 | " if direction < 0 and centroid[1] < H // 2:\n", 306 | " totalSemua += 1\n", 307 | " to.counted = True\n", 308 | "\n", 309 | " # if the direction is positive (indicating the object\n", 310 | " # is moving down) AND the centroid is below the\n", 311 | " # center line, count the object\n", 312 | " elif direction > 0 and centroid[1] > H // 2:\n", 313 | " totalSemua += 1\n", 314 | " to.counted = True\n", 315 | "\n", 316 | " # store the trackable object in our dictionary\n", 317 | " trackableObjects[objectID] = to\n", 318 | "\n", 319 | " # draw both the ID of the object and the centroid of the\n", 320 | " # object on the output frame\n", 321 | " text = \"ID {}\".format(objectID)\n", 322 | " cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),\n", 323 | " cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)\n", 324 | " cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)\n", 325 | "\n", 326 | " # construct a tuple of information we will be displaying on the\n", 327 | " # frame\n", 328 | " \n", 329 | " info = [\n", 330 
| " (\"Total Counting\", totalSemua),\n", 331 | " (\"Status\", status),\n", 332 | " ]\n", 333 | "\n", 334 | " # loop over the info tuples and draw them on our frame\n", 335 | " for (i, (k, v)) in enumerate(info):\n", 336 | " text = \"{}: {}\".format(k, v)\n", 337 | " cv2.putText(frame, text, (10, H - ((i * 20) + 20)),\n", 338 | " cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)\n", 339 | "\n", 340 | " # check to see if we should write the frame to disk\n", 341 | " if writer is not None:\n", 342 | " writer.write(frame)\n", 343 | "\n", 344 | " # show the output frame\n", 345 | " cv2.imshow(\"Frame\", frame)\n", 346 | " key = cv2.waitKey(1) & 0xFF\n", 347 | "\n", 348 | " # if the `q` key was pressed, break from the loop\n", 349 | " if key == ord(\"q\"):\n", 350 | " break\n", 351 | "\n", 352 | " # increment the total number of frames processed thus far and\n", 353 | " # then update the FPS counter\n", 354 | " totalFrames += 1\n", 355 | " fps.update()\n", 356 | " elapsed_time = time.time() - start_time\n", 357 | " \n", 358 | " # new dataframe with same columns\n", 359 | " #df = pd.DataFrame({'no_data': [totalFrames],\n", 360 | " # 'nama_CCTV': ['Sample'],\n", 361 | " # 'menit': [\"Jam\"],\n", 362 | " # 'jumlah_counting': [totalSemua]})\n", 363 | " #writerEx = pd.ExcelWriter('data output.xlsx', engine='openpyxl')\n", 364 | " # try to open an existing workbook\n", 365 | " #writerEx.book = load_workbook('data output.xlsx')\n", 366 | " # copy existing sheets\n", 367 | " #writerEx.sheets = dict((ws.title, ws) for ws in writerEx.book.worksheets)\n", 368 | " # read existing file\n", 369 | " #reader = pd.read_excel(r'data output.xlsx')\n", 370 | " # write out the new sheet\n", 371 | " #df.to_excel(writerEx,sheet_name='Data Jumlah Peserta CFD',index=False,header=False,startrow=len(reader)+1)\n", 372 | " #writerEx.close()\n", 373 | " #print (elapsed_time)\n", 374 | " \n", 375 | " \n", 376 | " if (totalFrames == 1 or totalFrames == 18000 or totalFrames == 36000 or totalFrames == 54000 or totalFrames == 72000 \n", 377 | " or totalFrames == 90000 or totalFrames == 108000 or totalFrames == 126000 or totalFrames == 144000 \n", 378 | " or totalFrames == 162000 or totalFrames == 180000 or totalFrames == 198000 or totalFrames == 216000 \n", 379 | " or totalFrames == 234000 or totalFrames == 252000 or totalFrames == 270000 or totalFrames == 288000 \n", 380 | " or totalFrames == 306000 or totalFrames == 324000) :\n", 381 | " #new dataframe with same columns\n", 382 | " df = pd.DataFrame({'NO': [totalFrames],\n", 383 | " 'CCTV': [nama_cctv],\n", 384 | " 'Menit': [elapsed_time],\n", 385 | " 'Jumlah Counting': [totalSemua]})\n", 386 | " writerEx = pd.ExcelWriter('data output.xlsx', engine='openpyxl')\n", 387 | " # try to open an existing workbook\n", 388 | " writerEx.book = load_workbook('data output.xlsx')\n", 389 | " # copy existing sheets\n", 390 | " writerEx.sheets = dict((ws.title, ws) for ws in writerEx.book.worksheets)\n", 391 | " # read existing file\n", 392 | " reader = pd.read_excel(r'data output.xlsx')\n", 393 | " # write out the new sheet\n", 394 | " df.to_excel(writerEx,sheet_name='Data Jumlah Peserta CFD',index=False,header=False,startrow=len(reader)+1)\n", 395 | " writerEx.close()\n", 396 | " print (elapsed_time)\n", 397 | " \n", 398 | "# stop the timer and display FPS information\n", 399 | "fps.stop()\n", 400 | "print(\"[INFO] elapsed time: {:.2f}\".format(fps.elapsed()))\n", 401 | "print(\"[INFO] approx. 
FPS: {:.2f}\".format(fps.fps()))\n", 402 | "\n", 403 | "# check to see if we need to release the video writer pointer\n", 404 | "if writer is not None:\n", 405 | " writer.release()\n", 406 | "\n", 407 | "# if we are not using a video file, stop the camera video stream\n", 408 | "if not (input_file, False):\n", 409 | " vs.stop()\n", 410 | "\n", 411 | "# otherwise, release the video file pointer\n", 412 | "else:\n", 413 | " vs.release()\n", 414 | "\n", 415 | "# close any open windows\n", 416 | "cv2.destroyAllWindows()" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 37, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "import pandas as pd\n", 426 | "\n", 427 | "data = {\n", 428 | " \"Sales Month 1\": [10, 20],\n", 429 | " \"Sales Month 2\": [5, 35],\n", 430 | "}\n", 431 | "df = pd.DataFrame(data)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 40, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "import pandas as pd\n", 441 | "\n", 442 | "# dataframe Name and Age columns\n", 443 | "df = pd.DataFrame({'Name': ['A', 'B', 'C', 'D'],\n", 444 | " 'Age': [10, 0, 30, 50]})\n", 445 | "\n", 446 | "# Create a Pandas Excel writer using XlsxWriter as the engine.\n", 447 | "writer = pd.ExcelWriter('demo.xlsx', engine='xlsxwriter')\n", 448 | "\n", 449 | "# Convert the dataframe to an XlsxWriter Excel object.\n", 450 | "df.to_excel(writer, sheet_name='Sheet1', index=False)\n", 451 | "\n", 452 | "# Close the Pandas Excel writer and output the Excel file.\n", 453 | "writer.save()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 41, 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "name": "stdout", 463 | "output_type": "stream", 464 | "text": [ 465 | " Name Age\n", 466 | "0 A 10\n", 467 | "1 B 0\n", 468 | "2 C 30\n", 469 | "3 D 50\n" 470 | ] 471 | } 472 | ], 473 | "source": [ 474 | "import pandas as pd\n", 475 | "reader = pd.read_excel(r'demo.xlsx')\n", 476 | "print(reader)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 63, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "import pandas as pd\n", 486 | "from openpyxl import load_workbook\n", 487 | "\n", 488 | "df = pd.DataFrame({'no_data': [1],\n", 489 | " 'nama_CCTV': ['Gelora'],\n", 490 | " 'menit': [8.10],\n", 491 | " 'jumlah_counting': [100]})\n", 492 | "writerEx = pd.ExcelWriter('data output.xlsx', engine='openpyxl')\n", 493 | "# try to open an existing workbook\n", 494 | "writerEx.book = load_workbook('data output.xlsx')\n", 495 | "# copy existing sheets\n", 496 | "writerEx.sheets = dict((ws.title, ws) for ws in writerEx.book.worksheets)\n", 497 | "# read existing file\n", 498 | "reader = pd.read_excel(r'data output.xlsx')\n", 499 | "# write out the new sheet\n", 500 | "df.to_excel(writerEx,sheet_name='Data Jumlah Peserta CFD',index=False,header=False,startrow=len(reader)+1)\n", 501 | "writerEx.close()" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [] 510 | } 511 | ], 512 | "metadata": { 513 | "kernelspec": { 514 | "display_name": "Python 3", 515 | "language": "python", 516 | "name": "python3" 517 | }, 518 | "language_info": { 519 | "codemirror_mode": { 520 | "name": "ipython", 521 | "version": 3 522 | }, 523 | "file_extension": ".py", 524 | "mimetype": "text/x-python", 525 | "name": "python", 526 | "nbconvert_exporter": "python", 527 | "pygments_lexer": "ipython3", 528 | "version": 
"3.8.6" 529 | } 530 | }, 531 | "nbformat": 4, 532 | "nbformat_minor": 4 533 | } 534 | -------------------------------------------------------------------------------- /mobilenet_ssd/MobileNetSSD_deploy.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulankun1412/People-Counting-OpenCV-Object-Detection/e5949294f1975141a815b6ca2228d025b118a430/mobilenet_ssd/MobileNetSSD_deploy.caffemodel -------------------------------------------------------------------------------- /mobilenet_ssd/MobileNetSSD_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "MobileNet-SSD" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 300 7 | dim: 300 8 | } 9 | layer { 10 | name: "conv0" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv0" 14 | param { 15 | lr_mult: 1.0 16 | decay_mult: 1.0 17 | } 18 | param { 19 | lr_mult: 2.0 20 | decay_mult: 0.0 21 | } 22 | convolution_param { 23 | num_output: 32 24 | pad: 1 25 | kernel_size: 3 26 | stride: 2 27 | weight_filler { 28 | type: "msra" 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.0 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv0/relu" 38 | type: "ReLU" 39 | bottom: "conv0" 40 | top: "conv0" 41 | } 42 | layer { 43 | name: "conv1/dw" 44 | type: "Convolution" 45 | bottom: "conv0" 46 | top: "conv1/dw" 47 | param { 48 | lr_mult: 1.0 49 | decay_mult: 1.0 50 | } 51 | param { 52 | lr_mult: 2.0 53 | decay_mult: 0.0 54 | } 55 | convolution_param { 56 | num_output: 32 57 | pad: 1 58 | kernel_size: 3 59 | group: 32 60 | engine: CAFFE 61 | weight_filler { 62 | type: "msra" 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0.0 67 | } 68 | } 69 | } 70 | layer { 71 | name: "conv1/dw/relu" 72 | type: "ReLU" 73 | bottom: "conv1/dw" 74 | top: "conv1/dw" 75 | } 76 | layer { 77 | name: "conv1" 78 | type: "Convolution" 79 | bottom: "conv1/dw" 80 | top: "conv1" 81 | param { 82 | lr_mult: 1.0 83 | decay_mult: 1.0 84 | } 85 | param { 86 | lr_mult: 2.0 87 | decay_mult: 0.0 88 | } 89 | convolution_param { 90 | num_output: 64 91 | kernel_size: 1 92 | weight_filler { 93 | type: "msra" 94 | } 95 | bias_filler { 96 | type: "constant" 97 | value: 0.0 98 | } 99 | } 100 | } 101 | layer { 102 | name: "conv1/relu" 103 | type: "ReLU" 104 | bottom: "conv1" 105 | top: "conv1" 106 | } 107 | layer { 108 | name: "conv2/dw" 109 | type: "Convolution" 110 | bottom: "conv1" 111 | top: "conv2/dw" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 0.0 119 | } 120 | convolution_param { 121 | num_output: 64 122 | pad: 1 123 | kernel_size: 3 124 | stride: 2 125 | group: 64 126 | engine: CAFFE 127 | weight_filler { 128 | type: "msra" 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 0.0 133 | } 134 | } 135 | } 136 | layer { 137 | name: "conv2/dw/relu" 138 | type: "ReLU" 139 | bottom: "conv2/dw" 140 | top: "conv2/dw" 141 | } 142 | layer { 143 | name: "conv2" 144 | type: "Convolution" 145 | bottom: "conv2/dw" 146 | top: "conv2" 147 | param { 148 | lr_mult: 1.0 149 | decay_mult: 1.0 150 | } 151 | param { 152 | lr_mult: 2.0 153 | decay_mult: 0.0 154 | } 155 | convolution_param { 156 | num_output: 128 157 | kernel_size: 1 158 | weight_filler { 159 | type: "msra" 160 | } 161 | bias_filler { 162 | type: "constant" 163 | value: 0.0 164 | } 165 | } 166 | } 167 | layer { 168 | name: "conv2/relu" 169 | type: "ReLU" 170 | bottom: "conv2" 171 | top: "conv2" 172 | } 
173 | layer { 174 | name: "conv3/dw" 175 | type: "Convolution" 176 | bottom: "conv2" 177 | top: "conv3/dw" 178 | param { 179 | lr_mult: 1.0 180 | decay_mult: 1.0 181 | } 182 | param { 183 | lr_mult: 2.0 184 | decay_mult: 0.0 185 | } 186 | convolution_param { 187 | num_output: 128 188 | pad: 1 189 | kernel_size: 3 190 | group: 128 191 | engine: CAFFE 192 | weight_filler { 193 | type: "msra" 194 | } 195 | bias_filler { 196 | type: "constant" 197 | value: 0.0 198 | } 199 | } 200 | } 201 | layer { 202 | name: "conv3/dw/relu" 203 | type: "ReLU" 204 | bottom: "conv3/dw" 205 | top: "conv3/dw" 206 | } 207 | layer { 208 | name: "conv3" 209 | type: "Convolution" 210 | bottom: "conv3/dw" 211 | top: "conv3" 212 | param { 213 | lr_mult: 1.0 214 | decay_mult: 1.0 215 | } 216 | param { 217 | lr_mult: 2.0 218 | decay_mult: 0.0 219 | } 220 | convolution_param { 221 | num_output: 128 222 | kernel_size: 1 223 | weight_filler { 224 | type: "msra" 225 | } 226 | bias_filler { 227 | type: "constant" 228 | value: 0.0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "conv3/relu" 234 | type: "ReLU" 235 | bottom: "conv3" 236 | top: "conv3" 237 | } 238 | layer { 239 | name: "conv4/dw" 240 | type: "Convolution" 241 | bottom: "conv3" 242 | top: "conv4/dw" 243 | param { 244 | lr_mult: 1.0 245 | decay_mult: 1.0 246 | } 247 | param { 248 | lr_mult: 2.0 249 | decay_mult: 0.0 250 | } 251 | convolution_param { 252 | num_output: 128 253 | pad: 1 254 | kernel_size: 3 255 | stride: 2 256 | group: 128 257 | engine: CAFFE 258 | weight_filler { 259 | type: "msra" 260 | } 261 | bias_filler { 262 | type: "constant" 263 | value: 0.0 264 | } 265 | } 266 | } 267 | layer { 268 | name: "conv4/dw/relu" 269 | type: "ReLU" 270 | bottom: "conv4/dw" 271 | top: "conv4/dw" 272 | } 273 | layer { 274 | name: "conv4" 275 | type: "Convolution" 276 | bottom: "conv4/dw" 277 | top: "conv4" 278 | param { 279 | lr_mult: 1.0 280 | decay_mult: 1.0 281 | } 282 | param { 283 | lr_mult: 2.0 284 | decay_mult: 0.0 285 | } 286 | convolution_param { 287 | num_output: 256 288 | kernel_size: 1 289 | weight_filler { 290 | type: "msra" 291 | } 292 | bias_filler { 293 | type: "constant" 294 | value: 0.0 295 | } 296 | } 297 | } 298 | layer { 299 | name: "conv4/relu" 300 | type: "ReLU" 301 | bottom: "conv4" 302 | top: "conv4" 303 | } 304 | layer { 305 | name: "conv5/dw" 306 | type: "Convolution" 307 | bottom: "conv4" 308 | top: "conv5/dw" 309 | param { 310 | lr_mult: 1.0 311 | decay_mult: 1.0 312 | } 313 | param { 314 | lr_mult: 2.0 315 | decay_mult: 0.0 316 | } 317 | convolution_param { 318 | num_output: 256 319 | pad: 1 320 | kernel_size: 3 321 | group: 256 322 | engine: CAFFE 323 | weight_filler { 324 | type: "msra" 325 | } 326 | bias_filler { 327 | type: "constant" 328 | value: 0.0 329 | } 330 | } 331 | } 332 | layer { 333 | name: "conv5/dw/relu" 334 | type: "ReLU" 335 | bottom: "conv5/dw" 336 | top: "conv5/dw" 337 | } 338 | layer { 339 | name: "conv5" 340 | type: "Convolution" 341 | bottom: "conv5/dw" 342 | top: "conv5" 343 | param { 344 | lr_mult: 1.0 345 | decay_mult: 1.0 346 | } 347 | param { 348 | lr_mult: 2.0 349 | decay_mult: 0.0 350 | } 351 | convolution_param { 352 | num_output: 256 353 | kernel_size: 1 354 | weight_filler { 355 | type: "msra" 356 | } 357 | bias_filler { 358 | type: "constant" 359 | value: 0.0 360 | } 361 | } 362 | } 363 | layer { 364 | name: "conv5/relu" 365 | type: "ReLU" 366 | bottom: "conv5" 367 | top: "conv5" 368 | } 369 | layer { 370 | name: "conv6/dw" 371 | type: "Convolution" 372 | bottom: "conv5" 373 | top: "conv6/dw" 374 | param 
{ 375 | lr_mult: 1.0 376 | decay_mult: 1.0 377 | } 378 | param { 379 | lr_mult: 2.0 380 | decay_mult: 0.0 381 | } 382 | convolution_param { 383 | num_output: 256 384 | pad: 1 385 | kernel_size: 3 386 | stride: 2 387 | group: 256 388 | engine: CAFFE 389 | weight_filler { 390 | type: "msra" 391 | } 392 | bias_filler { 393 | type: "constant" 394 | value: 0.0 395 | } 396 | } 397 | } 398 | layer { 399 | name: "conv6/dw/relu" 400 | type: "ReLU" 401 | bottom: "conv6/dw" 402 | top: "conv6/dw" 403 | } 404 | layer { 405 | name: "conv6" 406 | type: "Convolution" 407 | bottom: "conv6/dw" 408 | top: "conv6" 409 | param { 410 | lr_mult: 1.0 411 | decay_mult: 1.0 412 | } 413 | param { 414 | lr_mult: 2.0 415 | decay_mult: 0.0 416 | } 417 | convolution_param { 418 | num_output: 512 419 | kernel_size: 1 420 | weight_filler { 421 | type: "msra" 422 | } 423 | bias_filler { 424 | type: "constant" 425 | value: 0.0 426 | } 427 | } 428 | } 429 | layer { 430 | name: "conv6/relu" 431 | type: "ReLU" 432 | bottom: "conv6" 433 | top: "conv6" 434 | } 435 | layer { 436 | name: "conv7/dw" 437 | type: "Convolution" 438 | bottom: "conv6" 439 | top: "conv7/dw" 440 | param { 441 | lr_mult: 1.0 442 | decay_mult: 1.0 443 | } 444 | param { 445 | lr_mult: 2.0 446 | decay_mult: 0.0 447 | } 448 | convolution_param { 449 | num_output: 512 450 | pad: 1 451 | kernel_size: 3 452 | group: 512 453 | engine: CAFFE 454 | weight_filler { 455 | type: "msra" 456 | } 457 | bias_filler { 458 | type: "constant" 459 | value: 0.0 460 | } 461 | } 462 | } 463 | layer { 464 | name: "conv7/dw/relu" 465 | type: "ReLU" 466 | bottom: "conv7/dw" 467 | top: "conv7/dw" 468 | } 469 | layer { 470 | name: "conv7" 471 | type: "Convolution" 472 | bottom: "conv7/dw" 473 | top: "conv7" 474 | param { 475 | lr_mult: 1.0 476 | decay_mult: 1.0 477 | } 478 | param { 479 | lr_mult: 2.0 480 | decay_mult: 0.0 481 | } 482 | convolution_param { 483 | num_output: 512 484 | kernel_size: 1 485 | weight_filler { 486 | type: "msra" 487 | } 488 | bias_filler { 489 | type: "constant" 490 | value: 0.0 491 | } 492 | } 493 | } 494 | layer { 495 | name: "conv7/relu" 496 | type: "ReLU" 497 | bottom: "conv7" 498 | top: "conv7" 499 | } 500 | layer { 501 | name: "conv8/dw" 502 | type: "Convolution" 503 | bottom: "conv7" 504 | top: "conv8/dw" 505 | param { 506 | lr_mult: 1.0 507 | decay_mult: 1.0 508 | } 509 | param { 510 | lr_mult: 2.0 511 | decay_mult: 0.0 512 | } 513 | convolution_param { 514 | num_output: 512 515 | pad: 1 516 | kernel_size: 3 517 | group: 512 518 | engine: CAFFE 519 | weight_filler { 520 | type: "msra" 521 | } 522 | bias_filler { 523 | type: "constant" 524 | value: 0.0 525 | } 526 | } 527 | } 528 | layer { 529 | name: "conv8/dw/relu" 530 | type: "ReLU" 531 | bottom: "conv8/dw" 532 | top: "conv8/dw" 533 | } 534 | layer { 535 | name: "conv8" 536 | type: "Convolution" 537 | bottom: "conv8/dw" 538 | top: "conv8" 539 | param { 540 | lr_mult: 1.0 541 | decay_mult: 1.0 542 | } 543 | param { 544 | lr_mult: 2.0 545 | decay_mult: 0.0 546 | } 547 | convolution_param { 548 | num_output: 512 549 | kernel_size: 1 550 | weight_filler { 551 | type: "msra" 552 | } 553 | bias_filler { 554 | type: "constant" 555 | value: 0.0 556 | } 557 | } 558 | } 559 | layer { 560 | name: "conv8/relu" 561 | type: "ReLU" 562 | bottom: "conv8" 563 | top: "conv8" 564 | } 565 | layer { 566 | name: "conv9/dw" 567 | type: "Convolution" 568 | bottom: "conv8" 569 | top: "conv9/dw" 570 | param { 571 | lr_mult: 1.0 572 | decay_mult: 1.0 573 | } 574 | param { 575 | lr_mult: 2.0 576 | decay_mult: 0.0 577 | } 578 
| convolution_param { 579 | num_output: 512 580 | pad: 1 581 | kernel_size: 3 582 | group: 512 583 | engine: CAFFE 584 | weight_filler { 585 | type: "msra" 586 | } 587 | bias_filler { 588 | type: "constant" 589 | value: 0.0 590 | } 591 | } 592 | } 593 | layer { 594 | name: "conv9/dw/relu" 595 | type: "ReLU" 596 | bottom: "conv9/dw" 597 | top: "conv9/dw" 598 | } 599 | layer { 600 | name: "conv9" 601 | type: "Convolution" 602 | bottom: "conv9/dw" 603 | top: "conv9" 604 | param { 605 | lr_mult: 1.0 606 | decay_mult: 1.0 607 | } 608 | param { 609 | lr_mult: 2.0 610 | decay_mult: 0.0 611 | } 612 | convolution_param { 613 | num_output: 512 614 | kernel_size: 1 615 | weight_filler { 616 | type: "msra" 617 | } 618 | bias_filler { 619 | type: "constant" 620 | value: 0.0 621 | } 622 | } 623 | } 624 | layer { 625 | name: "conv9/relu" 626 | type: "ReLU" 627 | bottom: "conv9" 628 | top: "conv9" 629 | } 630 | layer { 631 | name: "conv10/dw" 632 | type: "Convolution" 633 | bottom: "conv9" 634 | top: "conv10/dw" 635 | param { 636 | lr_mult: 1.0 637 | decay_mult: 1.0 638 | } 639 | param { 640 | lr_mult: 2.0 641 | decay_mult: 0.0 642 | } 643 | convolution_param { 644 | num_output: 512 645 | pad: 1 646 | kernel_size: 3 647 | group: 512 648 | engine: CAFFE 649 | weight_filler { 650 | type: "msra" 651 | } 652 | bias_filler { 653 | type: "constant" 654 | value: 0.0 655 | } 656 | } 657 | } 658 | layer { 659 | name: "conv10/dw/relu" 660 | type: "ReLU" 661 | bottom: "conv10/dw" 662 | top: "conv10/dw" 663 | } 664 | layer { 665 | name: "conv10" 666 | type: "Convolution" 667 | bottom: "conv10/dw" 668 | top: "conv10" 669 | param { 670 | lr_mult: 1.0 671 | decay_mult: 1.0 672 | } 673 | param { 674 | lr_mult: 2.0 675 | decay_mult: 0.0 676 | } 677 | convolution_param { 678 | num_output: 512 679 | kernel_size: 1 680 | weight_filler { 681 | type: "msra" 682 | } 683 | bias_filler { 684 | type: "constant" 685 | value: 0.0 686 | } 687 | } 688 | } 689 | layer { 690 | name: "conv10/relu" 691 | type: "ReLU" 692 | bottom: "conv10" 693 | top: "conv10" 694 | } 695 | layer { 696 | name: "conv11/dw" 697 | type: "Convolution" 698 | bottom: "conv10" 699 | top: "conv11/dw" 700 | param { 701 | lr_mult: 1.0 702 | decay_mult: 1.0 703 | } 704 | param { 705 | lr_mult: 2.0 706 | decay_mult: 0.0 707 | } 708 | convolution_param { 709 | num_output: 512 710 | pad: 1 711 | kernel_size: 3 712 | group: 512 713 | engine: CAFFE 714 | weight_filler { 715 | type: "msra" 716 | } 717 | bias_filler { 718 | type: "constant" 719 | value: 0.0 720 | } 721 | } 722 | } 723 | layer { 724 | name: "conv11/dw/relu" 725 | type: "ReLU" 726 | bottom: "conv11/dw" 727 | top: "conv11/dw" 728 | } 729 | layer { 730 | name: "conv11" 731 | type: "Convolution" 732 | bottom: "conv11/dw" 733 | top: "conv11" 734 | param { 735 | lr_mult: 1.0 736 | decay_mult: 1.0 737 | } 738 | param { 739 | lr_mult: 2.0 740 | decay_mult: 0.0 741 | } 742 | convolution_param { 743 | num_output: 512 744 | kernel_size: 1 745 | weight_filler { 746 | type: "msra" 747 | } 748 | bias_filler { 749 | type: "constant" 750 | value: 0.0 751 | } 752 | } 753 | } 754 | layer { 755 | name: "conv11/relu" 756 | type: "ReLU" 757 | bottom: "conv11" 758 | top: "conv11" 759 | } 760 | layer { 761 | name: "conv12/dw" 762 | type: "Convolution" 763 | bottom: "conv11" 764 | top: "conv12/dw" 765 | param { 766 | lr_mult: 1.0 767 | decay_mult: 1.0 768 | } 769 | param { 770 | lr_mult: 2.0 771 | decay_mult: 0.0 772 | } 773 | convolution_param { 774 | num_output: 512 775 | pad: 1 776 | kernel_size: 3 777 | stride: 2 778 | group: 
512 779 | engine: CAFFE 780 | weight_filler { 781 | type: "msra" 782 | } 783 | bias_filler { 784 | type: "constant" 785 | value: 0.0 786 | } 787 | } 788 | } 789 | layer { 790 | name: "conv12/dw/relu" 791 | type: "ReLU" 792 | bottom: "conv12/dw" 793 | top: "conv12/dw" 794 | } 795 | layer { 796 | name: "conv12" 797 | type: "Convolution" 798 | bottom: "conv12/dw" 799 | top: "conv12" 800 | param { 801 | lr_mult: 1.0 802 | decay_mult: 1.0 803 | } 804 | param { 805 | lr_mult: 2.0 806 | decay_mult: 0.0 807 | } 808 | convolution_param { 809 | num_output: 1024 810 | kernel_size: 1 811 | weight_filler { 812 | type: "msra" 813 | } 814 | bias_filler { 815 | type: "constant" 816 | value: 0.0 817 | } 818 | } 819 | } 820 | layer { 821 | name: "conv12/relu" 822 | type: "ReLU" 823 | bottom: "conv12" 824 | top: "conv12" 825 | } 826 | layer { 827 | name: "conv13/dw" 828 | type: "Convolution" 829 | bottom: "conv12" 830 | top: "conv13/dw" 831 | param { 832 | lr_mult: 1.0 833 | decay_mult: 1.0 834 | } 835 | param { 836 | lr_mult: 2.0 837 | decay_mult: 0.0 838 | } 839 | convolution_param { 840 | num_output: 1024 841 | pad: 1 842 | kernel_size: 3 843 | group: 1024 844 | engine: CAFFE 845 | weight_filler { 846 | type: "msra" 847 | } 848 | bias_filler { 849 | type: "constant" 850 | value: 0.0 851 | } 852 | } 853 | } 854 | layer { 855 | name: "conv13/dw/relu" 856 | type: "ReLU" 857 | bottom: "conv13/dw" 858 | top: "conv13/dw" 859 | } 860 | layer { 861 | name: "conv13" 862 | type: "Convolution" 863 | bottom: "conv13/dw" 864 | top: "conv13" 865 | param { 866 | lr_mult: 1.0 867 | decay_mult: 1.0 868 | } 869 | param { 870 | lr_mult: 2.0 871 | decay_mult: 0.0 872 | } 873 | convolution_param { 874 | num_output: 1024 875 | kernel_size: 1 876 | weight_filler { 877 | type: "msra" 878 | } 879 | bias_filler { 880 | type: "constant" 881 | value: 0.0 882 | } 883 | } 884 | } 885 | layer { 886 | name: "conv13/relu" 887 | type: "ReLU" 888 | bottom: "conv13" 889 | top: "conv13" 890 | } 891 | layer { 892 | name: "conv14_1" 893 | type: "Convolution" 894 | bottom: "conv13" 895 | top: "conv14_1" 896 | param { 897 | lr_mult: 1.0 898 | decay_mult: 1.0 899 | } 900 | param { 901 | lr_mult: 2.0 902 | decay_mult: 0.0 903 | } 904 | convolution_param { 905 | num_output: 256 906 | kernel_size: 1 907 | weight_filler { 908 | type: "msra" 909 | } 910 | bias_filler { 911 | type: "constant" 912 | value: 0.0 913 | } 914 | } 915 | } 916 | layer { 917 | name: "conv14_1/relu" 918 | type: "ReLU" 919 | bottom: "conv14_1" 920 | top: "conv14_1" 921 | } 922 | layer { 923 | name: "conv14_2" 924 | type: "Convolution" 925 | bottom: "conv14_1" 926 | top: "conv14_2" 927 | param { 928 | lr_mult: 1.0 929 | decay_mult: 1.0 930 | } 931 | param { 932 | lr_mult: 2.0 933 | decay_mult: 0.0 934 | } 935 | convolution_param { 936 | num_output: 512 937 | pad: 1 938 | kernel_size: 3 939 | stride: 2 940 | weight_filler { 941 | type: "msra" 942 | } 943 | bias_filler { 944 | type: "constant" 945 | value: 0.0 946 | } 947 | } 948 | } 949 | layer { 950 | name: "conv14_2/relu" 951 | type: "ReLU" 952 | bottom: "conv14_2" 953 | top: "conv14_2" 954 | } 955 | layer { 956 | name: "conv15_1" 957 | type: "Convolution" 958 | bottom: "conv14_2" 959 | top: "conv15_1" 960 | param { 961 | lr_mult: 1.0 962 | decay_mult: 1.0 963 | } 964 | param { 965 | lr_mult: 2.0 966 | decay_mult: 0.0 967 | } 968 | convolution_param { 969 | num_output: 128 970 | kernel_size: 1 971 | weight_filler { 972 | type: "msra" 973 | } 974 | bias_filler { 975 | type: "constant" 976 | value: 0.0 977 | } 978 | } 979 | } 980 
| layer { 981 | name: "conv15_1/relu" 982 | type: "ReLU" 983 | bottom: "conv15_1" 984 | top: "conv15_1" 985 | } 986 | layer { 987 | name: "conv15_2" 988 | type: "Convolution" 989 | bottom: "conv15_1" 990 | top: "conv15_2" 991 | param { 992 | lr_mult: 1.0 993 | decay_mult: 1.0 994 | } 995 | param { 996 | lr_mult: 2.0 997 | decay_mult: 0.0 998 | } 999 | convolution_param { 1000 | num_output: 256 1001 | pad: 1 1002 | kernel_size: 3 1003 | stride: 2 1004 | weight_filler { 1005 | type: "msra" 1006 | } 1007 | bias_filler { 1008 | type: "constant" 1009 | value: 0.0 1010 | } 1011 | } 1012 | } 1013 | layer { 1014 | name: "conv15_2/relu" 1015 | type: "ReLU" 1016 | bottom: "conv15_2" 1017 | top: "conv15_2" 1018 | } 1019 | layer { 1020 | name: "conv16_1" 1021 | type: "Convolution" 1022 | bottom: "conv15_2" 1023 | top: "conv16_1" 1024 | param { 1025 | lr_mult: 1.0 1026 | decay_mult: 1.0 1027 | } 1028 | param { 1029 | lr_mult: 2.0 1030 | decay_mult: 0.0 1031 | } 1032 | convolution_param { 1033 | num_output: 128 1034 | kernel_size: 1 1035 | weight_filler { 1036 | type: "msra" 1037 | } 1038 | bias_filler { 1039 | type: "constant" 1040 | value: 0.0 1041 | } 1042 | } 1043 | } 1044 | layer { 1045 | name: "conv16_1/relu" 1046 | type: "ReLU" 1047 | bottom: "conv16_1" 1048 | top: "conv16_1" 1049 | } 1050 | layer { 1051 | name: "conv16_2" 1052 | type: "Convolution" 1053 | bottom: "conv16_1" 1054 | top: "conv16_2" 1055 | param { 1056 | lr_mult: 1.0 1057 | decay_mult: 1.0 1058 | } 1059 | param { 1060 | lr_mult: 2.0 1061 | decay_mult: 0.0 1062 | } 1063 | convolution_param { 1064 | num_output: 256 1065 | pad: 1 1066 | kernel_size: 3 1067 | stride: 2 1068 | weight_filler { 1069 | type: "msra" 1070 | } 1071 | bias_filler { 1072 | type: "constant" 1073 | value: 0.0 1074 | } 1075 | } 1076 | } 1077 | layer { 1078 | name: "conv16_2/relu" 1079 | type: "ReLU" 1080 | bottom: "conv16_2" 1081 | top: "conv16_2" 1082 | } 1083 | layer { 1084 | name: "conv17_1" 1085 | type: "Convolution" 1086 | bottom: "conv16_2" 1087 | top: "conv17_1" 1088 | param { 1089 | lr_mult: 1.0 1090 | decay_mult: 1.0 1091 | } 1092 | param { 1093 | lr_mult: 2.0 1094 | decay_mult: 0.0 1095 | } 1096 | convolution_param { 1097 | num_output: 64 1098 | kernel_size: 1 1099 | weight_filler { 1100 | type: "msra" 1101 | } 1102 | bias_filler { 1103 | type: "constant" 1104 | value: 0.0 1105 | } 1106 | } 1107 | } 1108 | layer { 1109 | name: "conv17_1/relu" 1110 | type: "ReLU" 1111 | bottom: "conv17_1" 1112 | top: "conv17_1" 1113 | } 1114 | layer { 1115 | name: "conv17_2" 1116 | type: "Convolution" 1117 | bottom: "conv17_1" 1118 | top: "conv17_2" 1119 | param { 1120 | lr_mult: 1.0 1121 | decay_mult: 1.0 1122 | } 1123 | param { 1124 | lr_mult: 2.0 1125 | decay_mult: 0.0 1126 | } 1127 | convolution_param { 1128 | num_output: 128 1129 | pad: 1 1130 | kernel_size: 3 1131 | stride: 2 1132 | weight_filler { 1133 | type: "msra" 1134 | } 1135 | bias_filler { 1136 | type: "constant" 1137 | value: 0.0 1138 | } 1139 | } 1140 | } 1141 | layer { 1142 | name: "conv17_2/relu" 1143 | type: "ReLU" 1144 | bottom: "conv17_2" 1145 | top: "conv17_2" 1146 | } 1147 | layer { 1148 | name: "conv11_mbox_loc" 1149 | type: "Convolution" 1150 | bottom: "conv11" 1151 | top: "conv11_mbox_loc" 1152 | param { 1153 | lr_mult: 1.0 1154 | decay_mult: 1.0 1155 | } 1156 | param { 1157 | lr_mult: 2.0 1158 | decay_mult: 0.0 1159 | } 1160 | convolution_param { 1161 | num_output: 12 1162 | kernel_size: 1 1163 | weight_filler { 1164 | type: "msra" 1165 | } 1166 | bias_filler { 1167 | type: "constant" 1168 | 
value: 0.0 1169 | } 1170 | } 1171 | } 1172 | layer { 1173 | name: "conv11_mbox_loc_perm" 1174 | type: "Permute" 1175 | bottom: "conv11_mbox_loc" 1176 | top: "conv11_mbox_loc_perm" 1177 | permute_param { 1178 | order: 0 1179 | order: 2 1180 | order: 3 1181 | order: 1 1182 | } 1183 | } 1184 | layer { 1185 | name: "conv11_mbox_loc_flat" 1186 | type: "Flatten" 1187 | bottom: "conv11_mbox_loc_perm" 1188 | top: "conv11_mbox_loc_flat" 1189 | flatten_param { 1190 | axis: 1 1191 | } 1192 | } 1193 | layer { 1194 | name: "conv11_mbox_conf" 1195 | type: "Convolution" 1196 | bottom: "conv11" 1197 | top: "conv11_mbox_conf" 1198 | param { 1199 | lr_mult: 1.0 1200 | decay_mult: 1.0 1201 | } 1202 | param { 1203 | lr_mult: 2.0 1204 | decay_mult: 0.0 1205 | } 1206 | convolution_param { 1207 | num_output: 63 1208 | kernel_size: 1 1209 | weight_filler { 1210 | type: "msra" 1211 | } 1212 | bias_filler { 1213 | type: "constant" 1214 | value: 0.0 1215 | } 1216 | } 1217 | } 1218 | layer { 1219 | name: "conv11_mbox_conf_perm" 1220 | type: "Permute" 1221 | bottom: "conv11_mbox_conf" 1222 | top: "conv11_mbox_conf_perm" 1223 | permute_param { 1224 | order: 0 1225 | order: 2 1226 | order: 3 1227 | order: 1 1228 | } 1229 | } 1230 | layer { 1231 | name: "conv11_mbox_conf_flat" 1232 | type: "Flatten" 1233 | bottom: "conv11_mbox_conf_perm" 1234 | top: "conv11_mbox_conf_flat" 1235 | flatten_param { 1236 | axis: 1 1237 | } 1238 | } 1239 | layer { 1240 | name: "conv11_mbox_priorbox" 1241 | type: "PriorBox" 1242 | bottom: "conv11" 1243 | bottom: "data" 1244 | top: "conv11_mbox_priorbox" 1245 | prior_box_param { 1246 | min_size: 60.0 1247 | aspect_ratio: 2.0 1248 | flip: true 1249 | clip: false 1250 | variance: 0.1 1251 | variance: 0.1 1252 | variance: 0.2 1253 | variance: 0.2 1254 | offset: 0.5 1255 | } 1256 | } 1257 | layer { 1258 | name: "conv13_mbox_loc" 1259 | type: "Convolution" 1260 | bottom: "conv13" 1261 | top: "conv13_mbox_loc" 1262 | param { 1263 | lr_mult: 1.0 1264 | decay_mult: 1.0 1265 | } 1266 | param { 1267 | lr_mult: 2.0 1268 | decay_mult: 0.0 1269 | } 1270 | convolution_param { 1271 | num_output: 24 1272 | kernel_size: 1 1273 | weight_filler { 1274 | type: "msra" 1275 | } 1276 | bias_filler { 1277 | type: "constant" 1278 | value: 0.0 1279 | } 1280 | } 1281 | } 1282 | layer { 1283 | name: "conv13_mbox_loc_perm" 1284 | type: "Permute" 1285 | bottom: "conv13_mbox_loc" 1286 | top: "conv13_mbox_loc_perm" 1287 | permute_param { 1288 | order: 0 1289 | order: 2 1290 | order: 3 1291 | order: 1 1292 | } 1293 | } 1294 | layer { 1295 | name: "conv13_mbox_loc_flat" 1296 | type: "Flatten" 1297 | bottom: "conv13_mbox_loc_perm" 1298 | top: "conv13_mbox_loc_flat" 1299 | flatten_param { 1300 | axis: 1 1301 | } 1302 | } 1303 | layer { 1304 | name: "conv13_mbox_conf" 1305 | type: "Convolution" 1306 | bottom: "conv13" 1307 | top: "conv13_mbox_conf" 1308 | param { 1309 | lr_mult: 1.0 1310 | decay_mult: 1.0 1311 | } 1312 | param { 1313 | lr_mult: 2.0 1314 | decay_mult: 0.0 1315 | } 1316 | convolution_param { 1317 | num_output: 126 1318 | kernel_size: 1 1319 | weight_filler { 1320 | type: "msra" 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.0 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "conv13_mbox_conf_perm" 1330 | type: "Permute" 1331 | bottom: "conv13_mbox_conf" 1332 | top: "conv13_mbox_conf_perm" 1333 | permute_param { 1334 | order: 0 1335 | order: 2 1336 | order: 3 1337 | order: 1 1338 | } 1339 | } 1340 | layer { 1341 | name: "conv13_mbox_conf_flat" 1342 | type: "Flatten" 1343 | bottom: 
"conv13_mbox_conf_perm" 1344 | top: "conv13_mbox_conf_flat" 1345 | flatten_param { 1346 | axis: 1 1347 | } 1348 | } 1349 | layer { 1350 | name: "conv13_mbox_priorbox" 1351 | type: "PriorBox" 1352 | bottom: "conv13" 1353 | bottom: "data" 1354 | top: "conv13_mbox_priorbox" 1355 | prior_box_param { 1356 | min_size: 105.0 1357 | max_size: 150.0 1358 | aspect_ratio: 2.0 1359 | aspect_ratio: 3.0 1360 | flip: true 1361 | clip: false 1362 | variance: 0.1 1363 | variance: 0.1 1364 | variance: 0.2 1365 | variance: 0.2 1366 | offset: 0.5 1367 | } 1368 | } 1369 | layer { 1370 | name: "conv14_2_mbox_loc" 1371 | type: "Convolution" 1372 | bottom: "conv14_2" 1373 | top: "conv14_2_mbox_loc" 1374 | param { 1375 | lr_mult: 1.0 1376 | decay_mult: 1.0 1377 | } 1378 | param { 1379 | lr_mult: 2.0 1380 | decay_mult: 0.0 1381 | } 1382 | convolution_param { 1383 | num_output: 24 1384 | kernel_size: 1 1385 | weight_filler { 1386 | type: "msra" 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.0 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "conv14_2_mbox_loc_perm" 1396 | type: "Permute" 1397 | bottom: "conv14_2_mbox_loc" 1398 | top: "conv14_2_mbox_loc_perm" 1399 | permute_param { 1400 | order: 0 1401 | order: 2 1402 | order: 3 1403 | order: 1 1404 | } 1405 | } 1406 | layer { 1407 | name: "conv14_2_mbox_loc_flat" 1408 | type: "Flatten" 1409 | bottom: "conv14_2_mbox_loc_perm" 1410 | top: "conv14_2_mbox_loc_flat" 1411 | flatten_param { 1412 | axis: 1 1413 | } 1414 | } 1415 | layer { 1416 | name: "conv14_2_mbox_conf" 1417 | type: "Convolution" 1418 | bottom: "conv14_2" 1419 | top: "conv14_2_mbox_conf" 1420 | param { 1421 | lr_mult: 1.0 1422 | decay_mult: 1.0 1423 | } 1424 | param { 1425 | lr_mult: 2.0 1426 | decay_mult: 0.0 1427 | } 1428 | convolution_param { 1429 | num_output: 126 1430 | kernel_size: 1 1431 | weight_filler { 1432 | type: "msra" 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | value: 0.0 1437 | } 1438 | } 1439 | } 1440 | layer { 1441 | name: "conv14_2_mbox_conf_perm" 1442 | type: "Permute" 1443 | bottom: "conv14_2_mbox_conf" 1444 | top: "conv14_2_mbox_conf_perm" 1445 | permute_param { 1446 | order: 0 1447 | order: 2 1448 | order: 3 1449 | order: 1 1450 | } 1451 | } 1452 | layer { 1453 | name: "conv14_2_mbox_conf_flat" 1454 | type: "Flatten" 1455 | bottom: "conv14_2_mbox_conf_perm" 1456 | top: "conv14_2_mbox_conf_flat" 1457 | flatten_param { 1458 | axis: 1 1459 | } 1460 | } 1461 | layer { 1462 | name: "conv14_2_mbox_priorbox" 1463 | type: "PriorBox" 1464 | bottom: "conv14_2" 1465 | bottom: "data" 1466 | top: "conv14_2_mbox_priorbox" 1467 | prior_box_param { 1468 | min_size: 150.0 1469 | max_size: 195.0 1470 | aspect_ratio: 2.0 1471 | aspect_ratio: 3.0 1472 | flip: true 1473 | clip: false 1474 | variance: 0.1 1475 | variance: 0.1 1476 | variance: 0.2 1477 | variance: 0.2 1478 | offset: 0.5 1479 | } 1480 | } 1481 | layer { 1482 | name: "conv15_2_mbox_loc" 1483 | type: "Convolution" 1484 | bottom: "conv15_2" 1485 | top: "conv15_2_mbox_loc" 1486 | param { 1487 | lr_mult: 1.0 1488 | decay_mult: 1.0 1489 | } 1490 | param { 1491 | lr_mult: 2.0 1492 | decay_mult: 0.0 1493 | } 1494 | convolution_param { 1495 | num_output: 24 1496 | kernel_size: 1 1497 | weight_filler { 1498 | type: "msra" 1499 | } 1500 | bias_filler { 1501 | type: "constant" 1502 | value: 0.0 1503 | } 1504 | } 1505 | } 1506 | layer { 1507 | name: "conv15_2_mbox_loc_perm" 1508 | type: "Permute" 1509 | bottom: "conv15_2_mbox_loc" 1510 | top: "conv15_2_mbox_loc_perm" 1511 | permute_param { 1512 | order: 0 
1513 | order: 2 1514 | order: 3 1515 | order: 1 1516 | } 1517 | } 1518 | layer { 1519 | name: "conv15_2_mbox_loc_flat" 1520 | type: "Flatten" 1521 | bottom: "conv15_2_mbox_loc_perm" 1522 | top: "conv15_2_mbox_loc_flat" 1523 | flatten_param { 1524 | axis: 1 1525 | } 1526 | } 1527 | layer { 1528 | name: "conv15_2_mbox_conf" 1529 | type: "Convolution" 1530 | bottom: "conv15_2" 1531 | top: "conv15_2_mbox_conf" 1532 | param { 1533 | lr_mult: 1.0 1534 | decay_mult: 1.0 1535 | } 1536 | param { 1537 | lr_mult: 2.0 1538 | decay_mult: 0.0 1539 | } 1540 | convolution_param { 1541 | num_output: 126 1542 | kernel_size: 1 1543 | weight_filler { 1544 | type: "msra" 1545 | } 1546 | bias_filler { 1547 | type: "constant" 1548 | value: 0.0 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "conv15_2_mbox_conf_perm" 1554 | type: "Permute" 1555 | bottom: "conv15_2_mbox_conf" 1556 | top: "conv15_2_mbox_conf_perm" 1557 | permute_param { 1558 | order: 0 1559 | order: 2 1560 | order: 3 1561 | order: 1 1562 | } 1563 | } 1564 | layer { 1565 | name: "conv15_2_mbox_conf_flat" 1566 | type: "Flatten" 1567 | bottom: "conv15_2_mbox_conf_perm" 1568 | top: "conv15_2_mbox_conf_flat" 1569 | flatten_param { 1570 | axis: 1 1571 | } 1572 | } 1573 | layer { 1574 | name: "conv15_2_mbox_priorbox" 1575 | type: "PriorBox" 1576 | bottom: "conv15_2" 1577 | bottom: "data" 1578 | top: "conv15_2_mbox_priorbox" 1579 | prior_box_param { 1580 | min_size: 195.0 1581 | max_size: 240.0 1582 | aspect_ratio: 2.0 1583 | aspect_ratio: 3.0 1584 | flip: true 1585 | clip: false 1586 | variance: 0.1 1587 | variance: 0.1 1588 | variance: 0.2 1589 | variance: 0.2 1590 | offset: 0.5 1591 | } 1592 | } 1593 | layer { 1594 | name: "conv16_2_mbox_loc" 1595 | type: "Convolution" 1596 | bottom: "conv16_2" 1597 | top: "conv16_2_mbox_loc" 1598 | param { 1599 | lr_mult: 1.0 1600 | decay_mult: 1.0 1601 | } 1602 | param { 1603 | lr_mult: 2.0 1604 | decay_mult: 0.0 1605 | } 1606 | convolution_param { 1607 | num_output: 24 1608 | kernel_size: 1 1609 | weight_filler { 1610 | type: "msra" 1611 | } 1612 | bias_filler { 1613 | type: "constant" 1614 | value: 0.0 1615 | } 1616 | } 1617 | } 1618 | layer { 1619 | name: "conv16_2_mbox_loc_perm" 1620 | type: "Permute" 1621 | bottom: "conv16_2_mbox_loc" 1622 | top: "conv16_2_mbox_loc_perm" 1623 | permute_param { 1624 | order: 0 1625 | order: 2 1626 | order: 3 1627 | order: 1 1628 | } 1629 | } 1630 | layer { 1631 | name: "conv16_2_mbox_loc_flat" 1632 | type: "Flatten" 1633 | bottom: "conv16_2_mbox_loc_perm" 1634 | top: "conv16_2_mbox_loc_flat" 1635 | flatten_param { 1636 | axis: 1 1637 | } 1638 | } 1639 | layer { 1640 | name: "conv16_2_mbox_conf" 1641 | type: "Convolution" 1642 | bottom: "conv16_2" 1643 | top: "conv16_2_mbox_conf" 1644 | param { 1645 | lr_mult: 1.0 1646 | decay_mult: 1.0 1647 | } 1648 | param { 1649 | lr_mult: 2.0 1650 | decay_mult: 0.0 1651 | } 1652 | convolution_param { 1653 | num_output: 126 1654 | kernel_size: 1 1655 | weight_filler { 1656 | type: "msra" 1657 | } 1658 | bias_filler { 1659 | type: "constant" 1660 | value: 0.0 1661 | } 1662 | } 1663 | } 1664 | layer { 1665 | name: "conv16_2_mbox_conf_perm" 1666 | type: "Permute" 1667 | bottom: "conv16_2_mbox_conf" 1668 | top: "conv16_2_mbox_conf_perm" 1669 | permute_param { 1670 | order: 0 1671 | order: 2 1672 | order: 3 1673 | order: 1 1674 | } 1675 | } 1676 | layer { 1677 | name: "conv16_2_mbox_conf_flat" 1678 | type: "Flatten" 1679 | bottom: "conv16_2_mbox_conf_perm" 1680 | top: "conv16_2_mbox_conf_flat" 1681 | flatten_param { 1682 | axis: 1 1683 | } 
1684 | } 1685 | layer { 1686 | name: "conv16_2_mbox_priorbox" 1687 | type: "PriorBox" 1688 | bottom: "conv16_2" 1689 | bottom: "data" 1690 | top: "conv16_2_mbox_priorbox" 1691 | prior_box_param { 1692 | min_size: 240.0 1693 | max_size: 285.0 1694 | aspect_ratio: 2.0 1695 | aspect_ratio: 3.0 1696 | flip: true 1697 | clip: false 1698 | variance: 0.1 1699 | variance: 0.1 1700 | variance: 0.2 1701 | variance: 0.2 1702 | offset: 0.5 1703 | } 1704 | } 1705 | layer { 1706 | name: "conv17_2_mbox_loc" 1707 | type: "Convolution" 1708 | bottom: "conv17_2" 1709 | top: "conv17_2_mbox_loc" 1710 | param { 1711 | lr_mult: 1.0 1712 | decay_mult: 1.0 1713 | } 1714 | param { 1715 | lr_mult: 2.0 1716 | decay_mult: 0.0 1717 | } 1718 | convolution_param { 1719 | num_output: 24 1720 | kernel_size: 1 1721 | weight_filler { 1722 | type: "msra" 1723 | } 1724 | bias_filler { 1725 | type: "constant" 1726 | value: 0.0 1727 | } 1728 | } 1729 | } 1730 | layer { 1731 | name: "conv17_2_mbox_loc_perm" 1732 | type: "Permute" 1733 | bottom: "conv17_2_mbox_loc" 1734 | top: "conv17_2_mbox_loc_perm" 1735 | permute_param { 1736 | order: 0 1737 | order: 2 1738 | order: 3 1739 | order: 1 1740 | } 1741 | } 1742 | layer { 1743 | name: "conv17_2_mbox_loc_flat" 1744 | type: "Flatten" 1745 | bottom: "conv17_2_mbox_loc_perm" 1746 | top: "conv17_2_mbox_loc_flat" 1747 | flatten_param { 1748 | axis: 1 1749 | } 1750 | } 1751 | layer { 1752 | name: "conv17_2_mbox_conf" 1753 | type: "Convolution" 1754 | bottom: "conv17_2" 1755 | top: "conv17_2_mbox_conf" 1756 | param { 1757 | lr_mult: 1.0 1758 | decay_mult: 1.0 1759 | } 1760 | param { 1761 | lr_mult: 2.0 1762 | decay_mult: 0.0 1763 | } 1764 | convolution_param { 1765 | num_output: 126 1766 | kernel_size: 1 1767 | weight_filler { 1768 | type: "msra" 1769 | } 1770 | bias_filler { 1771 | type: "constant" 1772 | value: 0.0 1773 | } 1774 | } 1775 | } 1776 | layer { 1777 | name: "conv17_2_mbox_conf_perm" 1778 | type: "Permute" 1779 | bottom: "conv17_2_mbox_conf" 1780 | top: "conv17_2_mbox_conf_perm" 1781 | permute_param { 1782 | order: 0 1783 | order: 2 1784 | order: 3 1785 | order: 1 1786 | } 1787 | } 1788 | layer { 1789 | name: "conv17_2_mbox_conf_flat" 1790 | type: "Flatten" 1791 | bottom: "conv17_2_mbox_conf_perm" 1792 | top: "conv17_2_mbox_conf_flat" 1793 | flatten_param { 1794 | axis: 1 1795 | } 1796 | } 1797 | layer { 1798 | name: "conv17_2_mbox_priorbox" 1799 | type: "PriorBox" 1800 | bottom: "conv17_2" 1801 | bottom: "data" 1802 | top: "conv17_2_mbox_priorbox" 1803 | prior_box_param { 1804 | min_size: 285.0 1805 | max_size: 300.0 1806 | aspect_ratio: 2.0 1807 | aspect_ratio: 3.0 1808 | flip: true 1809 | clip: false 1810 | variance: 0.1 1811 | variance: 0.1 1812 | variance: 0.2 1813 | variance: 0.2 1814 | offset: 0.5 1815 | } 1816 | } 1817 | layer { 1818 | name: "mbox_loc" 1819 | type: "Concat" 1820 | bottom: "conv11_mbox_loc_flat" 1821 | bottom: "conv13_mbox_loc_flat" 1822 | bottom: "conv14_2_mbox_loc_flat" 1823 | bottom: "conv15_2_mbox_loc_flat" 1824 | bottom: "conv16_2_mbox_loc_flat" 1825 | bottom: "conv17_2_mbox_loc_flat" 1826 | top: "mbox_loc" 1827 | concat_param { 1828 | axis: 1 1829 | } 1830 | } 1831 | layer { 1832 | name: "mbox_conf" 1833 | type: "Concat" 1834 | bottom: "conv11_mbox_conf_flat" 1835 | bottom: "conv13_mbox_conf_flat" 1836 | bottom: "conv14_2_mbox_conf_flat" 1837 | bottom: "conv15_2_mbox_conf_flat" 1838 | bottom: "conv16_2_mbox_conf_flat" 1839 | bottom: "conv17_2_mbox_conf_flat" 1840 | top: "mbox_conf" 1841 | concat_param { 1842 | axis: 1 1843 | } 1844 | } 1845 | layer 
{ 1846 | name: "mbox_priorbox" 1847 | type: "Concat" 1848 | bottom: "conv11_mbox_priorbox" 1849 | bottom: "conv13_mbox_priorbox" 1850 | bottom: "conv14_2_mbox_priorbox" 1851 | bottom: "conv15_2_mbox_priorbox" 1852 | bottom: "conv16_2_mbox_priorbox" 1853 | bottom: "conv17_2_mbox_priorbox" 1854 | top: "mbox_priorbox" 1855 | concat_param { 1856 | axis: 2 1857 | } 1858 | } 1859 | layer { 1860 | name: "mbox_conf_reshape" 1861 | type: "Reshape" 1862 | bottom: "mbox_conf" 1863 | top: "mbox_conf_reshape" 1864 | reshape_param { 1865 | shape { 1866 | dim: 0 1867 | dim: -1 1868 | dim: 21 1869 | } 1870 | } 1871 | } 1872 | layer { 1873 | name: "mbox_conf_softmax" 1874 | type: "Softmax" 1875 | bottom: "mbox_conf_reshape" 1876 | top: "mbox_conf_softmax" 1877 | softmax_param { 1878 | axis: 2 1879 | } 1880 | } 1881 | layer { 1882 | name: "mbox_conf_flatten" 1883 | type: "Flatten" 1884 | bottom: "mbox_conf_softmax" 1885 | top: "mbox_conf_flatten" 1886 | flatten_param { 1887 | axis: 1 1888 | } 1889 | } 1890 | layer { 1891 | name: "detection_out" 1892 | type: "DetectionOutput" 1893 | bottom: "mbox_loc" 1894 | bottom: "mbox_conf_flatten" 1895 | bottom: "mbox_priorbox" 1896 | top: "detection_out" 1897 | include { 1898 | phase: TEST 1899 | } 1900 | detection_output_param { 1901 | num_classes: 21 1902 | share_location: true 1903 | background_label_id: 0 1904 | nms_param { 1905 | nms_threshold: 0.45 1906 | top_k: 100 1907 | } 1908 | code_type: CENTER_SIZE 1909 | keep_top_k: 100 1910 | confidence_threshold: 0.25 1911 | } 1912 | } 1913 | -------------------------------------------------------------------------------- /mods/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulankun1412/People-Counting-OpenCV-Object-Detection/e5949294f1975141a815b6ca2228d025b118a430/mods/__init__.py -------------------------------------------------------------------------------- /mods/centroidtracker.py: -------------------------------------------------------------------------------- 1 | # import the necessary packages 2 | from scipy.spatial import distance as dist 3 | from collections import OrderedDict 4 | import numpy as np 5 | 6 | class CentroidTracker: 7 | def __init__(self, maxDisappeared=50, maxDistance=50): 8 | # initialize the next unique object ID along with two ordered 9 | # dictionaries used to keep track of mapping a given object 10 | # ID to its centroid and number of consecutive frames it has 11 | # been marked as "disappeared", respectively 12 | self.nextObjectID = 0 13 | self.objects = OrderedDict() 14 | self.disappeared = OrderedDict() 15 | 16 | # store the number of maximum consecutive frames a given 17 | # object is allowed to be marked as "disappeared" until we 18 | # need to deregister the object from tracking 19 | self.maxDisappeared = maxDisappeared 20 | 21 | # store the maximum distance between centroids to associate 22 | # an object -- if the distance is larger than this maximum 23 | # distance we'll start to mark the object as "disappeared" 24 | self.maxDistance = maxDistance 25 | 26 | def register(self, centroid): 27 | # when registering an object we use the next available object 28 | # ID to store the centroid 29 | self.objects[self.nextObjectID] = centroid 30 | self.disappeared[self.nextObjectID] = 0 31 | self.nextObjectID += 1 32 | 33 | def deregister(self, objectID): 34 | # to deregister an object ID we delete the object ID from 35 | # both of our respective dictionaries 36 | del self.objects[objectID] 37 | del 
self.disappeared[objectID] 38 | 39 | def update(self, rects): 40 | # check to see if the list of input bounding box rectangles 41 | # is empty 42 | if len(rects) == 0: 43 | # loop over any existing tracked objects and mark them 44 | # as disappeared 45 | for objectID in list(self.disappeared.keys()): 46 | self.disappeared[objectID] += 1 47 | 48 | # if we have reached a maximum number of consecutive 49 | # frames where a given object has been marked as 50 | # missing, deregister it 51 | if self.disappeared[objectID] > self.maxDisappeared: 52 | self.deregister(objectID) 53 | 54 | # return early as there are no centroids or tracking info 55 | # to update 56 | return self.objects 57 | 58 | # initialize an array of input centroids for the current frame 59 | inputCentroids = np.zeros((len(rects), 2), dtype="int") 60 | 61 | # loop over the bounding box rectangles 62 | for (i, (startX, startY, endX, endY)) in enumerate(rects): 63 | # use the bounding box coordinates to derive the centroid 64 | cX = int((startX + endX) / 2.0) 65 | cY = int((startY + endY) / 2.0) 66 | inputCentroids[i] = (cX, cY) 67 | 68 | # if we are currently not tracking any objects take the input 69 | # centroids and register each of them 70 | if len(self.objects) == 0: 71 | for i in range(0, len(inputCentroids)): 72 | self.register(inputCentroids[i]) 73 | 74 | # otherwise, we are currently tracking objects so we need to 75 | # try to match the input centroids to existing object 76 | # centroids 77 | else: 78 | # grab the set of object IDs and corresponding centroids 79 | objectIDs = list(self.objects.keys()) 80 | objectCentroids = list(self.objects.values()) 81 | 82 | # compute the distance between each pair of object 83 | # centroids and input centroids, respectively -- our 84 | # goal will be to match an input centroid to an existing 85 | # object centroid 86 | D = dist.cdist(np.array(objectCentroids), inputCentroids) 87 | 88 | # in order to perform this matching we must (1) find the 89 | # smallest value in each row and then (2) sort the row 90 | # indexes based on their minimum values so that the row 91 | # with the smallest value is at the *front* of the index 92 | # list 93 | rows = D.min(axis=1).argsort() 94 | 95 | # next, we perform a similar process on the columns by 96 | # finding the smallest value in each column and then 97 | # sorting using the previously computed row index list 98 | cols = D.argmin(axis=1)[rows] 99 | 100 | # in order to determine if we need to update, register, 101 | # or deregister an object we need to keep track of which 102 | # of the rows and column indexes we have already examined 103 | usedRows = set() 104 | usedCols = set() 105 | 106 | # loop over the combination of the (row, column) index 107 | # tuples 108 | for (row, col) in zip(rows, cols): 109 | # if we have already examined either the row or 110 | # column value before, ignore it 111 | if row in usedRows or col in usedCols: 112 | continue 113 | 114 | # if the distance between centroids is greater than 115 | # the maximum distance, do not associate the two 116 | # centroids to the same object 117 | if D[row, col] > self.maxDistance: 118 | continue 119 | 120 | # otherwise, grab the object ID for the current row, 121 | # set its new centroid, and reset the disappeared 122 | # counter 123 | objectID = objectIDs[row] 124 | self.objects[objectID] = inputCentroids[col] 125 | self.disappeared[objectID] = 0 126 | 127 | # indicate that we have examined each of the row and 128 | # column indexes, respectively 129 | usedRows.add(row) 130 |
usedCols.add(col) 131 | 132 | # compute both the row and column index we have NOT yet 133 | # examined 134 | unusedRows = set(range(0, D.shape[0])).difference(usedRows) 135 | unusedCols = set(range(0, D.shape[1])).difference(usedCols) 136 | 137 | # in the event that the number of object centroids is 138 | # equal or greater than the number of input centroids 139 | # we need to check and see if some of these objects have 140 | # potentially disappeared 141 | if D.shape[0] >= D.shape[1]: 142 | # loop over the unused row indexes 143 | for row in unusedRows: 144 | # grab the object ID for the corresponding row 145 | # index and increment the disappeared counter 146 | objectID = objectIDs[row] 147 | self.disappeared[objectID] += 1 148 | 149 | # check to see if the number of consecutive 150 | # frames the object has been marked "disappeared" 151 | # for warrants deregistering the object 152 | if self.disappeared[objectID] > self.maxDisappeared: 153 | self.deregister(objectID) 154 | 155 | # otherwise, if the number of input centroids is greater 156 | # than the number of existing object centroids we need to 157 | # register each new input centroid as a trackable object 158 | else: 159 | for col in unusedCols: 160 | self.register(inputCentroids[col]) 161 | 162 | # return the set of trackable objects 163 | return self.objects -------------------------------------------------------------------------------- /mods/trackableobject.py: -------------------------------------------------------------------------------- 1 | class TrackableObject: 2 | def __init__(self, objectID, centroid): 3 | # store the object ID, then initialize a list of centroids 4 | # using the current centroid 5 | self.objectID = objectID 6 | self.centroids = [centroid] 7 | 8 | # initialize a boolean used to indicate if the object has 9 | # already been counted or not 10 | self.counted = False -------------------------------------------------------------------------------- /people_counter.py: -------------------------------------------------------------------------------- 1 | # USAGE 2 | # To read and write back out to video: 3 | # python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --input videos/example_01.mp4 --output output/output_01.avi 4 | # 5 | # To read from webcam and write back out to disk: 6 | # python people_counter.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \ 7 | # --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel \ 8 | # --output output/webcam_output.avi 9 | 10 | # import the necessary packages 11 | from mods.centroidtracker import CentroidTracker 12 | from mods.trackableobject import TrackableObject 13 | from imutils.video import VideoStream 14 | from imutils.video import FPS 15 | import numpy as np 16 | import argparse 17 | import imutils 18 | import time 19 | import dlib 20 | import cv2 21 | import pandas as pd 22 | from openpyxl import load_workbook 23 | 24 | # construct the argument parse and parse the arguments 25 | ap = argparse.ArgumentParser() 26 | ap.add_argument("-p", "--prototxt", required=True, 27 | help="path to Caffe 'deploy' prototxt file") 28 | ap.add_argument("-m", "--model", required=True, 29 | help="path to Caffe pre-trained model") 30 | ap.add_argument("-i", "--input", type=str, 31 | help="path to optional input video file") 32 | ap.add_argument("-o", "--output", type=str, 33 | help="path to optional output video file") 34 | ap.add_argument("-c", "--confidence", type=float, default=0.4, 35 | help="minimum probability to
filter weak detections") 36 | ap.add_argument("-s", "--skip-frames", type=int, default=30, 37 | help="# of skip frames between detections") 38 | args = vars(ap.parse_args()) 39 | 40 | # Kalau Belumm Bikin Excel nya Run Dulu yang ini, kalau udh tidak usah 41 | # Framedata kolom data-data yang dibikin di excel 42 | df = pd.DataFrame({'No': ['Null'], 43 | 'CCTV': ['Null'], 44 | 'Menit': ['Null'], 45 | 'Jumlah Counting': ['Null']}) 46 | 47 | # Create a Pandas Excel writer using XlsxWriter as the engine. 48 | writer = pd.ExcelWriter('data output.xlsx', engine='xlsxwriter') 49 | 50 | # Convert the dataframe to an XlsxWriter Excel object. 51 | df.to_excel(writer, sheet_name='Data Jumlah Peserta CFD', index=False) 52 | 53 | # Close the Pandas Excel writer and output the Excel file. 54 | writer.save() 55 | 56 | # initialize the list of class labels MobileNet SSD was trained to 57 | # detect 58 | CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", 59 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", 60 | "dog", "horse", "motorbike", "person", "pottedplant", "sheep", 61 | "sofa", "train", "tvmonitor"] 62 | 63 | # load our serialized model from disk 64 | print("[INFO] loading model...") 65 | net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"]) 66 | 67 | # if a video path was not supplied, grab a reference to the webcam 68 | if not args.get("input", False): 69 | print("[INFO] starting video stream...") 70 | vs = VideoStream(src=0).start() 71 | time.sleep(2.0) 72 | 73 | # otherwise, grab a reference to the video file 74 | else: 75 | print("[INFO] opening video file...") 76 | vs = cv2.VideoCapture(args["input"]) 77 | 78 | # initialize the video writer (we'll instantiate later if need be) 79 | writer = None 80 | 81 | # initialize the frame dimensions (we'll set them as soon as we read 82 | # the first frame from the video) 83 | W = None 84 | H = None 85 | 86 | # instantiate our centroid tracker, then initialize a list to store 87 | # each of our dlib correlation trackers, followed by a dictionary to 88 | # map each unique object ID to a TrackableObject 89 | ct = CentroidTracker(maxDisappeared=40, maxDistance=50) 90 | trackers = [] 91 | trackableObjects = {} 92 | 93 | # initialize the total number of frames processed thus far, along 94 | # with the total number of objects that have moved either up or down 95 | totalFrames = 0 96 | totalDown = 0 97 | totalUp = 0 98 | 99 | # start the frames per second throughput estimator 100 | fps = FPS().start() 101 | 102 | # loop over frames from the video stream 103 | while True: 104 | # grab the next frame and handle if we are reading from either 105 | # VideoCapture or VideoStream 106 | frame = vs.read() 107 | frame = frame[1] if args.get("input", False) else frame 108 | 109 | # if we are viewing a video and we did not grab a frame then we 110 | # have reached the end of the video 111 | if args["input"] is not None and frame is None: 112 | break 113 | 114 | # resize the frame to have a maximum width of 500 pixels (the 115 | # less data we have, the faster we can process it), then convert 116 | # the frame from BGR to RGB for dlib 117 | frame = imutils.resize(frame, width=1000) 118 | rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 119 | 120 | # if the frame dimensions are empty, set them 121 | if W is None or H is None: 122 | (H, W) = frame.shape[:2] 123 | 124 | # if we are supposed to be writing a video to disk, initialize 125 | # the writer 126 | if args["output"] is not None and writer is None: 127 | fourcc = 
cv2.VideoWriter_fourcc(*"MJPG") 128 | writer = cv2.VideoWriter(args["output"], fourcc, 30, 129 | (W, H), True) 130 | 131 | # initialize the current status along with our list of bounding 132 | # box rectangles returned by either (1) our object detector or 133 | # (2) the correlation trackers 134 | status = "Waiting" 135 | rects = [] 136 | 137 | # check to see if we should run a more computationally expensive 138 | # object detection method to aid our tracker 139 | if totalFrames % args["skip_frames"] == 0: 140 | # set the status and initialize our new set of object trackers 141 | status = "Detecting" 142 | trackers = [] 143 | 144 | # convert the frame to a blob and pass the blob through the 145 | # network and obtain the detections 146 | blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5) 147 | net.setInput(blob) 148 | detections = net.forward() 149 | 150 | # loop over the detections 151 | for i in np.arange(0, detections.shape[2]): 152 | # extract the confidence (i.e., probability) associated 153 | # with the prediction 154 | confidence = detections[0, 0, i, 2] 155 | 156 | # filter out weak detections by requiring a minimum 157 | # confidence 158 | if confidence > args["confidence"]: 159 | # extract the index of the class label from the 160 | # detections list 161 | idx = int(detections[0, 0, i, 1]) 162 | 163 | # if the class label is not a person, ignore it 164 | if CLASSES[idx] != "person": 165 | continue 166 | 167 | # compute the (x, y)-coordinates of the bounding box 168 | # for the object 169 | box = detections[0, 0, i, 3:7] * np.array([W, H, W, H]) 170 | (startX, startY, endX, endY) = box.astype("int") 171 | 172 | # construct a dlib rectangle object from the bounding 173 | # box coordinates and then start the dlib correlation 174 | # tracker 175 | tracker = dlib.correlation_tracker() 176 | rect = dlib.rectangle(startX, startY, endX, endY) 177 | tracker.start_track(rgb, rect) 178 | 179 | # add the tracker to our list of trackers so we can 180 | # utilize it during skip frames 181 | trackers.append(tracker) 182 | 183 | # otherwise, we should utilize our object *trackers* rather than 184 | # object *detectors* to obtain a higher frame processing throughput 185 | else: 186 | # loop over the trackers 187 | for tracker in trackers: 188 | # set the status of our system to be 'tracking' rather 189 | # than 'waiting' or 'detecting' 190 | status = "Tracking" 191 | 192 | # update the tracker and grab the updated position 193 | tracker.update(rgb) 194 | pos = tracker.get_position() 195 | 196 | # unpack the position object 197 | startX = int(pos.left()) 198 | startY = int(pos.top()) 199 | endX = int(pos.right()) 200 | endY = int(pos.bottom()) 201 | 202 | # add the bounding box coordinates to the rectangles list 203 | rects.append((startX, startY, endX, endY)) 204 | 205 | # draw a horizontal line in the center of the frame -- once an 206 | # object crosses this line we will determine whether they were 207 | # moving 'up' or 'down' 208 | cv2.line(frame, (0, H // 2), (W, H // 2), (0, 255, 255), 2) 209 | 210 | # use the centroid tracker to associate the (1) old object 211 | # centroids with (2) the newly computed object centroids 212 | objects = ct.update(rects) 213 | 214 | # loop over the tracked objects 215 | for (objectID, centroid) in objects.items(): 216 | # check to see if a trackable object exists for the current 217 | # object ID 218 | to = trackableObjects.get(objectID, None) 219 | 220 | # if there is no existing trackable object, create one 221 | if to is None: 222 | to = 
TrackableObject(objectID, centroid) 223 | 224 | # otherwise, there is a trackable object so we can utilize it 225 | # to determine direction 226 | else: 227 | # the difference between the y-coordinate of the *current* 228 | # centroid and the mean of *previous* centroids will tell 229 | # us in which direction the object is moving (negative for 230 | # 'up' and positive for 'down') 231 | y = [c[1] for c in to.centroids] 232 | direction = centroid[1] - np.mean(y) 233 | to.centroids.append(centroid) 234 | 235 | # check to see if the object has been counted or not 236 | if not to.counted: 237 | # if the direction is negative (indicating the object 238 | # is moving up) AND the centroid is above the center 239 | # line, count the object 240 | if direction < 0 and centroid[1] < H // 2: 241 | totalUp += 1 242 | to.counted = True 243 | 244 | # if the direction is positive (indicating the object 245 | # is moving down) AND the centroid is below the 246 | # center line, count the object 247 | elif direction > 0 and centroid[1] > H // 2: 248 | totalDown += 1 249 | to.counted = True 250 | 251 | # store the trackable object in our dictionary 252 | trackableObjects[objectID] = to 253 | 254 | # draw both the ID of the object and the centroid of the 255 | # object on the output frame 256 | text = "ID {}".format(objectID) 257 | cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10), 258 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 259 | cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1) 260 | 261 | # construct a tuple of information we will be displaying on the 262 | # frame 263 | info = [ 264 | ("Up", totalUp), 265 | ("Down", totalDown), 266 | ("Status", status), 267 | ] 268 | 269 | # loop over the info tuples and draw them on our frame 270 | for (i, (k, v)) in enumerate(info): 271 | text = "{}: {}".format(k, v) 272 | cv2.putText(frame, text, (10, H - ((i * 20) + 20)), 273 | cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) 274 | 275 | # check to see if we should write the frame to disk 276 | if writer is not None: 277 | writer.write(frame) 278 | 279 | # show the output frame 280 | cv2.imshow("Frame", frame) 281 | key = cv2.waitKey(1) & 0xFF 282 | 283 | # if the `q` key was pressed, break from the loop 284 | if key == ord("q"): 285 | break 286 | 287 | # increment the total number of frames processed thus far and 288 | # then update the FPS counter 289 | totalFrames += 1 290 | fps.update() 291 | elapsed_time = time.time() - start_time 292 | 293 | # keep a running total of everyone counted, regardless of direction 294 | totalSemua = totalUp + totalDown 295 | 296 | # new dataframe with same columns 297 | #df = pd.DataFrame({'no_data': [totalFrames], 298 | # 'nama_CCTV': ['Sample'], 299 | # 'menit': ["Jam"], 300 | # 'jumlah_counting': [totalSemua]}) 301 | #writerEx = pd.ExcelWriter('data output.xlsx', engine='openpyxl') 302 | # try to open an existing workbook 303 | #writerEx.book = load_workbook('data output.xlsx') 304 | # copy existing sheets 305 | #writerEx.sheets = dict((ws.title, ws) for ws in writerEx.book.worksheets) 306 | # read existing file 307 | #reader = pd.read_excel(r'data output.xlsx') 308 | # write out the new sheet 309 | #df.to_excel(writerEx,sheet_name='Data Jumlah Peserta CFD',index=False,header=False,startrow=len(reader)+1) 310 | #writerEx.close() 311 | #print (elapsed_time) 312 | 313 | # Assume the video runs at roughly 30 FPS (about 1800 frames per minute), so an 18000-frame step corresponds to roughly 10 minutes 314 | if(totalFrames == 1 or totalFrames == 18000 or totalFrames == 36000 or totalFrames == 54000 or totalFrames == 72000 315 | or totalFrames == 90000 or
totalFrames == 108000 or totalFrames == 126000 or totalFrames == 144000 316 | or totalFrames == 162000 or totalFrames == 180000 or totalFrames == 198000 or totalFrames == 216000 317 | or totalFrames == 234000 or totalFrames == 252000 or totalFrames == 270000 or totalFrames == 288000 318 | or totalFrames == 306000 or totalFrames == 324000): 319 | #new dataframe with same columns 320 | print(totalFrames) 321 | df = pd.DataFrame({'NO': [totalFrames], 322 | 'CCTV': [nama_cctv], 323 | 'Menit': [elapsed_time], 324 | 'Jumlah Counting': [totalSemua]}) 325 | writerEx = pd.ExcelWriter('data output.xlsx', engine='openpyxl') 326 | # try to open an existing workbook 327 | writerEx.book = load_workbook('data output.xlsx') 328 | # copy existing sheets 329 | writerEx.sheets = dict((ws.title, ws) for ws in writerEx.book.worksheets) 330 | # read existing file 331 | reader = pd.read_excel(r'data output.xlsx') 332 | # write out the new sheet 333 | df.to_excel(writerEx,sheet_name='Data Jumlah Peserta CFD',index=False,header=False,startrow=len(reader)+1) 334 | writerEx.close() 335 | print (elapsed_time) 336 | 337 | # stop the timer and display FPS information 338 | fps.stop() 339 | print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) 340 | print("[INFO] approx. FPS: {:.2f}".format(fps.fps())) 341 | 342 | # check to see if we need to release the video writer pointer 343 | if writer is not None: 344 | writer.release() 345 | 346 | # if we are not using a video file, stop the camera video stream 347 | if not args.get("input", False): 348 | vs.stop() 349 | 350 | # otherwise, release the video file pointer 351 | else: 352 | vs.release() 353 | 354 | # close any open windows 355 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /videos/sample.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulankun1412/People-Counting-OpenCV-Object-Detection/e5949294f1975141a815b6ca2228d025b118a430/videos/sample.mp4 --------------------------------------------------------------------------------
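
A note on the detection head defined in MobileNetSSD_deploy.prototxt above: each *_mbox_loc convolution emits 4 box offsets per prior box and each *_mbox_conf convolution emits 21 class scores per prior box, which is why conv11 uses num_output 12 and 63 (3 priors per cell) while conv13 through conv17_2 use 24 and 126 (6 priors per cell), and why detection_out declares num_classes: 21 to match the 21-entry CLASSES list used by people_counter.py. The sketch below is a hedged, stand-alone example rather than part of the repository; it only assumes the two deploy files sit under mobilenet_ssd/ as listed above and that a single frame has been saved as frame.jpg (a hypothetical path). It loads the network with OpenCV's dnn module and prints every row of the (1, 1, N, 7) DetectionOutput blob above a confidence cut-off; each row is [batch_id, class_id, confidence, x1, y1, x2, y2] with coordinates normalised to the image size.

    # single_frame_detect.py -- hypothetical helper, not shipped with this repository
    import cv2

    CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
               "sofa", "train", "tvmonitor"]

    net = cv2.dnn.readNetFromCaffe("mobilenet_ssd/MobileNetSSD_deploy.prototxt",
                                   "mobilenet_ssd/MobileNetSSD_deploy.caffemodel")

    # frame.jpg is an assumed path: any frame grabbed from videos/sample.mp4 will do
    image = cv2.imread("frame.jpg")
    (h, w) = image.shape[:2]

    # same preprocessing people_counter.py applies: scale factor 0.007843, mean 127.5
    blob = cv2.dnn.blobFromImage(image, 0.007843, (w, h), 127.5)
    net.setInput(blob)
    detections = net.forward()                # shape (1, 1, N, 7)

    for i in range(detections.shape[2]):
        confidence = float(detections[0, 0, i, 2])
        if confidence > 0.25:                 # mirrors confidence_threshold in the prototxt
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * [w, h, w, h]
            print(CLASSES[idx], round(confidence, 2), box.astype("int"))

Keeping the 0.007843 scale factor and 127.5 mean matters: they match the preprocessing used in the detection branch of people_counter.py, and filtering CLASSES[idx] down to "person" reproduces the counting pipeline's detector stage.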
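
Because mods/centroidtracker.py exposes a pure-Python update(rects) method that maps bounding boxes to persistent object IDs, its register / match-within-maxDistance / mark-as-disappeared behaviour can also be exercised without any video or model. A minimal sketch, assuming it is run from the repository root so that the mods package is importable; the box coordinates are invented purely for illustration:

    # Hypothetical sanity check for the centroid tracker -- the boxes are made up.
    from mods.centroidtracker import CentroidTracker

    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)

    # frame 1: two "people" detected -> IDs 0 and 1 are registered
    frame1 = [(10, 10, 60, 60), (200, 10, 250, 60)]
    print(ct.update(frame1))

    # frame 2: both boxes shift ~10 px, well under maxDistance=50,
    # so the same IDs should come back with updated centroids
    frame2 = [(20, 10, 70, 60), (210, 10, 260, 60)]
    print(ct.update(frame2))

    # frame 3: nothing detected -- the disappeared counters start ticking;
    # after maxDisappeared consecutive empty frames an ID would be deregistered
    print(ct.update([]))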