├── README.md
├── client_camera_main.py
├── communication.py
├── edge_server_main.py
├── keyFrameDetection.py
├── models
    ├── Golden_Retriever_Hund_Dog.jpg
    ├── imagenet_class_index.json
    ├── tiny_yolo.py
    ├── utils.py
    ├── vgg16.py
    ├── vgg16FrontEndDelay.pkl
    ├── voc.names
    └── yoloFrontEndDelay.pkl
├── muLinUCB.py
└── yolo_utils.py


/README.md:
--------------------------------------------------------------------------------
 1 | ## Autodidactic Neurosurgeon Collaborative Deep Inference for Mobile Edge Intelligence via Online Learning
 2 | Autodidactic Neurosurgeon (ANS) is an online learning module to automatically learn the optimal DNN partition point on the fly. The details of ANS are in our WWW21 paper "Autodidactic Neurosurgeon Collaborative Deep Inference for Mobile Edge Intelligence via Online Learning".
 3 | 
 4 | ### PyTorch
 5 | We modify the *forward* function in the PyTorch to partition the DNN model.
 6 | 
 7 | You can run "**vgg16.py**" as an example to see the partition.
 8 | 
 9 | ### TensorFlow 2.0+
10 | Although we don't provide the code for TensorFlow, you can modify the *\__call\__* function in the TensorFlow to partition the DNN model.
11 | 
12 | ### Two examples:
13 | - vgg16
14 | - tiny yolo v2
15 |   - Download tiny yolo weight, here https://pjreddie.com/media/files/yolov2-tiny-voc.weights
16 |   - Put yolov2-tiny-voc.weights in the folder "models"
17 | 
18 | ### How to run code:
19 | - First run *edge_server_main.py* on edge server
20 | - Then run *client_camera_main.py* on Nvidia Jetson TX2
21 | 


--------------------------------------------------------------------------------
/client_camera_main.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import argparse
  3 | import subprocess
  4 | import cv2
  5 | import torchvision.transforms as transforms
  6 | import torch
  7 | import numpy as np
  8 | from PIL import Image
  9 | import time
 10 | import pickle
 11 | import json
 12 | 
 13 | from models.vgg16 import vgg16
 14 | from models.tiny_yolo import tinyYolo
 15 | from keyFrameDetection import KeyFrameDetection
 16 | from communication import clientCommunication
 17 | from muLinUCB import muLinUCB
 18 | from yolo_utils import load_class_names, get_boxes, plot_boxes_cv2
 19 | 
 20 | WINDOW_NAME = 'CameraDemo'
 21 | 
 22 | vgg_info = { # action No. : [layer type num{1: conv, 2: fc, 3: act}, total mac{1: conv, 2: fc, 3: act}, mid_data_size, partition point]
 23 |                 0: [13, 3, 24, 15346630656, 123633664, 26208256, 4818272, 0],
 24 |                 1: [12, 3, 23, 15259926528, 123633664, 22996992, 102761824, 1],
 25 |                 2: [11, 3, 22, 13410238464, 123633664, 19785728, 102761824, 2],
 26 |                 3: [11, 3, 21, 13410238464, 123633664, 16574464, 25691488, 3],
 27 |                 4: [10, 3, 20, 12485394432, 123633664, 13363200, 51381600, 4],
 28 |                 5: [9, 3, 19, 10635706368, 123633664, 10151936, 51381600, 5],
 29 |                 6: [9, 3, 18, 10635706368, 123633664, 8546304, 12846432, 6],
 30 |                 7: [8, 3, 17, 9710862336, 123633664, 6940672, 25691496, 7],
 31 |                 8: [7, 3, 16, 7861174272, 123633664, 5335040, 25691496, 8],
 32 |                 9: [6, 3, 15, 6011486208, 123633664, 4532224, 25691496, 9],
 33 |                 10: [6, 3, 14, 6011486208, 123633664, 3729408, 6423912, 10],
 34 |                 11: [5, 3, 13, 5086642176, 123633664, 2926592, 12846440, 11],
 35 |                 12: [4, 3, 12, 3236954112, 123633664, 2123776, 12846440, 12],
 36 |                 13: [3, 3, 11, 1387266048, 123633664, 1320960, 12846440, 13],
 37 |                 14: [3, 3, 10, 1387266048, 123633664, 919552, 3212648, 14],
 38 |                 15: [2, 3, 9, 924844032, 123633664, 518144, 3212648, 15],
 39 |                 16: [1, 3, 8, 462422016, 123633664, 417792, 3212648, 16],
 40 |                 17: [0, 3, 7, 0, 123633664, 317440, 3212648, 17],
 41 |                 18: [0, 3, 6, 0, 123633664, 217088, 3212648, 18],
 42 |                 19: [0, 3, 4, 0, 123633664, 16384, 804200, 19],
 43 |                 20: [0, 2, 2, 0, 20873216, 12288, 804200, 20],
 44 |                 21: [0, 1, 0, 0, 4096000, 0, 132416, 21],
 45 |                 22: [0, 0, 0, 0, 0, 0, 0, 22]
 46 |                 }
 47 | 
 48 | yolo_info = {
 49 |                 0: [9, 0, 22, 3537437696, 0, 28640768, 16614800, 0],
 50 |                 1: [8, 0, 22, 3462677504, 0, 28640768, 88606096, 1],
 51 |                 2: [8, 0, 21, 3462677504, 0, 23102976, 88606096, 2],
 52 |                 3: [8, 0, 20, 3462677504, 0, 17565184, 88606096, 3],
 53 |                 4: [8, 0, 19, 3462677504, 0, 14796288, 22152576, 4],
 54 |                 5: [7, 0, 19, 3263316992, 0, 14796288, 44303744, 5],
 55 |                 6: [7, 0, 18, 3263316992, 0, 12027392, 44303744, 6],
 56 |                 7: [7, 0, 17, 3263316992, 0, 9258496, 44303744, 7],
 57 |                 8: [7, 0, 16, 3263316992, 0, 7874048, 11076992, 8],
 58 |                 9: [6, 0, 16, 3063956480, 0, 7874048, 22152576, 9],
 59 |                 10: [6, 0, 15, 3063956480, 0, 6489600, 22152576, 10],
 60 |                 11: [6, 0, 14, 3063956480, 0, 5105152, 22152576, 11],
 61 |                 12: [6, 0, 13, 3063956480, 0, 4412928, 5539200, 12],
 62 |                 13: [5, 0, 13, 2864595968, 0, 4412928, 11076992, 13],
 63 |                 14: [5, 0, 12, 2864595968, 0, 3720704, 11076992, 14],
 64 |                 15: [5, 0, 11, 2864595968, 0, 3028480, 11076992, 15],
 65 |                 16: [5, 0, 10, 2864595968, 0, 2682368, 2770304, 16],
 66 |                 17: [4, 0, 10, 2665235456, 0, 2682368, 5539208, 17],
 67 |                 18: [4, 0, 9, 2665235456, 0, 2336256, 5539208, 18],
 68 |                 19: [4, 0, 8, 2665235456, 0, 1990144, 5539208, 19],
 69 |                 20: [4, 0, 7, 2665235456, 0, 1817088, 1385864, 20],
 70 |                 21: [3, 0, 7, 2465874944, 0, 1817088, 2770312, 21],
 71 |                 22: [3, 0, 6, 2465874944, 0, 1644032, 2770312, 22],
 72 |                 23: [3, 0, 5, 2465874944, 0, 1470976, 2770312, 23],
 73 |                 24: [3, 0, 4, 2465874944, 0, 1384448, 2770312, 24],
 74 |                 25: [2, 0, 4, 1668432896, 0, 1384448, 5539208, 25],
 75 |                 26: [2, 0, 3, 1668432896, 0, 1038336, 5539208, 26],
 76 |                 27: [2, 0, 2, 1668432896, 0, 692224, 5539208, 27],
 77 |                 28: [1, 0, 2, 73548800, 0, 692224, 2770312, 28],
 78 |                 29: [1, 0, 1, 73548800, 0, 346112, 2770312, 29],
 79 |                 30: [1, 0, 0, 73548800, 0, 0, 2770312, 30],
 80 |                 31: [0, 0, 0, 0, 0, 0, 0, 31]
 81 |                 }
 82 | 
 83 | 
 84 | def parse_args():
 85 |     # Parse input arguments
 86 |     desc = 'Capture and display live camera video on Jetson TX2/TX1'
 87 |     parser = argparse.ArgumentParser(description=desc)
 88 |     parser.add_argument('--rtsp', dest='use_rtsp',
 89 |                         help='use IP CAM (remember to also set --uri)',
 90 |                         action='store_true')
 91 |     parser.add_argument('--uri', dest='rtsp_uri',
 92 |                         help='RTSP URI, e.g. rtsp://192.168.1.64:554',
 93 |                         default=None, type=str)
 94 |     parser.add_argument('--latency', dest='rtsp_latency',
 95 |                         help='latency in ms for RTSP [200]',
 96 |                         default=200, type=int)
 97 |     parser.add_argument('--usb', dest='use_usb',
 98 |                         help='use USB webcam (remember to also set --vid)',
 99 |                         action='store_true')
100 |     parser.add_argument('--vid', dest='video_dev',
101 |                         help='device # of USB webcam (/dev/video?) [1]',
102 |                         default=1, type=int)
103 |     parser.add_argument('--width', dest='image_width',
104 |                         help='image width',
105 |                         default=640, type=int)
106 |     parser.add_argument('--height', dest='image_height',
107 |                         help='image height',
108 |                         default=480, type=int)
109 |     parser.add_argument('--dnn', dest='dnn_model',
110 |                         help='vgg, yolo',
111 |                         default='yolo', type=str)
112 |     parser.add_argument('--host', dest='host',
113 |                         help='Ip address',
114 |                         default='192.168.1.72', type=str)
115 |     parser.add_argument('--port', dest='port',
116 |                         help='Ip port',
117 |                         default=8080, type=int)
118 |     args = parser.parse_args()
119 |     return args
120 | 
121 | 
122 | def open_cam_rtsp(uri, width, height, latency):
123 |     gst_str = ('rtspsrc location={} latency={} ! '
124 |                'rtph264depay ! h264parse ! omxh264dec ! '
125 |                'nvvidconv ! '
126 |                'video/x-raw, width=(int){}, height=(int){}, '
127 |                'format=(string)BGRx ! '
128 |                'videoconvert ! appsink').format(uri, latency, width, height)
129 |     return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
130 | 
131 | 
132 | def open_cam_usb(dev, width, height):
133 |     # We want to set width and height here, otherwise we could just do:
134 |     #     return cv2.VideoCapture(dev)
135 |     gst_str = ('v4l2src device=/dev/video{} ! '
136 |                'video/x-raw, width=(int){}, height=(int){} ! '
137 |                'videoconvert ! appsink').format(dev, width, height)
138 |     return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
139 | 
140 | 
141 | def open_cam_onboard(width, height):
142 |     gst_elements = str(subprocess.check_output('gst-inspect-1.0'))
143 |     if 'nvcamerasrc' in gst_elements:
144 |         # On versions of L4T prior to 28.1, add 'flip-method=2' into gst_str
145 |         gst_str = ('nvcamerasrc ! '
146 |                    'video/x-raw(memory:NVMM), '
147 |                    'width=(int)2592, height=(int)1458, '
148 |                    'format=(string)I420, framerate=(fraction)30/1 ! '
149 |                    'nvvidconv ! '
150 |                    'video/x-raw, width=(int){}, height=(int){}, '
151 |                    'format=(string)BGRx ! '
152 |                    'videoconvert ! appsink').format(width, height)
153 |     elif 'nvarguscamerasrc' in gst_elements:
154 |         gst_str = ('nvarguscamerasrc ! '
155 |                    'video/x-raw(memory:NVMM), '
156 |                    'width=(int)640, height=(int)480,'
157 |                    'format=(string)NV12, framerate=(fraction)30/1 ! '
158 |                    'nvvidconv flip-method=0 ! '
159 |                    'video/x-raw, width=(int){}, height=(int){}, '
160 |                    'format=(string)BGRx ! '
161 |                    'videoconvert ! appsink').format(width, height)
162 |     else:
163 |         raise RuntimeError('onboard camera source not found!')
164 |     return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
165 | 
166 | 
167 | def open_window(width, height):
168 |     cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
169 |     cv2.resizeWindow(WINDOW_NAME, width, height)
170 |     cv2.moveWindow(WINDOW_NAME, 0, 0)
171 |     cv2.setWindowTitle(WINDOW_NAME, 'Camera Demo for Jetson TX2/TX1')
172 | 
173 | def prepare_image_vgg(frame):
174 |     min_img_size = 224
175 |     transform_pipeline = transforms.Compose([transforms.Resize((min_img_size, min_img_size)),
176 |                                              transforms.ToTensor(),
177 |                                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
178 |                                                                   std=[0.229, 0.224, 0.225])])
179 |     img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
180 |     img = Image.fromarray(img_rgb)
181 |     img = transform_pipeline(img)
182 |     img = img.unsqueeze(0)
183 |     return img
184 | 
185 | def prepare_image_yolo(frame):
186 |     min_img_size = 416
187 |     image = cv2.resize(frame, (min_img_size, min_img_size), interpolation=cv2.INTER_CUBIC)
188 |     image = np.array(image, dtype='float32')
189 |     img = torch.from_numpy(image.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
190 |     return img
191 | 
192 | def show_preds(img, label, averageTime):
193 |     x = 10
194 |     y = 50
195 | 
196 |     font = cv2.FONT_HERSHEY_PLAIN
197 | 
198 |     pred = '{:20s}'.format(label[1])
199 |     cv2.putText(img, pred, (x, y), font, 2, (0, 0, 240), 2, cv2.LINE_AA)
200 |     y += 30
201 |     timeShow = 'AvgTime: {:.4f}'.format(averageTime)
202 |     cv2.putText(img, timeShow, (x, y), font, 2, (0, 0, 240), 2, cv2.LINE_AA)
203 | 
204 |     return img
205 | 
206 | def getVggLabelDic(class_file):
207 |     with open(class_file, "r") as read_file:
208 |         class_idx = json.load(read_file)
209 |         labels = {int(key): value for key, value in class_idx.items()}
210 |     return labels
211 | 
212 | def decodePrediction_vgg(res, labels):
213 |     res = torch.autograd.Variable(res)
214 |     label_index = torch.argmax(res).item()
215 |     return labels[label_index]
216 | 
217 | def getActualDelay(action, model, preprocessed_image, totallayerNo, communication):
218 |     if action == totallayerNo - 1: # local mobile process
219 |         prediction = model(preprocessed_image.cuda())
220 |         return 0, prediction.item()
221 |     else:
222 |         intermediate_output = model(preprocessed_image.cuda(), server=False, partition=action)
223 | 
224 |     data_to_server = [action, intermediate_output.data]
225 |     del intermediate_output
226 | 
227 |     start_time = time.time()
228 |     communication.send_msg(data_to_server)
229 | 
230 |     result = communication.receive_msg()
231 | 
232 |     communication.close_channel()
233 |     end_time = time.time()
234 | 
235 |     return end_time - start_time,  result
236 | 
237 | def load_obj(name):
238 |     with open(name + '.pkl', 'rb') as f:
239 |         return pickle.load(f)
240 | 
241 | if __name__ == '__main__':
242 |     args = parse_args()
243 |     print('Called with args:')
244 |     print(args)
245 |     print('OpenCV version: {}'.format(cv2.__version__))
246 | 
247 |     if args.dnn_model == 'vgg':
248 |         model = vgg16()
249 |         model.eval()
250 |         frontEndDelay = load_obj('models/vgg16FrontEndDelay')
251 |         labels = getVggLabelDic('models/imagenet_class_index.json')
252 |         partitionInfo = vgg_info
253 |     else:
254 |         model = tinyYolo()
255 |         model.eval()
256 |         frontEndDelay = load_obj('models/yoloFrontEndDelay')
257 |         labels = load_class_names('models/voc.names')
258 |         partitionInfo = yolo_info
259 | 
260 |     model.cuda()
261 |     Action_num = len(partitionInfo)
262 | 
263 |     muLinUCB = muLinUCB(mu=0.25, layerInfo=partitionInfo, frontDelay=frontEndDelay)
264 |     communication = clientCommunication(args.host, args.port)
265 | 
266 |     if args.use_rtsp:
267 |         cap = open_cam_rtsp(args.rtsp_uri,
268 |                             args.image_width,
269 |                             args.image_height,
270 |                             args.rtsp_latency)
271 |     elif args.use_usb:
272 |         cap = open_cam_usb(args.video_dev,
273 |                            args.image_width,
274 |                            args.image_height)
275 |     else:  # by default, use the Jetson onboard camera
276 |         cap = open_cam_onboard(args.image_width, args.image_height)
277 |         # cap = cv2.VideoCapture(0)
278 | 
279 |     if not cap.isOpened():
280 |         sys.exit('Failed to open camera!')
281 | 
282 |     open_window(args.image_width, args.image_height)
283 | 
284 |     show_help = True
285 |     full_scrn = False
286 |     help_text = '"Esc" to Quit, "H" for Help, "F" to Toggle Fullscreen'
287 |     font = cv2.FONT_HERSHEY_PLAIN
288 | 
289 |     total_time = 0
290 |     total_frame_num = 0
291 |     currentFrameNum = 0
292 |     keyflag = False
293 |     KeyFrame = KeyFrameDetection(threshold=0.8)
294 | 
295 |     while True:
296 |         if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
297 |             # Check to see if the user has closed the window
298 |             # If yes, terminate the program
299 |             break
300 |         _, img = cap.read()  # grab the next image frame from camera
301 | 
302 |         if args.dnn_model == 'vgg':
303 |             preprocessed_image = prepare_image_vgg(img)
304 |         else:
305 |             preprocessed_image = prepare_image_yolo(img)
306 | 
307 |         # doubling trick is here.
308 |         currentFrameNum = currentFrameNum + 1
309 |         if muLinUCB.updateDoublingTrickFrameNum(currentFrameNum):
310 |             currentFrameNum = 0
311 | 
312 |         # key frame detection
313 |         if total_frame_num == 0:
314 |             keyflag = False
315 |             old_frame = np.copy(img)
316 |         else:
317 |             keyflag = KeyFrame.compare_images(old_frame, img)
318 |             old_frame = np.copy(img)
319 | 
320 |         # print('keyflag', keyflag)
321 | 
322 |         partitionPoint = muLinUCB.getEstimationAction(keyflag, currentFrameNum)
323 |         # print('partitionPoint', partitionPoint)
324 | 
325 |         end2endtime_start = time.time()
326 |         actual_delay, res = getActualDelay(partitionPoint, model, preprocessed_image, Action_num, communication)
327 | 
328 |         end2endtime_end = time.time()
329 | 
330 |         total_frame_num = total_frame_num + 1
331 |         total_time = total_time + (end2endtime_end - end2endtime_start)
332 |         average_time = total_time/total_frame_num
333 | 
334 |         # update A and b
335 |         muLinUCB.updateA_b(partitionPoint, actual_delay)
336 | 
337 |         # print results on  the screen
338 |         if args.dnn_model == 'vgg':
339 |             label = decodePrediction_vgg(res, labels)
340 |             img = show_preds(img, label, average_time)
341 |         else:
342 |             boxes = get_boxes(res, model, conf_thresh=0.5, nms_thresh=0.5)
343 |             img = plot_boxes_cv2(img, boxes, class_names=labels)
344 | 
345 |         if show_help:
346 |             cv2.putText(img, help_text, (11, 20), font,
347 |                         1.0, (32, 32, 32), 4, cv2.LINE_AA)
348 |             cv2.putText(img, help_text, (10, 20), font,
349 |                         1.0, (240, 240, 240), 1, cv2.LINE_AA)
350 | 
351 |         cv2.imshow(WINDOW_NAME, img)
352 | 
353 |         key = cv2.waitKey(10)
354 |         if key == 27: # ESC key: quit program
355 |             break
356 |         elif key == ord('H') or key == ord('h'): # toggle help message
357 |             show_help = not show_help
358 |         elif key == ord('F') or key == ord('f'): # toggle fullscreen
359 |             full_scrn = not full_scrn
360 |             if full_scrn:
361 |                 cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
362 |                                       cv2.WINDOW_FULLSCREEN)
363 |             else:
364 |                 cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
365 |                                       cv2.WINDOW_NORMAL)
366 |         
367 | 
368 |     cap.release()
369 |     cv2.destroyAllWindows()
370 | 


--------------------------------------------------------------------------------
/communication.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | import pickle
 3 | import socket
 4 | 
 5 | def recv_msg(sock):
 6 |     # Read message length and unpack it into an integer
 7 |     raw_msglen = recvall(sock, 4)
 8 |     if not raw_msglen:
 9 |         return None
10 |     msglen = struct.unpack('>I', raw_msglen)[0]
11 |     # Read the message data
12 |     return recvall(sock, msglen)
13 | 
14 | def recvall(sock, n):
15 |     # Helper function to recv n bytes or return None if EOF is hit
16 |     data = b''
17 |     while len(data) < n:
18 |         packet = sock.recv(n - len(data))
19 |         if not packet:
20 |             return None
21 |         data += packet
22 |     return data
23 | 
24 | def send_msg(sock, msg):
25 |     # Prefix each message with a 4-byte length (network byte order)
26 |     msg = struct.pack('>I', len(msg)) + msg
27 |     sock.sendall(msg)
28 | 
29 | def decode_msg(msg):
30 |     res = pickle.loads(msg)
31 |     return res
32 | 
33 | def encode_msg(data):
34 |     msg = pickle.dumps(data)
35 |     return msg
36 | 
37 | class clientCommunication():
38 |     def __init__(self, host, port):
39 |         self.host = host
40 |         self.port = port
41 |         self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
42 | 
43 |     def send_msg(self, msg):
44 |         msg = encode_msg(msg)
45 |         self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
46 |         self.s.connect((self.host, self.port))
47 |         send_msg(self.s, msg)
48 | 
49 |     def receive_msg(self):
50 |         received = recv_msg(self.s)
51 |         received = decode_msg(received)
52 |         return received
53 | 
54 |     def close_channel(self):
55 |         self.s.close()
56 | 
57 | class serverCommunication():
58 |     def __init__(self, host, port):
59 |         self.host = host
60 |         self.port = port
61 |         self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
62 |         self.s.bind((self.host, self.port))
63 |         self.s.listen()
64 | 
65 |     def send_msg(self, conn, msg):
66 |         msg = encode_msg(msg)
67 |         send_msg(conn, msg)
68 | 
69 |     def receive_msg(self, conn):
70 |         received = recv_msg(conn)
71 |         received = decode_msg(received)
72 |         return received
73 | 
74 |     def accept_conn(self):
75 |         conn, addr = self.s.accept()
76 |         return conn, addr
77 | 
78 |     def close_channel(self):
79 |         self.s.close()
80 | 


--------------------------------------------------------------------------------
/edge_server_main.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import cv2
 3 | import torchvision.transforms as transforms
 4 | import torch
 5 | 
 6 | from PIL import Image
 7 | 
 8 | from models.vgg16 import vgg16
 9 | from models.tiny_yolo import tinyYolo
10 | from communication import serverCommunication
11 | 
12 | 
13 | WINDOW_NAME = 'CameraDemo'
14 | 
15 | 
16 | def parse_args():
17 |     # Parse input arguments
18 |     desc = 'ANS in edge server side'
19 |     parser = argparse.ArgumentParser(description=desc)
20 |     parser.add_argument('--dnn', dest='dnn_model',
21 |                         help='vgg, yolo',
22 |                         default='yolo', type=str)
23 |     parser.add_argument('--host', dest='host',
24 |                         help='Ip address',
25 |                         default='192.168.1.72', type=str)
26 |     parser.add_argument('--port', dest='port',
27 |                         help='Ip port',
28 |                         default=8080, type=int)
29 |     args = parser.parse_args()
30 |     return args
31 | 
32 | if __name__ == '__main__':
33 |     args = parse_args()
34 |     print('Called with args:')
35 |     print(args)
36 |     print('OpenCV version: {}'.format(cv2.__version__))
37 | 
38 |     if args.dnn_model == 'vgg':
39 |         model = vgg16()
40 |         model.eval()
41 |     else:
42 |         model = tinyYolo()
43 |         model.eval()
44 | 
45 |     model.cuda()
46 | 
47 |     communication = serverCommunication(args.host, args.port)
48 | 
49 |     while True:
50 |         try:
51 |             conn, addr = communication.accept_conn()
52 |             with conn:
53 |                 recv_data = communication.receive_msg(conn)
54 |                 print('receive data from mobile device !!!')
55 |                 partition_point = recv_data[0]
56 |                 data = recv_data[1]
57 |                 data = torch.autograd.Variable(data)
58 |                 prediction = model(data.cuda(), server=True, partition=partition_point)
59 |                 res = prediction.data
60 | 
61 |                 msg = communication.send_msg(conn, res)
62 | 
63 |         except KeyboardInterrupt or TypeError or OSError:
64 |             communication.close_channel()
65 | 


--------------------------------------------------------------------------------
/keyFrameDetection.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from skimage.metrics import structural_similarity as ssim
 3 | import cv2
 4 | 
 5 | class KeyFrameDetection:
 6 |     def __init__(self, threshold):
 7 |         self.threshold = threshold
 8 | 
 9 |     def compare_images(self, imgA, imgB):
10 |         imgA = cv2.cvtColor(imgA, cv2.COLOR_BGR2GRAY)
11 |         imgB = cv2.cvtColor(imgB, cv2.COLOR_BGR2GRAY)
12 |         s = ssim(imgA, imgB)
13 |         # print('ssim:', s)
14 |         if s < self.threshold:
15 |             KeyFlag = True
16 |             return KeyFlag
17 |         KeyFlag = False
18 |         return KeyFlag
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     keyFrameDetection = KeyFrameDetection(0.6)
23 |     imgA = cv2.imread("Golden_Retriever_Hund_Dog.jpg")
24 |     imgB = cv2.imread("Golden_Retriever_Hund_Dog.jpg")
25 | 
26 |     flag = keyFrameDetection.compare_images(imgA, imgB)
27 |     print(flag)
28 | 


--------------------------------------------------------------------------------
/models/Golden_Retriever_Hund_Dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/Golden_Retriever_Hund_Dog.jpg


--------------------------------------------------------------------------------
/models/imagenet_class_index.json:
--------------------------------------------------------------------------------
1 | {"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}


--------------------------------------------------------------------------------
/models/tiny_yolo.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | import torch
  6 | import math
  7 | from collections import OrderedDict
  8 | 
  9 | 
 10 | def convert2cpu(gpu_matrix):
 11 |     return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
 12 | 
 13 | def load_conv_bn(buf, start, conv_model, bn_model):
 14 |     num_w = conv_model.weight.numel()
 15 |     # print('num_w', num_w)
 16 |     num_b = bn_model.bias.numel()
 17 |     # print('num_b', num_b)
 18 |     bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
 19 |     start = start + num_b
 20 |     bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]))
 21 |     start = start + num_b
 22 |     bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]))
 23 |     start = start + num_b
 24 |     bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b]))
 25 |     start = start + num_b
 26 |     conv_model.weight.data.copy_(torch.reshape(torch.from_numpy(buf[start:start + num_w]), (
 27 |         conv_model.weight.shape[0], conv_model.weight.shape[1], conv_model.weight.shape[2],
 28 |         conv_model.weight.shape[3])))
 29 |     start = start + num_w
 30 |     # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w
 31 |     return start
 32 | 
 33 | def load_conv(buf, start, conv_model):
 34 |     num_w = conv_model.weight.numel()
 35 |     num_b = conv_model.bias.numel()
 36 |     conv_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
 37 |     conv_model.weight.data.copy_(torch.reshape(torch.from_numpy(buf[start:start + num_w]), (
 38 |         conv_model.weight.shape[0], conv_model.weight.shape[1], conv_model.weight.shape[2],
 39 |         conv_model.weight.shape[3])))
 40 |     start = start + num_w
 41 |     return start
 42 | 
 43 | class MaxPoolStride1(nn.Module):
 44 |     def __init__(self):
 45 |         super(MaxPoolStride1, self).__init__()
 46 | 
 47 |     def forward(self, x):
 48 |         x = F.max_pool2d(F.pad(x, (0, 1, 0, 1), mode='replicate'), 2, stride=1)
 49 |         return x
 50 | 
 51 | def bbox_iou(box1, box2, x1y1x2y2=True):
 52 |     if x1y1x2y2:
 53 |         mx = min(box1[0], box2[0])
 54 |         Mx = max(box1[2], box2[2])
 55 |         my = min(box1[1], box2[1])
 56 |         My = max(box1[3], box2[3])
 57 |         w1 = box1[2] - box1[0]
 58 |         h1 = box1[3] - box1[1]
 59 |         w2 = box2[2] - box2[0]
 60 |         h2 = box2[3] - box2[1]
 61 |     else:
 62 |         mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0)
 63 |         Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0)
 64 |         my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0)
 65 |         My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0)
 66 |         w1 = box1[2]
 67 |         h1 = box1[3]
 68 |         w2 = box2[2]
 69 |         h2 = box2[3]
 70 |     uw = Mx - mx
 71 |     uh = My - my
 72 |     cw = w1 + w2 - uw
 73 |     ch = h1 + h2 - uh
 74 |     if cw <= 0 or ch <= 0:
 75 |         return 0.0
 76 | 
 77 |     area1 = w1 * h1
 78 |     area2 = w2 * h2
 79 |     carea = cw * ch
 80 |     uarea = area1 + area2 - carea
 81 |     return carea/uarea
 82 | 
 83 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
 84 |     if x1y1x2y2:
 85 |         mx = torch.min(boxes1[0], boxes2[0])
 86 |         Mx = torch.max(boxes1[2], boxes2[2])
 87 |         my = torch.min(boxes1[1], boxes2[1])
 88 |         My = torch.max(boxes1[3], boxes2[3])
 89 |         w1 = boxes1[2] - boxes1[0]
 90 |         h1 = boxes1[3] - boxes1[1]
 91 |         w2 = boxes2[2] - boxes2[0]
 92 |         h2 = boxes2[3] - boxes2[1]
 93 |     else:
 94 |         mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0)
 95 |         Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0)
 96 |         my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0)
 97 |         My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0)
 98 |         w1 = boxes1[2]
 99 |         h1 = boxes1[3]
100 |         w2 = boxes2[2]
101 |         h2 = boxes2[3]
102 |     uw = Mx - mx
103 |     uh = My - my
104 |     cw = w1 + w2 - uw
105 |     ch = h1 + h2 - uh
106 |     mask = ((cw <= 0) + (ch <= 0) > 0)
107 |     area1 = w1 * h1
108 |     area2 = w2 * h2
109 |     carea = cw * ch
110 |     carea[mask] = 0
111 |     uarea = area1 + area2 - carea
112 |     return carea/uarea
113 | 
114 | def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, sil_thresh, seen):
115 |     nB = target.size(0)
116 |     nA = num_anchors
117 |     nC = num_classes
118 |     anchor_step = len(anchors)/num_anchors
119 |     conf_mask  = torch.ones(nB, nA, nH, nW) * noobject_scale
120 |     coord_mask = torch.zeros(nB, nA, nH, nW)
121 |     cls_mask   = torch.zeros(nB, nA, nH, nW)
122 |     tx         = torch.zeros(nB, nA, nH, nW)
123 |     ty         = torch.zeros(nB, nA, nH, nW)
124 |     tw         = torch.zeros(nB, nA, nH, nW)
125 |     th         = torch.zeros(nB, nA, nH, nW)
126 |     tconf      = torch.zeros(nB, nA, nH, nW)
127 |     tcls       = torch.zeros(nB, nA, nH, nW)
128 | 
129 |     nAnchors = nA*nH*nW
130 |     nPixels  = nH*nW
131 |     for b in range(nB):
132 |         cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t()
133 |         cur_ious = torch.zeros(nAnchors)
134 |         for t in range(50):
135 |             if target[b][t*5+1] == 0:
136 |                 break
137 |             gx = target[b][t*5+1]*nW
138 |             gy = target[b][t*5+2]*nH
139 |             gw = target[b][t*5+3]*nW
140 |             gh = target[b][t*5+4]*nH
141 |             cur_gt_boxes = torch.FloatTensor([gx,gy,gw,gh]).repeat(nAnchors,1).t()
142 |             cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
143 |         conf_mask[b][cur_ious>sil_thresh] = 0
144 |     if seen < 12800:
145 |        if anchor_step == 4:
146 |            tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1,nA,1,1).repeat(nB,1,nH,nW)
147 |            ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(1,nA,1,1).repeat(nB,1,nH,nW)
148 |        else:
149 |            tx.fill_(0.5)
150 |            ty.fill_(0.5)
151 |        tw.zero_()
152 |        th.zero_()
153 |        coord_mask.fill_(1)
154 | 
155 |     nGT = 0
156 |     nCorrect = 0
157 |     for b in range(nB):
158 |         for t in range(50):
159 |             if target[b][t*5+1] == 0:
160 |                 break
161 |             nGT = nGT + 1
162 |             best_iou = 0.0
163 |             best_n = -1
164 |             min_dist = 10000
165 |             gx = target[b][t*5+1] * nW
166 |             gy = target[b][t*5+2] * nH
167 |             gi = int(gx)
168 |             gj = int(gy)
169 |             gw = target[b][t*5+3]*nW
170 |             gh = target[b][t*5+4]*nH
171 |             gt_box = [0, 0, gw, gh]
172 |             for n in range(nA):
173 |                 aw = anchors[anchor_step*n]
174 |                 ah = anchors[anchor_step*n+1]
175 |                 anchor_box = [0, 0, aw, ah]
176 |                 iou  = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
177 |                 if anchor_step == 4:
178 |                     ax = anchors[anchor_step*n+2]
179 |                     ay = anchors[anchor_step*n+3]
180 |                     dist = pow(((gi+ax) - gx), 2) + pow(((gj+ay) - gy), 2)
181 |                 if iou > best_iou:
182 |                     best_iou = iou
183 |                     best_n = n
184 |                 elif anchor_step==4 and iou == best_iou and dist < min_dist:
185 |                     best_iou = iou
186 |                     best_n = n
187 |                     min_dist = dist
188 | 
189 |             gt_box = [gx, gy, gw, gh]
190 |             pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi]
191 | 
192 |             coord_mask[b][best_n][gj][gi] = 1
193 |             cls_mask[b][best_n][gj][gi] = 1
194 |             conf_mask[b][best_n][gj][gi] = object_scale
195 |             tx[b][best_n][gj][gi] = target[b][t*5+1] * nW - gi
196 |             ty[b][best_n][gj][gi] = target[b][t*5+2] * nH - gj
197 |             tw[b][best_n][gj][gi] = math.log(gw/anchors[anchor_step*best_n])
198 |             th[b][best_n][gj][gi] = math.log(gh/anchors[anchor_step*best_n+1])
199 |             iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
200 |             tconf[b][best_n][gj][gi] = iou
201 |             tcls[b][best_n][gj][gi] = target[b][t*5]
202 |             if iou > 0.5:
203 |                 nCorrect = nCorrect + 1
204 | 
205 |     return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
206 | 
207 | class RegionLoss(nn.Module):
208 |     def __init__(self, num_classes=0, anchors=[], num_anchors=1):
209 |         super(RegionLoss, self).__init__()
210 |         self.num_classes = num_classes
211 |         self.anchors = anchors
212 |         self.num_anchors = num_anchors
213 |         self.anchor_step = len(anchors)/num_anchors
214 |         self.coord_scale = 1
215 |         self.noobject_scale = 1
216 |         self.object_scale = 5
217 |         self.class_scale = 1
218 |         self.thresh = 0.6
219 |         self.seen = 0
220 | 
221 |     def forward(self, output, target):
222 |         #output : BxAs*(4+1+num_classes)*H*W
223 |         nB = output.data.size(0)
224 |         nA = self.num_anchors
225 |         nC = self.num_classes
226 |         nH = output.data.size(2)
227 |         nW = output.data.size(3)
228 | 
229 |         output   = output.view(nB, nA, (5+nC), nH, nW)
230 |         x    = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
231 |         y    = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
232 |         w    = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
233 |         h    = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
234 |         conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
235 |         cls  = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
236 |         cls  = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
237 | 
238 |         pred_boxes = torch.cuda.FloatTensor(4, nB*nA*nH*nW)
239 |         grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
240 |         grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
241 |         anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
242 |         anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
243 |         anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW)
244 |         anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW)
245 |         pred_boxes[0] = x.data + grid_x
246 |         pred_boxes[1] = y.data + grid_y
247 |         pred_boxes[2] = torch.exp(w.data) * anchor_w
248 |         pred_boxes[3] = torch.exp(h.data) * anchor_h
249 |         pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4))
250 | 
251 |         nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf,tcls = build_targets(pred_boxes, target.data, self.anchors, nA, nC, \
252 |                                                                nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
253 |         cls_mask = (cls_mask == 1)
254 |         nProposals = int((conf > 0.25).sum().data[0])
255 | 
256 |         tx    = Variable(tx.cuda())
257 |         ty    = Variable(ty.cuda())
258 |         tw    = Variable(tw.cuda())
259 |         th    = Variable(th.cuda())
260 |         tconf = Variable(tconf.cuda())
261 |         tcls  = Variable(tcls.view(-1)[cls_mask].long().cuda())
262 | 
263 |         coord_mask = Variable(coord_mask.cuda())
264 |         conf_mask  = Variable(conf_mask.cuda().sqrt())
265 |         cls_mask   = Variable(cls_mask.view(-1, 1).repeat(1,nC).cuda())
266 |         cls        = cls[cls_mask].view(-1, nC)
267 | 
268 |         loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x*coord_mask, tx*coord_mask)/2.0
269 |         loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y*coord_mask, ty*coord_mask)/2.0
270 |         loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w*coord_mask, tw*coord_mask)/2.0
271 |         loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h*coord_mask, th*coord_mask)/2.0
272 |         loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0
273 |         loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
274 |         loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
275 | 
276 |         return loss
277 | 
278 | 
279 | class TinyYoloNet(nn.Module):
280 |     def __init__(self):
281 |         super(TinyYoloNet, self).__init__()
282 |         self.seen = 0
283 |         self.num_classes = 20
284 |         self.anchors = [1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52]
285 |         self.num_anchors = len(self.anchors)/2
286 |         self.num_output = (5+self.num_classes)*self.num_anchors
287 | 
288 |         self.loss = RegionLoss(self.num_classes, self.anchors, self.num_anchors)
289 |         self.cnn = nn.Sequential(OrderedDict([
290 |             # conv1
291 |             ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)),
292 |             ('bn1', nn.BatchNorm2d(16)),
293 |             ('leaky1', nn.LeakyReLU(0.1, inplace=True)),
294 |             ('pool1', nn.MaxPool2d(2, 2)),
295 | 
296 |             # conv2
297 |             ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)),
298 |             ('bn2', nn.BatchNorm2d(32)),
299 |             ('leaky2', nn.LeakyReLU(0.1, inplace=True)),
300 |             ('pool2', nn.MaxPool2d(2, 2)),
301 | 
302 |             # conv3
303 |             ('conv3', nn.Conv2d(32, 64, 3, 1, 1, bias=False)),
304 |             ('bn3', nn.BatchNorm2d(64)),
305 |             ('leaky3', nn.LeakyReLU(0.1, inplace=True)),
306 |             ('pool3', nn.MaxPool2d(2, 2)),
307 | 
308 |             # conv4
309 |             ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)),
310 |             ('bn4', nn.BatchNorm2d(128)),
311 |             ('leaky4', nn.LeakyReLU(0.1, inplace=True)),
312 |             ('pool4', nn.MaxPool2d(2, 2)),
313 | 
314 |             # conv5
315 |             ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)),
316 |             ('bn5', nn.BatchNorm2d(256)),
317 |             ('leaky5', nn.LeakyReLU(0.1, inplace=True)),
318 |             ('pool5', nn.MaxPool2d(2, 2)),
319 | 
320 |             # conv6
321 |             ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)),
322 |             ('bn6', nn.BatchNorm2d(512)),
323 |             ('leaky6', nn.LeakyReLU(0.1, inplace=True)),
324 |             ('pool6', MaxPoolStride1()),
325 | 
326 |             # conv7
327 |             ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)),
328 |             ('bn7', nn.BatchNorm2d(1024)),
329 |             ('leaky7', nn.LeakyReLU(0.1, inplace=True)),
330 | 
331 |             # conv8
332 |             ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)),
333 |             ('bn8', nn.BatchNorm2d(1024)),
334 |             ('leaky8', nn.LeakyReLU(0.1, inplace=True)),
335 | 
336 |             # output
337 |             ('output', nn.Conv2d(1024, int(self.num_output), 1, 1, 0)),
338 |         ]))
339 | 
340 |     def forward(self, x, server=True, partition=0):
341 |         if server == True:
342 |             if partition == 0:
343 |                 x = self.cnn(x)
344 |             else:
345 |                 x = self.cnn[partition:](x)
346 |         else:
347 |             if partition == 0:
348 |                 x = x
349 |             else:
350 |                 x = self.cnn[0:partition](x)
351 |         return x
352 | 
353 |     def print_network(self):
354 |         print(self)
355 | 
356 |     def load_weights(self, path):
357 | 
358 |         buf = np.fromfile(path, dtype = np.float32)
359 |         start = 4
360 |         #print(buf.shape)
361 |         start = load_conv_bn(buf, start, self.cnn[0], self.cnn[1], )
362 |         start = load_conv_bn(buf, start, self.cnn[4], self.cnn[5])
363 |         start = load_conv_bn(buf, start, self.cnn[8], self.cnn[9])
364 |         start = load_conv_bn(buf, start, self.cnn[12], self.cnn[13])
365 |         start = load_conv_bn(buf, start, self.cnn[16], self.cnn[17])
366 |         start = load_conv_bn(buf, start, self.cnn[20], self.cnn[21])
367 | 
368 |         start = load_conv_bn(buf, start, self.cnn[24], self.cnn[25])
369 |         start = load_conv_bn(buf, start, self.cnn[27], self.cnn[28])
370 |         start = load_conv(buf, start, self.cnn[30])
371 | 
372 | def tinyYolo():
373 |     m = TinyYoloNet()
374 |     m.float()
375 |     m.load_weights('C:/Users/zhang/PycharmProjects/WorkBoard/partirion_inference/models/yolov2-tiny-voc.weights')
376 |     return m
377 | 
378 | 
379 | if __name__ == '__main__':
380 |     from PIL import Image
381 |     from utils import *
382 | 
383 |     m = tinyYolo()
384 |     m.eval()
385 |     # print(m)
386 |     
387 |     use_cuda = 1
388 |     if use_cuda:
389 |         m.cuda()
390 | 
391 |     img = Image.open('Golden_Retriever_Hund_Dog.jpg').convert('RGB')
392 |     sized = img.resize((416, 416))
393 | 
394 |     boxes = do_detect(m, sized, 0.5, 0.5, use_cuda)
395 | 
396 |     class_names = load_class_names('voc.names')
397 |     plot_boxes(img, boxes, 'predict1.jpg', class_names)  
398 | 
399 | 


--------------------------------------------------------------------------------
/models/utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import time
  4 | import math
  5 | import torch
  6 | import numpy as np
  7 | from PIL import Image, ImageDraw, ImageFont
  8 | from torch.autograd import Variable
  9 | 
 10 | import struct
 11 | import imghdr
 12 | 
 13 | def sigmoid(x):
 14 |     return 1.0/(math.exp(-x)+1.)
 15 | 
 16 | def softmax(x):
 17 |     x = torch.exp(x - torch.max(x))
 18 |     x = x/x.sum()
 19 |     return x
 20 | 
 21 | 
 22 | def bbox_iou(box1, box2, x1y1x2y2=True):
 23 |     if x1y1x2y2:
 24 |         mx = min(box1[0], box2[0])
 25 |         Mx = max(box1[2], box2[2])
 26 |         my = min(box1[1], box2[1])
 27 |         My = max(box1[3], box2[3])
 28 |         w1 = box1[2] - box1[0]
 29 |         h1 = box1[3] - box1[1]
 30 |         w2 = box2[2] - box2[0]
 31 |         h2 = box2[3] - box2[1]
 32 |     else:
 33 |         mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0)
 34 |         Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0)
 35 |         my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0)
 36 |         My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0)
 37 |         w1 = box1[2]
 38 |         h1 = box1[3]
 39 |         w2 = box2[2]
 40 |         h2 = box2[3]
 41 |     uw = Mx - mx
 42 |     uh = My - my
 43 |     cw = w1 + w2 - uw
 44 |     ch = h1 + h2 - uh
 45 |     carea = 0
 46 |     if cw <= 0 or ch <= 0:
 47 |         return 0.0
 48 | 
 49 |     area1 = w1 * h1
 50 |     area2 = w2 * h2
 51 |     carea = cw * ch
 52 |     uarea = area1 + area2 - carea
 53 |     return carea/uarea
 54 | 
 55 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
 56 |     if x1y1x2y2:
 57 |         mx = torch.min(boxes1[0], boxes2[0])
 58 |         Mx = torch.max(boxes1[2], boxes2[2])
 59 |         my = torch.min(boxes1[1], boxes2[1])
 60 |         My = torch.max(boxes1[3], boxes2[3])
 61 |         w1 = boxes1[2] - boxes1[0]
 62 |         h1 = boxes1[3] - boxes1[1]
 63 |         w2 = boxes2[2] - boxes2[0]
 64 |         h2 = boxes2[3] - boxes2[1]
 65 |     else:
 66 |         mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0)
 67 |         Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0)
 68 |         my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0)
 69 |         My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0)
 70 |         w1 = boxes1[2]
 71 |         h1 = boxes1[3]
 72 |         w2 = boxes2[2]
 73 |         h2 = boxes2[3]
 74 |     uw = Mx - mx
 75 |     uh = My - my
 76 |     cw = w1 + w2 - uw
 77 |     ch = h1 + h2 - uh
 78 |     mask = ((cw <= 0) + (ch <= 0) > 0)
 79 |     area1 = w1 * h1
 80 |     area2 = w2 * h2
 81 |     carea = cw * ch
 82 |     carea[mask] = 0
 83 |     uarea = area1 + area2 - carea
 84 |     return carea/uarea
 85 | 
 86 | def nms(boxes, nms_thresh):
 87 |     if len(boxes) == 0:
 88 |         return boxes
 89 | 
 90 |     det_confs = torch.zeros(len(boxes))
 91 |     for i in range(len(boxes)):
 92 |         det_confs[i] = 1-boxes[i][4]                
 93 | 
 94 |     _,sortIds = torch.sort(det_confs)
 95 |     out_boxes = []
 96 |     for i in range(len(boxes)):
 97 |         box_i = boxes[sortIds[i]]
 98 |         if box_i[4] > 0:
 99 |             out_boxes.append(box_i)
100 |             for j in range(i+1, len(boxes)):
101 |                 box_j = boxes[sortIds[j]]
102 |                 if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh:
103 |                     #print(box_i, box_j, bbox_iou(box_i, box_j, x1y1x2y2=False))
104 |                     box_j[4] = 0
105 |     return out_boxes
106 | 
107 | def convert2cpu(gpu_matrix):
108 |     return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
109 | 
110 | def convert2cpu_long(gpu_matrix):
111 |     return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
112 | 
113 | def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False):
114 |     num_anchors = int(num_anchors)
115 |     anchor_step = int(len(anchors)/num_anchors)
116 |     if output.dim() == 3:
117 |         output = output.unsqueeze(0)
118 |     batch = output.size(0)
119 |     assert(output.size(1) == (5+num_classes)*num_anchors)
120 |     h = output.size(2)
121 |     w = output.size(3)
122 | 
123 |     t0 = time.time()
124 |     all_boxes = []
125 |     output = output.view(int(batch*num_anchors), int(5+num_classes), int(h*w)).transpose(0, 1).contiguous().view(int(5+num_classes), int(batch*num_anchors*h*w))
126 | 
127 |     grid_x = torch.linspace(0, w-1, w).repeat(h,1).repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda()
128 |     grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda()
129 |     xs = torch.sigmoid(output[0]) + grid_x
130 |     ys = torch.sigmoid(output[1]) + grid_y
131 | 
132 |     anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0]))
133 |     anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1]))
134 |     anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda()
135 |     anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda()
136 |     ws = torch.exp(output[2]) * anchor_w
137 |     hs = torch.exp(output[3]) * anchor_h
138 | 
139 |     det_confs = torch.sigmoid(output[4])
140 | 
141 |     cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0, 1))).data
142 |     cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
143 |     cls_max_confs = cls_max_confs.view(-1)
144 |     cls_max_ids = cls_max_ids.view(-1)
145 |     t1 = time.time()
146 |     
147 |     sz_hw = h*w
148 |     sz_hwa = sz_hw*num_anchors
149 |     det_confs = convert2cpu(det_confs)
150 |     cls_max_confs = convert2cpu(cls_max_confs)
151 |     cls_max_ids = convert2cpu_long(cls_max_ids)
152 |     xs = convert2cpu(xs)
153 |     ys = convert2cpu(ys)
154 |     ws = convert2cpu(ws)
155 |     hs = convert2cpu(hs)
156 |     if validation:
157 |         cls_confs = convert2cpu(cls_confs.view(-1, num_classes))
158 |     t2 = time.time()
159 |     for b in range(batch):
160 |         boxes = []
161 |         for cy in range(h):
162 |             for cx in range(w):
163 |                 for i in range(num_anchors):
164 |                     ind = b*sz_hwa + i*sz_hw + cy*w + cx
165 |                     det_conf =  det_confs[ind]
166 |                     if only_objectness:
167 |                         conf =  det_confs[ind]
168 |                     else:
169 |                         conf = det_confs[ind] * cls_max_confs[ind]
170 |     
171 |                     if conf > conf_thresh:
172 |                         bcx = xs[ind]
173 |                         bcy = ys[ind]
174 |                         bw = ws[ind]
175 |                         bh = hs[ind]
176 |                         cls_max_conf = cls_max_confs[ind]
177 |                         cls_max_id = cls_max_ids[ind]
178 |                         box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id]
179 |                         if (not only_objectness) and validation:
180 |                             for c in range(num_classes):
181 |                                 tmp_conf = cls_confs[ind][c]
182 |                                 if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh:
183 |                                     box.append(tmp_conf)
184 |                                     box.append(c)
185 |                         boxes.append(box)
186 |         all_boxes.append(boxes)
187 |     t3 = time.time()
188 |     if False:
189 |         print('---------------------------------')
190 |         print('matrix computation : %f' % (t1-t0))
191 |         print('        gpu to cpu : %f' % (t2-t1))
192 |         print('      boxes filter : %f' % (t3-t2))
193 |         print('---------------------------------')
194 |     return all_boxes
195 | 
196 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
197 |     import cv2
198 |     colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);
199 |     def get_color(c, x, max_val):
200 |         ratio = float(x)/max_val * 5
201 |         i = int(math.floor(ratio))
202 |         j = int(math.ceil(ratio))
203 |         ratio = ratio - i
204 |         r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
205 |         return int(r*255)
206 | 
207 |     width = img.shape[1]
208 |     height = img.shape[0]
209 |     for i in range(len(boxes)):
210 |         box = boxes[i]
211 |         x1 = int(((box[0] - box[2]/2.0) * width))
212 |         y1 = int(((box[1] - box[3]/2.0) * height))
213 |         x2 = int(((box[0] + box[2]/2.0) * width))
214 |         y2 = int(((box[1] + box[3]/2.0) * height))
215 | 
216 |         if color:
217 |             rgb = color
218 |         else:
219 |             rgb = (255, 0, 0)
220 |         if len(box) >= 7 and class_names:
221 |             cls_conf = box[5]
222 |             cls_id = box[6]
223 |             print('%s: %f' % (class_names[cls_id], cls_conf))
224 |             classes = len(class_names)
225 |             offset = cls_id * 123457 % classes
226 |             red   = get_color(2, offset, classes)
227 |             green = get_color(1, offset, classes)
228 |             blue  = get_color(0, offset, classes)
229 |             if color is None:
230 |                 rgb = (red, green, blue)
231 |             img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 240), 2)
232 |             #img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, rgb, 2)
233 |         #img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 2)
234 |         img = cv2.rectangle(img, (x1,y1), (x2,y2), (0, 0, 240), 2)
235 |     if savename:
236 |         print("save plot results to %s" % savename)
237 |         cv2.imwrite(savename, img)
238 |     return img
239 | 
240 | def plot_boxes(img, boxes, savename=None, class_names=None):
241 |     colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);
242 |     def get_color(c, x, max_val):
243 |         ratio = float(x)/max_val * 5
244 |         i = int(math.floor(ratio))
245 |         j = int(math.ceil(ratio))
246 |         ratio = ratio - i
247 |         r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
248 |         return int(r*255)
249 | 
250 |     width = img.width
251 |     height = img.height
252 |     draw = ImageDraw.Draw(img)
253 |     for i in range(len(boxes)):
254 |         box = boxes[i]
255 |         x1 = (box[0] - box[2]/2.0) * width
256 |         y1 = (box[1] - box[3]/2.0) * height
257 |         x2 = (box[0] + box[2]/2.0) * width
258 |         y2 = (box[1] + box[3]/2.0) * height
259 | 
260 |         rgb = (255, 0, 0)
261 |         if len(box) >= 7 and class_names:
262 |             cls_conf = box[5]
263 |             cls_id = box[6]
264 |             print('%s: %f' % (class_names[cls_id], cls_conf))
265 |             classes = len(class_names)
266 |             offset = cls_id * 123457 % classes
267 |             red   = get_color(2, offset, classes)
268 |             green = get_color(1, offset, classes)
269 |             blue  = get_color(0, offset, classes)
270 |             rgb = (red, green, blue)
271 |             draw.text((x1, y1), class_names[cls_id], fill=rgb)
272 |         draw.rectangle([x1, y1, x2, y2], outline = rgb)
273 |     if savename:
274 |         print("save plot results to %s" % savename)
275 |         img.save(savename)
276 |     return img
277 | 
278 | def read_truths(lab_path):
279 |     if not os.path.exists(lab_path):
280 |         return np.array([])
281 |     if os.path.getsize(lab_path):
282 |         truths = np.loadtxt(lab_path)
283 |         truths = truths.reshape(truths.size/5, 5) # to avoid single truth problem
284 |         return truths
285 |     else:
286 |         return np.array([])
287 | 
288 | def read_truths_args(lab_path, min_box_scale):
289 |     truths = read_truths(lab_path)
290 |     new_truths = []
291 |     for i in range(truths.shape[0]):
292 |         if truths[i][3] < min_box_scale:
293 |             continue
294 |         new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]])
295 |     return np.array(new_truths)
296 | 
297 | def load_class_names(namesfile):
298 |     class_names = []
299 |     with open(namesfile, 'r') as fp:
300 |         lines = fp.readlines()
301 |     for line in lines:
302 |         line = line.rstrip()
303 |         class_names.append(line)
304 |     return class_names
305 | 
306 | def image2torch(img):
307 |     width = img.width
308 |     height = img.height
309 |     img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
310 |     img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous()
311 |     img = img.view(1, 3, height, width)
312 |     img = img.float().div(255.0)
313 |     return img
314 | 
315 | def partirion_output(model, img, action):
316 |     model.eval()
317 |     img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0)
318 |     img = Variable(img)
319 |     with torch.no_grad():
320 |         output = model(img.cuda(), mode=action, client=True)
321 |         output = output.data
322 |         del img
323 |     return output
324 | 
325 | def get_boxes(res, model, conf_thresh, nms_thresh):
326 | 
327 |     boxes = get_region_boxes(res, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0]
328 | 
329 |     boxes = nms(boxes, nms_thresh)
330 | 
331 |     return boxes
332 | 
333 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
334 |     model.eval()
335 |     t0 = time.time()
336 | 
337 |     if isinstance(img, Image.Image):
338 |         width = img.width
339 |         height = img.height
340 |         img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
341 |         img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous()
342 |         img = img.view(1, 3, height, width)
343 |         img = img.float().div(255.0)
344 |     elif type(img) == np.ndarray: # cv2 image
345 |         img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0)
346 |     else:
347 |         print("unknow image type")
348 |         exit(-1)
349 | 
350 |     t1 = time.time()
351 | 
352 |     if use_cuda:
353 |         img = img.cuda()
354 |     img = torch.autograd.Variable(img)
355 |     t2 = time.time()
356 | 
357 |     output = model(img)
358 |     output = output.data
359 |     #for j in range(100):
360 |     #    sys.stdout.write('%f ' % (output.storage()[j]))
361 |     #print('')
362 |     t3 = time.time()
363 | 
364 |     boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0]
365 |     #for j in range(len(boxes)):
366 |     #    print(boxes[j])
367 |     t4 = time.time()
368 | 
369 |     boxes = nms(boxes, nms_thresh)
370 |     t5 = time.time()
371 | 
372 |     if False:
373 |         print('-----------------------------------')
374 |         print(' image to tensor : %f' % (t1 - t0))
375 |         print('  tensor to cuda : %f' % (t2 - t1))
376 |         print('         predict : %f' % (t3 - t2))
377 |         print('get_region_boxes : %f' % (t4 - t3))
378 |         print('             nms : %f' % (t5 - t4))
379 |         print('           total : %f' % (t5 - t0))
380 |         print('-----------------------------------')
381 |     return boxes
382 | 
383 | def read_data_cfg(datacfg):
384 |     options = dict()
385 |     options['gpus'] = '0,1,2,3'
386 |     options['num_workers'] = '10'
387 |     with open(datacfg, 'r') as fp:
388 |         lines = fp.readlines()
389 | 
390 |     for line in lines:
391 |         line = line.strip()
392 |         if line == '':
393 |             continue
394 |         key,value = line.split('=')
395 |         key = key.strip()
396 |         value = value.strip()
397 |         options[key] = value
398 |     return options
399 | 
400 | def scale_bboxes(bboxes, width, height):
401 |     import copy
402 |     dets = copy.deepcopy(bboxes)
403 |     for i in range(len(dets)):
404 |         dets[i][0] = dets[i][0] * width
405 |         dets[i][1] = dets[i][1] * height
406 |         dets[i][2] = dets[i][2] * width
407 |         dets[i][3] = dets[i][3] * height
408 |     return dets
409 |       
410 | def file_lines(thefilepath):
411 |     count = 0
412 |     thefile = open(thefilepath, 'rb')
413 |     while True:
414 |         buffer = thefile.read(8192*1024)
415 |         if not buffer:
416 |             break
417 |         count += buffer.count('\n')
418 |     thefile.close( )
419 |     return count
420 | 
421 | def get_image_size(fname):
422 |     '''Determine the image type of fhandle and return its size.
423 |     from draco'''
424 |     with open(fname, 'rb') as fhandle:
425 |         head = fhandle.read(24)
426 |         if len(head) != 24: 
427 |             return
428 |         if imghdr.what(fname) == 'png':
429 |             check = struct.unpack('>i', head[4:8])[0]
430 |             if check != 0x0d0a1a0a:
431 |                 return
432 |             width, height = struct.unpack('>ii', head[16:24])
433 |         elif imghdr.what(fname) == 'gif':
434 |             width, height = struct.unpack('<HH', head[6:10])
435 |         elif imghdr.what(fname) == 'jpeg' or imghdr.what(fname) == 'jpg':
436 |             try:
437 |                 fhandle.seek(0) # Read 0xff next
438 |                 size = 2 
439 |                 ftype = 0 
440 |                 while not 0xc0 <= ftype <= 0xcf:
441 |                     fhandle.seek(size, 1)
442 |                     byte = fhandle.read(1)
443 |                     while ord(byte) == 0xff:
444 |                         byte = fhandle.read(1)
445 |                     ftype = ord(byte)
446 |                     size = struct.unpack('>H', fhandle.read(2))[0] - 2 
447 |                 # We are at a SOFn block
448 |                 fhandle.seek(1, 1)  # Skip `precision' byte.
449 |                 height, width = struct.unpack('>HH', fhandle.read(4))
450 |             except Exception: #IGNORE:W0703
451 |                 return
452 |         else:
453 |             return
454 |         return width, height
455 | 
456 | def logging(message):
457 |     print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message))
458 | 


--------------------------------------------------------------------------------
/models/vgg16.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch
  3 | 
  4 | __all__ = ['Vgg16', 'vgg16']
  5 | 
  6 | class Vgg16(nn.Module):
  7 |     def __init__(self, num_classes=1000, init_weights=True):
  8 |         super(Vgg16, self).__init__()
  9 |         self.features = nn.Sequential(
 10 |             nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 0
 11 |             nn.ReLU(inplace=True), # 1
 12 |             nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 2
 13 |             nn.ReLU(inplace=True), # 30
 14 |             nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 4
 15 |             nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 5
 16 |             nn.ReLU(inplace=True), # 6
 17 |             nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 7
 18 |             nn.ReLU(inplace=True), # 8
 19 |             nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 9
 20 |             nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 10
 21 |             nn.ReLU(inplace=True), # 11
 22 |             nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 12
 23 |             nn.ReLU(inplace=True), # 13
 24 |             nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 14
 25 |             nn.ReLU(inplace=True), # 15
 26 |             nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 16
 27 |             nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 17
 28 |             nn.ReLU(inplace=True), # 18
 29 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 19
 30 |             nn.ReLU(inplace=True), # 20
 31 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 21
 32 |             nn.ReLU(inplace=True), # 22
 33 |             nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 23
 34 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 24
 35 |             nn.ReLU(inplace=True), # 25
 36 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 26
 37 |             nn.ReLU(inplace=True), # 27
 38 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 28
 39 |             nn.ReLU(inplace=True), # 29
 40 |             nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 30
 41 |         )
 42 |         self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) # 18
 43 |         # x = torch.flatten(x, 1)
 44 |         self.classifier = nn.Sequential(
 45 |             nn.Linear(512 * 7 * 7, 4096),  # 19
 46 |             nn.ReLU(True),
 47 |             nn.Dropout(),
 48 |             nn.Linear(4096, 4096), # 20
 49 |             nn.ReLU(True),
 50 |             nn.Dropout(),
 51 |             nn.Linear(4096, num_classes), # 21
 52 |         )
 53 | 
 54 |         if init_weights:
 55 |             self._initialize_weights()
 56 | 
 57 |     def forward(self, x, server=True, partition=0):
 58 |         if server == True:
 59 |             if partition == 0:
 60 |                 x = self.features(x)
 61 |                 x = self.avgpool(x)
 62 |                 x = torch.flatten(x, 1)
 63 |                 x = self.classifier(x)
 64 |             elif partition == 1:
 65 |                 x = self.features[2:](x)
 66 |                 x = self.avgpool(x)
 67 |                 x = torch.flatten(x, 1)
 68 |                 x = self.classifier(x)
 69 |             elif partition == 2:
 70 |                 x = self.features[4:](x)
 71 |                 x = self.avgpool(x)
 72 |                 x = torch.flatten(x, 1)
 73 |                 x = self.classifier(x)
 74 |             elif partition == 3:
 75 |                 x = self.features[5:](x)
 76 |                 x = self.avgpool(x)
 77 |                 x = torch.flatten(x, 1)
 78 |                 x = self.classifier(x)
 79 |             elif partition == 4:
 80 |                 x = self.features[7:](x)
 81 |                 x = self.avgpool(x)
 82 |                 x = torch.flatten(x, 1)
 83 |                 x = self.classifier(x)
 84 |             elif partition == 5:
 85 |                 x = self.features[9:](x)
 86 |                 x = self.avgpool(x)
 87 |                 x = torch.flatten(x, 1)
 88 |                 x = self.classifier(x)
 89 |             elif partition == 6:
 90 |                 x = self.features[10:](x)
 91 |                 x = self.avgpool(x)
 92 |                 x = torch.flatten(x, 1)
 93 |                 x = self.classifier(x)
 94 |             elif partition == 7:
 95 |                 x = self.features[12:](x)
 96 |                 x = self.avgpool(x)
 97 |                 x = torch.flatten(x, 1)
 98 |                 x = self.classifier(x)
 99 |             elif partition == 8:
100 |                 x = self.features[14:](x)
101 |                 x = self.avgpool(x)
102 |                 x = torch.flatten(x, 1)
103 |                 x = self.classifier(x)
104 |             elif partition == 9:
105 |                 x = self.features[16:](x)
106 |                 x = self.avgpool(x)
107 |                 x = torch.flatten(x, 1)
108 |                 x = self.classifier(x)
109 |             elif partition == 10:
110 |                 x = self.features[17:](x)
111 |                 x = self.avgpool(x)
112 |                 x = torch.flatten(x, 1)
113 |                 x = self.classifier(x)
114 |             elif partition == 11:
115 |                 x = self.features[19:](x)
116 |                 x = self.avgpool(x)
117 |                 x = torch.flatten(x, 1)
118 |                 x = self.classifier(x)
119 |             elif partition == 12:
120 |                 x = self.features[21:](x)
121 |                 x = self.avgpool(x)
122 |                 x = torch.flatten(x, 1)
123 |                 x = self.classifier(x)
124 |             elif partition == 13:
125 |                 x = self.features[23:](x)
126 |                 x = self.avgpool(x)
127 |                 x = torch.flatten(x, 1)
128 |                 x = self.classifier(x)
129 |             elif partition == 14:
130 |                 x = self.features[24:](x)
131 |                 x = self.avgpool(x)
132 |                 x = torch.flatten(x, 1)
133 |                 x = self.classifier(x)
134 |             elif partition == 15:
135 |                 x = self.features[26:](x)
136 |                 x = self.avgpool(x)
137 |                 x = torch.flatten(x, 1)
138 |                 x = self.classifier(x)
139 |             elif partition == 16:
140 |                 x = self.features[28:](x)
141 |                 x = self.avgpool(x)
142 |                 x = torch.flatten(x, 1)
143 |                 x = self.classifier(x)
144 |             elif partition == 17:
145 |                 x = self.features[30:](x)
146 |                 x = self.avgpool(x)
147 |                 x = torch.flatten(x, 1)
148 |                 x = self.classifier(x)
149 |             elif partition == 18:
150 |                 x = self.avgpool(x)
151 |                 x = torch.flatten(x, 1)
152 |                 x = self.classifier(x)
153 |             elif partition == 19:
154 |                 x = self.classifier(x)
155 |             elif partition == 20:
156 |                 x = self.classifier[3:](x)
157 |             elif partition == 21:
158 |                 x = self.classifier[6:](x)
159 |             elif partition == 22:
160 |                 x = x
161 |             else:
162 |                 print('Please give the right partition point.')
163 |         else:
164 |             if partition == 0:
165 |                 x = x
166 |             elif partition == 1:
167 |                 x = self.features[0:2](x)
168 |             elif partition == 2:
169 |                 x = self.features[0:4](x)
170 |             elif partition == 3:
171 |                 x = self.features[0:5](x)
172 |             elif partition == 4:
173 |                 x = self.features[0:7](x)
174 |             elif partition == 5:
175 |                 x = self.features[0:9](x)
176 |             elif partition == 6:
177 |                 x = self.features[0:10](x)
178 |             elif partition == 7:
179 |                 x = self.features[0:12](x)
180 |             elif partition == 8:
181 |                 x = self.features[0:14](x)
182 |             elif partition == 9:
183 |                 x = self.features[0:16](x)
184 |             elif partition == 10:
185 |                 x = self.features[0:17](x)
186 |             elif partition == 11:
187 |                 x = self.features[0:19](x)
188 |             elif partition == 12:
189 |                 x = self.features[0:21](x)
190 |             elif partition == 13:
191 |                 x = self.features[0:23](x)
192 |             elif partition == 14:
193 |                 x = self.features[0:24](x)
194 |             elif partition == 15:
195 |                 x = self.features[0:26](x)
196 |             elif partition == 16:
197 |                 x = self.features[0:28](x)
198 |             elif partition == 17:
199 |                 x = self.features[0:30](x)
200 |             elif partition == 18:
201 |                 x = self.features(x)
202 |             elif partition == 19:
203 |                 x = self.features(x)
204 |                 x = self.avgpool(x)
205 |                 x = torch.flatten(x, 1)
206 |             elif partition == 20:
207 |                 x = self.features(x)
208 |                 x = self.avgpool(x)
209 |                 x = torch.flatten(x, 1)
210 |                 x = self.classifier[0:3](x)
211 |             elif partition == 21:
212 |                 x = self.features(x)
213 |                 x = self.avgpool(x)
214 |                 x = torch.flatten(x, 1)
215 |                 x = self.classifier[0:6](x)
216 |             else:
217 |                 x = self.features(x)
218 |                 x = self.avgpool(x)
219 |                 x = torch.flatten(x, 1)
220 |                 x = self.classifier(x)
221 |         return x
222 | 
223 |     def _initialize_weights(self):
224 |         for m in self.modules():
225 |             if isinstance(m, nn.Conv2d):
226 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
227 |                 if m.bias is not None:
228 |                     nn.init.constant_(m.bias, 0)
229 |             elif isinstance(m, nn.BatchNorm2d):
230 |                 nn.init.constant_(m.weight, 1)
231 |                 nn.init.constant_(m.bias, 0)
232 |             elif isinstance(m, nn.Linear):
233 |                 nn.init.normal_(m.weight, 0, 0.01)
234 |                 nn.init.constant_(m.bias, 0)
235 | 
236 | 
237 | def vgg16(num_classes=1000, pretrained=True, progress=True):
238 |     file = 'https://download.pytorch.org/models/vgg16-397923af.pth'
239 |     model = Vgg16(num_classes)
240 |     if pretrained:
241 |         state_dict = torch.hub.load_state_dict_from_url(file, progress=progress)
242 |         model.load_state_dict(state_dict)
243 | 
244 |     return model
245 | 
246 | if __name__ == '__main__':
247 |     print('test partition points in vgg16!!!')
248 | 
249 |     import json
250 |     import torchvision.transforms as transforms
251 |     from PIL import Image
252 | 
253 |     with open("imagenet_class_index.json", "r") as read_file:
254 |         class_idx = json.load(read_file)
255 |         labels = {int(key): value for key, value in class_idx.items()}
256 | 
257 |     model = vgg16()
258 |     model.eval()
259 |     if torch.cuda.is_available():
260 |         model.cuda()
261 | 
262 |     min_img_size = 224
263 |     transform_pipeline = transforms.Compose([transforms.Resize((min_img_size, min_img_size)),
264 |                                              transforms.ToTensor(),
265 |                                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
266 |                                                                   std=[0.229, 0.224, 0.225])])
267 | 
268 |     img = Image.open('Golden_Retriever_Hund_Dog.jpg')
269 |     img = transform_pipeline(img)
270 |     img = img.unsqueeze(0)
271 | 
272 |     for partition in range(23):
273 |         with torch.no_grad():
274 |             intermediate = model(img.cuda(), server=False, partition=partition)
275 |             prediction = model(intermediate, server=True, partition=partition)
276 | 
277 |             prediction = torch.argmax(prediction)
278 | 
279 |             print('partition point ', partition, labels[prediction.item()])
280 | 


--------------------------------------------------------------------------------
/models/vgg16FrontEndDelay.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/vgg16FrontEndDelay.pkl


--------------------------------------------------------------------------------
/models/voc.names:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 | 


--------------------------------------------------------------------------------
/models/yoloFrontEndDelay.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/yoloFrontEndDelay.pkl


--------------------------------------------------------------------------------
/muLinUCB.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | def fillThetaContext(layerInfo, theta_context_dim):
  4 |     Action_num = len(layerInfo)
  5 |     x_theta = np.zeros((theta_context_dim, Action_num))
  6 |     actionList = []
  7 |     for i in range(Action_num):
  8 |         x_theta[0][i] = layerInfo[i][3]
  9 |         x_theta[1][i] = layerInfo[i][0]
 10 | 
 11 |         x_theta[2][i] = layerInfo[i][4]
 12 |         x_theta[3][i] = layerInfo[i][1]
 13 | 
 14 |         x_theta[4][i] = layerInfo[i][5]
 15 |         x_theta[5][i] = layerInfo[i][2]
 16 | 
 17 |         x_theta[6][i] = layerInfo[i][6]
 18 |         actionList.append(layerInfo[i][7])
 19 |     return x_theta, actionList
 20 | 
 21 | def getCx(x_theta, Action_num):
 22 |     listC_x = []
 23 |     for i in range(Action_num):
 24 |         temp = np.sqrt(np.matmul(x_theta[:, [i]].T, x_theta[:, [i]]))
 25 |         listC_x.append(temp[0][0])
 26 |     Cx = pow(max(listC_x), 2)
 27 |     return Cx
 28 | 
 29 | class muLinUCB():
 30 |     def __init__(self, mu, layerInfo, frontDelay):
 31 |         self.mu = mu
 32 |         self.numOfAction = len(layerInfo)
 33 |         self.thetaContextDim = 7
 34 |         self.x_theta, self.actionList = fillThetaContext(layerInfo, self.thetaContextDim)
 35 |         self.C_x = getCx(self.x_theta, self.numOfAction)
 36 |         self.frontDelay = frontDelay
 37 | 
 38 |         self.frameNum = 200
 39 |         self.delta = 0.1
 40 |         self.C_noise = 0.05
 41 |         self.l_key = 0.8
 42 |         self.l_nonkey = 0.2
 43 |         self.C_theta = 1
 44 |         self.A = np.diag(np.random.randint(1, 9, size=self.thetaContextDim))
 45 |         self.b = np.zeros((self.thetaContextDim, 1))
 46 |         self.alpha = (self.C_theta + np.sqrt(np.log((1 + self.frameNum * self.C_x * self.C_x)/self.delta) * self.thetaContextDim)*self.C_noise)/(1 - self.l_key)
 47 | 
 48 |         self.forceSamplingRate = 0.25
 49 |         self.forceSampleFrame = np.ceil(np.power(self.frameNum, self.forceSamplingRate))
 50 |         print('forceSampleFrame:', self.forceSampleFrame)
 51 | 
 52 |     def updateDoublingTrickFrameNum(self, current_frame):
 53 |         if current_frame > self.frameNum:
 54 |             self.frameNum = self.frameNum * 2
 55 |             self.alpha = (self.C_theta + np.sqrt(np.log((1 + self.frameNum * self.C_x * self.C_x) / self.delta) * self.thetaContextDim) * self.C_noise) / (1 - self.l_key)
 56 |             self.forceSampleFrame = np.ceil(np.power(self.frameNum, self.forceSamplingRate))
 57 |             return True
 58 |         return False
 59 | 
 60 |     def getEstimationAction(self, key_frame, current_frame):
 61 |         A_inv = np.linalg.inv(self.A)
 62 |         theta = np.matmul(A_inv, self.b)
 63 | 
 64 |         if key_frame:
 65 |             L = self.l_key
 66 |         else:
 67 |             L = self.l_nonkey
 68 | 
 69 |         estimate_delay = []
 70 | 
 71 |         for action_index in range(self.numOfAction):
 72 |             x_1 = np.copy(self.x_theta[:, [action_index]])
 73 |             x_2 = np.copy(self.x_theta[:, [action_index]])
 74 | 
 75 |             temp_1 = np.matmul(x_1.T, theta)
 76 |             temp_2 = self.alpha * np.sqrt((1 - L) * np.matmul(np.matmul(x_1.T, A_inv), x_2))
 77 | 
 78 |             estimate_delay.append(temp_1 - temp_2 + self.frontDelay[action_index])
 79 | 
 80 |         if current_frame % self.forceSampleFrame == 0:
 81 |             estimate_action = estimate_delay.index(min(estimate_delay[0:-1]))
 82 |         else:
 83 |             estimate_action = estimate_delay.index(min(estimate_delay))
 84 |         return estimate_action
 85 | 
 86 |     def updateA_b(self, estimate_action, actual_delay):
 87 |         if estimate_action != self.numOfAction - 1:
 88 |             self.A = self.A + np.matmul(self.x_theta[:, [estimate_action]], self.x_theta[:, [estimate_action]].T)
 89 |             self.b = self.b + self.x_theta[:, [estimate_action]] * actual_delay
 90 | 
 91 | 
 92 | if __name__ == '__main__':
 93 |     partitionInfo = {
 94 |         0: [13, 3, 24, 15346630656, 123633664, 26208256, 4818272],
 95 |         1: [12, 3, 23, 15259926528, 123633664, 22996992, 102761824],
 96 |         2: [11, 3, 22, 13410238464, 123633664, 19785728, 102761824],
 97 |         3: [11, 3, 21, 13410238464, 123633664, 16574464, 25691488],
 98 |         4: [10, 3, 20, 12485394432, 123633664, 13363200, 51381600],
 99 |         5: [9, 3, 19, 10635706368, 123633664, 10151936, 51381600],
100 |         6: [9, 3, 18, 10635706368, 123633664, 8546304, 12846432],
101 |         7: [8, 3, 17, 9710862336, 123633664, 6940672, 25691496],
102 |         8: [7, 3, 16, 7861174272, 123633664, 5335040, 25691496],
103 |         9: [6, 3, 15, 6011486208, 123633664, 4532224, 25691496],
104 |         10: [6, 3, 14, 6011486208, 123633664, 3729408, 6423912],
105 |         11: [5, 3, 13, 5086642176, 123633664, 2926592, 12846440],
106 |         12: [4, 3, 12, 3236954112, 123633664, 2123776, 12846440],
107 |         13: [3, 3, 11, 1387266048, 123633664, 1320960, 12846440],
108 |         14: [3, 3, 10, 1387266048, 123633664, 919552, 3212648],
109 |         15: [2, 3, 9, 924844032, 123633664, 518144, 3212648],
110 |         16: [1, 3, 8, 462422016, 123633664, 417792, 3212648],
111 |         17: [0, 3, 7, 0, 123633664, 317440, 3212648],
112 |         18: [0, 3, 6, 0, 123633664, 217088, 3212648],
113 |         19: [0, 3, 4, 0, 123633664, 16384, 804200],
114 |         20: [0, 2, 2, 0, 20873216, 12288, 804200],
115 |         21: [0, 1, 0, 0, 4096000, 0, 132416],
116 |         22: [0, 0, 0, 0, 0, 0, 0]
117 |     }
118 | 
119 |     frontDelay = [0 for index in range(len(partitionInfo))]
120 |     muLinUCB = muLinUCB(0.25, partitionInfo, frontDelay)
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 


--------------------------------------------------------------------------------
/yolo_utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import time
  4 | import math
  5 | import torch
  6 | import numpy as np
  7 | from PIL import Image, ImageDraw, ImageFont
  8 | from torch.autograd import Variable
  9 | 
 10 | import struct
 11 | import imghdr
 12 | 
 13 | def sigmoid(x):
 14 |     return 1.0/(math.exp(-x)+1.)
 15 | 
 16 | def softmax(x):
 17 |     x = torch.exp(x - torch.max(x))
 18 |     x = x/x.sum()
 19 |     return x
 20 | 
 21 | 
 22 | def bbox_iou(box1, box2, x1y1x2y2=True):
 23 |     if x1y1x2y2:
 24 |         mx = min(box1[0], box2[0])
 25 |         Mx = max(box1[2], box2[2])
 26 |         my = min(box1[1], box2[1])
 27 |         My = max(box1[3], box2[3])
 28 |         w1 = box1[2] - box1[0]
 29 |         h1 = box1[3] - box1[1]
 30 |         w2 = box2[2] - box2[0]
 31 |         h2 = box2[3] - box2[1]
 32 |     else:
 33 |         mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0)
 34 |         Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0)
 35 |         my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0)
 36 |         My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0)
 37 |         w1 = box1[2]
 38 |         h1 = box1[3]
 39 |         w2 = box2[2]
 40 |         h2 = box2[3]
 41 |     uw = Mx - mx
 42 |     uh = My - my
 43 |     cw = w1 + w2 - uw
 44 |     ch = h1 + h2 - uh
 45 |     carea = 0
 46 |     if cw <= 0 or ch <= 0:
 47 |         return 0.0
 48 | 
 49 |     area1 = w1 * h1
 50 |     area2 = w2 * h2
 51 |     carea = cw * ch
 52 |     uarea = area1 + area2 - carea
 53 |     return carea/uarea
 54 | 
 55 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
 56 |     if x1y1x2y2:
 57 |         mx = torch.min(boxes1[0], boxes2[0])
 58 |         Mx = torch.max(boxes1[2], boxes2[2])
 59 |         my = torch.min(boxes1[1], boxes2[1])
 60 |         My = torch.max(boxes1[3], boxes2[3])
 61 |         w1 = boxes1[2] - boxes1[0]
 62 |         h1 = boxes1[3] - boxes1[1]
 63 |         w2 = boxes2[2] - boxes2[0]
 64 |         h2 = boxes2[3] - boxes2[1]
 65 |     else:
 66 |         mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0)
 67 |         Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0)
 68 |         my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0)
 69 |         My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0)
 70 |         w1 = boxes1[2]
 71 |         h1 = boxes1[3]
 72 |         w2 = boxes2[2]
 73 |         h2 = boxes2[3]
 74 |     uw = Mx - mx
 75 |     uh = My - my
 76 |     cw = w1 + w2 - uw
 77 |     ch = h1 + h2 - uh
 78 |     mask = ((cw <= 0) + (ch <= 0) > 0)
 79 |     area1 = w1 * h1
 80 |     area2 = w2 * h2
 81 |     carea = cw * ch
 82 |     carea[mask] = 0
 83 |     uarea = area1 + area2 - carea
 84 |     return carea/uarea
 85 | 
 86 | def nms(boxes, nms_thresh):
 87 |     if len(boxes) == 0:
 88 |         return boxes
 89 | 
 90 |     det_confs = torch.zeros(len(boxes))
 91 |     for i in range(len(boxes)):
 92 |         det_confs[i] = 1-boxes[i][4]                
 93 | 
 94 |     _,sortIds = torch.sort(det_confs)
 95 |     out_boxes = []
 96 |     for i in range(len(boxes)):
 97 |         box_i = boxes[sortIds[i]]
 98 |         if box_i[4] > 0:
 99 |             out_boxes.append(box_i)
100 |             for j in range(i+1, len(boxes)):
101 |                 box_j = boxes[sortIds[j]]
102 |                 if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh:
103 |                     #print(box_i, box_j, bbox_iou(box_i, box_j, x1y1x2y2=False))
104 |                     box_j[4] = 0
105 |     return out_boxes
106 | 
107 | def convert2cpu(gpu_matrix):
108 |     return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
109 | 
110 | def convert2cpu_long(gpu_matrix):
111 |     return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
112 | 
113 | def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False):
114 |     num_anchors = int(num_anchors)
115 |     anchor_step = int(len(anchors)/num_anchors)
116 |     if output.dim() == 3:
117 |         output = output.unsqueeze(0)
118 |     batch = output.size(0)
119 |     assert(output.size(1) == (5+num_classes)*num_anchors)
120 |     h = output.size(2)
121 |     w = output.size(3)
122 | 
123 |     t0 = time.time()
124 |     all_boxes = []
125 |     output = output.view(int(batch*num_anchors), int(5+num_classes), int(h*w)).transpose(0, 1).contiguous().view(int(5+num_classes), int(batch*num_anchors*h*w))
126 | 
127 |     grid_x = torch.linspace(0, w-1, w).repeat(h,1).repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda()
128 |     grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda()
129 |     xs = torch.sigmoid(output[0]) + grid_x
130 |     ys = torch.sigmoid(output[1]) + grid_y
131 | 
132 |     anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0]))
133 |     anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1]))
134 |     anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda()
135 |     anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda()
136 |     ws = torch.exp(output[2]) * anchor_w
137 |     hs = torch.exp(output[3]) * anchor_h
138 | 
139 |     det_confs = torch.sigmoid(output[4])
140 | 
141 |     cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0, 1))).data
142 |     cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
143 |     cls_max_confs = cls_max_confs.view(-1)
144 |     cls_max_ids = cls_max_ids.view(-1)
145 |     t1 = time.time()
146 |     
147 |     sz_hw = h*w
148 |     sz_hwa = sz_hw*num_anchors
149 |     det_confs = convert2cpu(det_confs)
150 |     cls_max_confs = convert2cpu(cls_max_confs)
151 |     cls_max_ids = convert2cpu_long(cls_max_ids)
152 |     xs = convert2cpu(xs)
153 |     ys = convert2cpu(ys)
154 |     ws = convert2cpu(ws)
155 |     hs = convert2cpu(hs)
156 |     if validation:
157 |         cls_confs = convert2cpu(cls_confs.view(-1, num_classes))
158 |     t2 = time.time()
159 |     for b in range(batch):
160 |         boxes = []
161 |         for cy in range(h):
162 |             for cx in range(w):
163 |                 for i in range(num_anchors):
164 |                     ind = b*sz_hwa + i*sz_hw + cy*w + cx
165 |                     det_conf =  det_confs[ind]
166 |                     if only_objectness:
167 |                         conf =  det_confs[ind]
168 |                     else:
169 |                         conf = det_confs[ind] * cls_max_confs[ind]
170 |     
171 |                     if conf > conf_thresh:
172 |                         bcx = xs[ind]
173 |                         bcy = ys[ind]
174 |                         bw = ws[ind]
175 |                         bh = hs[ind]
176 |                         cls_max_conf = cls_max_confs[ind]
177 |                         cls_max_id = cls_max_ids[ind]
178 |                         box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id]
179 |                         if (not only_objectness) and validation:
180 |                             for c in range(num_classes):
181 |                                 tmp_conf = cls_confs[ind][c]
182 |                                 if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh:
183 |                                     box.append(tmp_conf)
184 |                                     box.append(c)
185 |                         boxes.append(box)
186 |         all_boxes.append(boxes)
187 |     t3 = time.time()
188 |     if False:
189 |         print('---------------------------------')
190 |         print('matrix computation : %f' % (t1-t0))
191 |         print('        gpu to cpu : %f' % (t2-t1))
192 |         print('      boxes filter : %f' % (t3-t2))
193 |         print('---------------------------------')
194 |     return all_boxes
195 | 
196 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
197 |     import cv2
198 |     colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);
199 |     def get_color(c, x, max_val):
200 |         ratio = float(x)/max_val * 5
201 |         i = int(math.floor(ratio))
202 |         j = int(math.ceil(ratio))
203 |         ratio = ratio - i
204 |         r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
205 |         return int(r*255)
206 | 
207 |     width = img.shape[1]
208 |     height = img.shape[0]
209 |     for i in range(len(boxes)):
210 |         box = boxes[i]
211 |         x1 = int(((box[0] - box[2]/2.0) * width))
212 |         y1 = int(((box[1] - box[3]/2.0) * height))
213 |         x2 = int(((box[0] + box[2]/2.0) * width))
214 |         y2 = int(((box[1] + box[3]/2.0) * height))
215 | 
216 |         if color:
217 |             rgb = color
218 |         else:
219 |             rgb = (255, 0, 0)
220 |         if len(box) >= 7 and class_names:
221 |             cls_conf = box[5]
222 |             cls_id = box[6]
223 |             print('%s: %f' % (class_names[cls_id], cls_conf))
224 |             classes = len(class_names)
225 |             offset = cls_id * 123457 % classes
226 |             red   = get_color(2, offset, classes)
227 |             green = get_color(1, offset, classes)
228 |             blue  = get_color(0, offset, classes)
229 |             if color is None:
230 |                 rgb = (red, green, blue)
231 |             img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 240), 2)
232 |             #img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, rgb, 2)
233 |         #img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 2)
234 |         img = cv2.rectangle(img, (x1,y1), (x2,y2), (0, 0, 240), 2)
235 |     if savename:
236 |         print("save plot results to %s" % savename)
237 |         cv2.imwrite(savename, img)
238 |     return img
239 | 
240 | def plot_boxes(img, boxes, savename=None, class_names=None):
241 |     colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]);
242 |     def get_color(c, x, max_val):
243 |         ratio = float(x)/max_val * 5
244 |         i = int(math.floor(ratio))
245 |         j = int(math.ceil(ratio))
246 |         ratio = ratio - i
247 |         r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
248 |         return int(r*255)
249 | 
250 |     width = img.width
251 |     height = img.height
252 |     draw = ImageDraw.Draw(img)
253 |     for i in range(len(boxes)):
254 |         box = boxes[i]
255 |         x1 = (box[0] - box[2]/2.0) * width
256 |         y1 = (box[1] - box[3]/2.0) * height
257 |         x2 = (box[0] + box[2]/2.0) * width
258 |         y2 = (box[1] + box[3]/2.0) * height
259 | 
260 |         rgb = (255, 0, 0)
261 |         if len(box) >= 7 and class_names:
262 |             cls_conf = box[5]
263 |             cls_id = box[6]
264 |             print('%s: %f' % (class_names[cls_id], cls_conf))
265 |             classes = len(class_names)
266 |             offset = cls_id * 123457 % classes
267 |             red   = get_color(2, offset, classes)
268 |             green = get_color(1, offset, classes)
269 |             blue  = get_color(0, offset, classes)
270 |             rgb = (red, green, blue)
271 |             draw.text((x1, y1), class_names[cls_id], fill=rgb)
272 |         draw.rectangle([x1, y1, x2, y2], outline = rgb)
273 |     if savename:
274 |         print("save plot results to %s" % savename)
275 |         img.save(savename)
276 |     return img
277 | 
278 | def read_truths(lab_path):
279 |     if not os.path.exists(lab_path):
280 |         return np.array([])
281 |     if os.path.getsize(lab_path):
282 |         truths = np.loadtxt(lab_path)
283 |         truths = truths.reshape(truths.size/5, 5) # to avoid single truth problem
284 |         return truths
285 |     else:
286 |         return np.array([])
287 | 
288 | def read_truths_args(lab_path, min_box_scale):
289 |     truths = read_truths(lab_path)
290 |     new_truths = []
291 |     for i in range(truths.shape[0]):
292 |         if truths[i][3] < min_box_scale:
293 |             continue
294 |         new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]])
295 |     return np.array(new_truths)
296 | 
297 | def load_class_names(namesfile):
298 |     class_names = []
299 |     with open(namesfile, 'r') as fp:
300 |         lines = fp.readlines()
301 |     for line in lines:
302 |         line = line.rstrip()
303 |         class_names.append(line)
304 |     return class_names
305 | 
306 | def image2torch(img):
307 |     width = img.width
308 |     height = img.height
309 |     img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
310 |     img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous()
311 |     img = img.view(1, 3, height, width)
312 |     img = img.float().div(255.0)
313 |     return img
314 | 
315 | def partirion_output(model, img, action):
316 |     model.eval()
317 |     img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0)
318 |     img = Variable(img)
319 |     with torch.no_grad():
320 |         output = model(img.cuda(), mode=action, client=True)
321 |         output = output.data
322 |         del img
323 |     return output
324 | 
325 | def get_boxes(res, model, conf_thresh, nms_thresh):
326 |     boxes = get_region_boxes(res, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0]
327 |     boxes = nms(boxes, nms_thresh)
328 |     return boxes
329 | 
330 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
331 |     model.eval()
332 |     t0 = time.time()
333 | 
334 |     if isinstance(img, Image.Image):
335 |         width = img.width
336 |         height = img.height
337 |         img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
338 |         img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous()
339 |         img = img.view(1, 3, height, width)
340 |         img = img.float().div(255.0)
341 |     elif type(img) == np.ndarray: # cv2 image
342 |         img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0)
343 |     else:
344 |         print("unknow image type")
345 |         exit(-1)
346 | 
347 |     t1 = time.time()
348 | 
349 |     if use_cuda:
350 |         img = img.cuda()
351 |     img = torch.autograd.Variable(img)
352 |     t2 = time.time()
353 | 
354 |     output = model(img)
355 |     output = output.data
356 |     #for j in range(100):
357 |     #    sys.stdout.write('%f ' % (output.storage()[j]))
358 |     #print('')
359 |     t3 = time.time()
360 | 
361 |     boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0]
362 |     #for j in range(len(boxes)):
363 |     #    print(boxes[j])
364 |     t4 = time.time()
365 | 
366 |     boxes = nms(boxes, nms_thresh)
367 |     t5 = time.time()
368 | 
369 |     if False:
370 |         print('-----------------------------------')
371 |         print(' image to tensor : %f' % (t1 - t0))
372 |         print('  tensor to cuda : %f' % (t2 - t1))
373 |         print('         predict : %f' % (t3 - t2))
374 |         print('get_region_boxes : %f' % (t4 - t3))
375 |         print('             nms : %f' % (t5 - t4))
376 |         print('           total : %f' % (t5 - t0))
377 |         print('-----------------------------------')
378 |     return boxes
379 | 
380 | def read_data_cfg(datacfg):
381 |     options = dict()
382 |     options['gpus'] = '0,1,2,3'
383 |     options['num_workers'] = '10'
384 |     with open(datacfg, 'r') as fp:
385 |         lines = fp.readlines()
386 | 
387 |     for line in lines:
388 |         line = line.strip()
389 |         if line == '':
390 |             continue
391 |         key,value = line.split('=')
392 |         key = key.strip()
393 |         value = value.strip()
394 |         options[key] = value
395 |     return options
396 | 
397 | def scale_bboxes(bboxes, width, height):
398 |     import copy
399 |     dets = copy.deepcopy(bboxes)
400 |     for i in range(len(dets)):
401 |         dets[i][0] = dets[i][0] * width
402 |         dets[i][1] = dets[i][1] * height
403 |         dets[i][2] = dets[i][2] * width
404 |         dets[i][3] = dets[i][3] * height
405 |     return dets
406 |       
407 | def file_lines(thefilepath):
408 |     count = 0
409 |     thefile = open(thefilepath, 'rb')
410 |     while True:
411 |         buffer = thefile.read(8192*1024)
412 |         if not buffer:
413 |             break
414 |         count += buffer.count('\n')
415 |     thefile.close( )
416 |     return count
417 | 
418 | def get_image_size(fname):
419 |     '''Determine the image type of fhandle and return its size.
420 |     from draco'''
421 |     with open(fname, 'rb') as fhandle:
422 |         head = fhandle.read(24)
423 |         if len(head) != 24: 
424 |             return
425 |         if imghdr.what(fname) == 'png':
426 |             check = struct.unpack('>i', head[4:8])[0]
427 |             if check != 0x0d0a1a0a:
428 |                 return
429 |             width, height = struct.unpack('>ii', head[16:24])
430 |         elif imghdr.what(fname) == 'gif':
431 |             width, height = struct.unpack('<HH', head[6:10])
432 |         elif imghdr.what(fname) == 'jpeg' or imghdr.what(fname) == 'jpg':
433 |             try:
434 |                 fhandle.seek(0) # Read 0xff next
435 |                 size = 2 
436 |                 ftype = 0 
437 |                 while not 0xc0 <= ftype <= 0xcf:
438 |                     fhandle.seek(size, 1)
439 |                     byte = fhandle.read(1)
440 |                     while ord(byte) == 0xff:
441 |                         byte = fhandle.read(1)
442 |                     ftype = ord(byte)
443 |                     size = struct.unpack('>H', fhandle.read(2))[0] - 2 
444 |                 # We are at a SOFn block
445 |                 fhandle.seek(1, 1)  # Skip `precision' byte.
446 |                 height, width = struct.unpack('>HH', fhandle.read(4))
447 |             except Exception: #IGNORE:W0703
448 |                 return
449 |         else:
450 |             return
451 |         return width, height
452 | 
453 | def logging(message):
454 |     print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message))
455 | 


--------------------------------------------------------------------------------