├── README.md
├── client_camera_main.py
├── communication.py
├── edge_server_main.py
├── keyFrameDetection.py
├── models
│   ├── Golden_Retriever_Hund_Dog.jpg
│   ├── imagenet_class_index.json
│   ├── tiny_yolo.py
│   ├── utils.py
│   ├── vgg16.py
│   ├── vgg16FrontEndDelay.pkl
│   ├── voc.names
│   └── yoloFrontEndDelay.pkl
├── muLinUCB.py
└── yolo_utils.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## Autodidactic Neurosurgeon: Collaborative Deep Inference for Mobile Edge Intelligence via Online Learning
Autodidactic Neurosurgeon (ANS) is an online learning module that automatically learns the optimal DNN partition point on the fly. The details of ANS are in our WWW'21 paper "Autodidactic Neurosurgeon: Collaborative Deep Inference for Mobile Edge Intelligence via Online Learning".

### PyTorch
We modify the *forward* function in PyTorch to partition the DNN model.

You can run **vgg16.py** as an example to see the partitioning.

### TensorFlow 2.0+
Although we don't provide code for TensorFlow, you can modify the *\__call\__* function in TensorFlow to partition the DNN model.

### Two examples:
- vgg16
- tiny yolo v2
  - Download the Tiny YOLO weights from https://pjreddie.com/media/files/yolov2-tiny-voc.weights
  - Put yolov2-tiny-voc.weights in the "models" folder

### How to run the code:
- First, run *edge_server_main.py* on the edge server
- Then run *client_camera_main.py* on the Nvidia Jetson TX2
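Both entry scripts call the partitioned model as `model(x, server=..., partition=...)`. A minimal sketch of a *forward* written in that style (the `nn.ModuleList` layout and the `partition=None` default are assumptions for illustration; the repo's vgg16.py and models/tiny_yolo.py keep their own layer bookkeeping):

```python
# Sketch only: a partition-aware forward() in the spirit of vgg16.py /
# models/tiny_yolo.py. The ModuleList layout is an assumption.
import torch.nn as nn

class PartitionedNet(nn.Module):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)

    def forward(self, x, server=False, partition=None):
        if partition is None:
            layers = self.layers              # run the whole model locally
        elif server:
            layers = self.layers[partition:]  # edge server resumes at the cut
        else:
            layers = self.layers[:partition]  # mobile side stops at the cut
        for layer in layers:
            x = layer(x)
        return x
```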
--------------------------------------------------------------------------------
/client_camera_main.py:
--------------------------------------------------------------------------------

import sys
import argparse
import subprocess
import cv2
import torchvision.transforms as transforms
import torch
import numpy as np
from PIL import Image
import time
import pickle
import json

from models.vgg16 import vgg16
from models.tiny_yolo import tinyYolo
from keyFrameDetection import KeyFrameDetection
from communication import clientCommunication
from muLinUCB import muLinUCB
from yolo_utils import load_class_names, get_boxes, plot_boxes_cv2

WINDOW_NAME = 'CameraDemo'

# action No.: [layer type num{1: conv, 2: fc, 3: act}, total mac{1: conv, 2: fc, 3: act}, mid_data_size, partition point]
vgg_info = {
    0: [13, 3, 24, 15346630656, 123633664, 26208256, 4818272, 0],
    1: [12, 3, 23, 15259926528, 123633664, 22996992, 102761824, 1],
    2: [11, 3, 22, 13410238464, 123633664, 19785728, 102761824, 2],
    3: [11, 3, 21, 13410238464, 123633664, 16574464, 25691488, 3],
    4: [10, 3, 20, 12485394432, 123633664, 13363200, 51381600, 4],
    5: [9, 3, 19, 10635706368, 123633664, 10151936, 51381600, 5],
    6: [9, 3, 18, 10635706368, 123633664, 8546304, 12846432, 6],
    7: [8, 3, 17, 9710862336, 123633664, 6940672, 25691496, 7],
    8: [7, 3, 16, 7861174272, 123633664, 5335040, 25691496, 8],
    9: [6, 3, 15, 6011486208, 123633664, 4532224, 25691496, 9],
    10: [6, 3, 14, 6011486208, 123633664, 3729408, 6423912, 10],
    11: [5, 3, 13, 5086642176, 123633664, 2926592, 12846440, 11],
    12: [4, 3, 12, 3236954112, 123633664, 2123776, 12846440, 12],
    13: [3, 3, 11, 1387266048, 123633664, 1320960, 12846440, 13],
    14: [3, 3, 10, 1387266048, 123633664, 919552, 3212648, 14],
    15: [2, 3, 9, 924844032, 123633664, 518144, 3212648, 15],
    16: [1, 3, 8, 462422016, 123633664, 417792, 3212648, 16],
    17: [0, 3, 7, 0, 123633664, 317440, 3212648, 17],
    18: [0, 3, 6, 0, 123633664, 217088, 3212648, 18],
    19: [0, 3, 4, 0, 123633664, 16384, 804200, 19],
    20: [0, 2, 2, 0, 20873216, 12288, 804200, 20],
    21: [0, 1, 0, 0, 4096000, 0, 132416, 21],
    22: [0, 0, 0, 0, 0, 0, 0, 22]
}

yolo_info = {
    0: [9, 0, 22, 3537437696, 0, 28640768, 16614800, 0],
    1: [8, 0, 22, 3462677504, 0, 28640768, 88606096, 1],
    2: [8, 0, 21, 3462677504, 0, 23102976, 88606096, 2],
    3: [8, 0, 20, 3462677504, 0, 17565184, 88606096, 3],
    4: [8, 0, 19, 3462677504, 0, 14796288, 22152576, 4],
    5: [7, 0, 19, 3263316992, 0, 14796288, 44303744, 5],
    6: [7, 0, 18, 3263316992, 0, 12027392, 44303744, 6],
    7: [7, 0, 17, 3263316992, 0, 9258496, 44303744, 7],
    8: [7, 0, 16, 3263316992, 0, 7874048, 11076992, 8],
    9: [6, 0, 16, 3063956480, 0, 7874048, 22152576, 9],
    10: [6, 0, 15, 3063956480, 0, 6489600, 22152576, 10],
    11: [6, 0, 14, 3063956480, 0, 5105152, 22152576, 11],
    12: [6, 0, 13, 3063956480, 0, 4412928, 5539200, 12],
    13: [5, 0, 13, 2864595968, 0, 4412928, 11076992, 13],
    14: [5, 0, 12, 2864595968, 0, 3720704, 11076992, 14],
    15: [5, 0, 11, 2864595968, 0, 3028480, 11076992, 15],
    16: [5, 0, 10, 2864595968, 0, 2682368, 2770304, 16],
    17: [4, 0, 10, 2665235456, 0, 2682368, 5539208, 17],
    18: [4, 0, 9, 2665235456, 0, 2336256, 5539208, 18],
    19: [4, 0, 8, 2665235456, 0, 1990144, 5539208, 19],
    20: [4, 0, 7, 2665235456, 0, 1817088, 1385864, 20],
    21: [3, 0, 7, 2465874944, 0, 1817088, 2770312, 21],
    22: [3, 0, 6, 2465874944, 0, 1644032, 2770312, 22],
    23: [3, 0, 5, 2465874944, 0, 1470976, 2770312, 23],
    24: [3, 0, 4, 2465874944, 0, 1384448, 2770312, 24],
    25: [2, 0, 4, 1668432896, 0, 1384448, 5539208, 25],
    26: [2, 0, 3, 1668432896, 0, 1038336, 5539208, 26],
    27: [2, 0, 2, 1668432896, 0, 692224, 5539208, 27],
    28: [1, 0, 2, 73548800, 0, 692224, 2770312, 28],
    29: [1, 0, 1, 73548800, 0, 346112, 2770312, 29],
    30: [1, 0, 0, 73548800, 0, 0, 2770312, 30],
    31: [0, 0, 0, 0, 0, 0, 0, 31]
}
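Each row above is the context one candidate partition (action) presents to the online learner. The counts shrink as the action index grows, which suggests they describe the layers and MACs left after the cut, plus the size of the intermediate tensor that must cross the network; treat that reading as an assumption. A hypothetical helper (not part of the repo) that names the fields against the schema in the comment:

```python
# Hypothetical helper, not in the repo: name the fields of one
# vgg_info / yolo_info row according to the schema in the comment above.
def describe_action(info, action):
    (conv_n, fc_n, act_n,
     conv_mac, fc_mac, act_mac,
     mid_data_size, partition_point) = info[action]
    return {
        'layer counts (conv, fc, act)': (conv_n, fc_n, act_n),
        'total MACs (conv, fc, act)': (conv_mac, fc_mac, act_mac),
        'intermediate data size (assumed bytes)': mid_data_size,
        'partition point': partition_point,
    }

# e.g. describe_action(vgg_info, 22) names the all-zero row, i.e. the
# "process fully on the mobile device" action used by getActualDelay() below.
```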
def parse_args():
    # Parse input arguments
    desc = 'Capture and display live camera video on Jetson TX2/TX1'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--rtsp', dest='use_rtsp',
                        help='use IP CAM (remember to also set --uri)',
                        action='store_true')
    parser.add_argument('--uri', dest='rtsp_uri',
                        help='RTSP URI, e.g. rtsp://192.168.1.64:554',
                        default=None, type=str)
    parser.add_argument('--latency', dest='rtsp_latency',
                        help='latency in ms for RTSP [200]',
                        default=200, type=int)
    parser.add_argument('--usb', dest='use_usb',
                        help='use USB webcam (remember to also set --vid)',
                        action='store_true')
    parser.add_argument('--vid', dest='video_dev',
                        help='device # of USB webcam (/dev/video?) [1]',
                        default=1, type=int)
    parser.add_argument('--width', dest='image_width',
                        help='image width',
                        default=640, type=int)
    parser.add_argument('--height', dest='image_height',
                        help='image height',
                        default=480, type=int)
    parser.add_argument('--dnn', dest='dnn_model',
                        help='vgg, yolo',
                        default='yolo', type=str)
    parser.add_argument('--host', dest='host',
                        help='IP address',
                        default='192.168.1.72', type=str)
    parser.add_argument('--port', dest='port',
                        help='IP port',
                        default=8080, type=int)
    args = parser.parse_args()
    return args


def open_cam_rtsp(uri, width, height, latency):
    gst_str = ('rtspsrc location={} latency={} ! '
               'rtph264depay ! h264parse ! omxh264dec ! '
               'nvvidconv ! '
               'video/x-raw, width=(int){}, height=(int){}, '
               'format=(string)BGRx ! '
               'videoconvert ! appsink').format(uri, latency, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)


def open_cam_usb(dev, width, height):
    # We want to set width and height here, otherwise we could just do:
    #     return cv2.VideoCapture(dev)
    gst_str = ('v4l2src device=/dev/video{} ! '
               'video/x-raw, width=(int){}, height=(int){} ! '
               'videoconvert ! appsink').format(dev, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)


def open_cam_onboard(width, height):
    gst_elements = str(subprocess.check_output('gst-inspect-1.0'))
    if 'nvcamerasrc' in gst_elements:
        # On versions of L4T prior to 28.1, add 'flip-method=2' into gst_str
        gst_str = ('nvcamerasrc ! '
                   'video/x-raw(memory:NVMM), '
                   'width=(int)2592, height=(int)1458, '
                   'format=(string)I420, framerate=(fraction)30/1 ! '
                   'nvvidconv ! '
                   'video/x-raw, width=(int){}, height=(int){}, '
                   'format=(string)BGRx ! '
                   'videoconvert ! appsink').format(width, height)
    elif 'nvarguscamerasrc' in gst_elements:
        gst_str = ('nvarguscamerasrc ! '
                   'video/x-raw(memory:NVMM), '
                   'width=(int)640, height=(int)480, '
                   'format=(string)NV12, framerate=(fraction)30/1 ! '
                   'nvvidconv flip-method=0 ! '
                   'video/x-raw, width=(int){}, height=(int){}, '
                   'format=(string)BGRx ! '
                   'videoconvert ! appsink').format(width, height)
    else:
        raise RuntimeError('onboard camera source not found!')
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def open_window(width, height):
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(WINDOW_NAME, width, height)
    cv2.moveWindow(WINDOW_NAME, 0, 0)
    cv2.setWindowTitle(WINDOW_NAME, 'Camera Demo for Jetson TX2/TX1')


def prepare_image_vgg(frame):
    min_img_size = 224
    transform_pipeline = transforms.Compose([transforms.Resize((min_img_size, min_img_size)),
                                             transforms.ToTensor(),
                                             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                  std=[0.229, 0.224, 0.225])])
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img_rgb)
    img = transform_pipeline(img)
    img = img.unsqueeze(0)
    return img


def prepare_image_yolo(frame):
    min_img_size = 416
    image = cv2.resize(frame, (min_img_size, min_img_size), interpolation=cv2.INTER_CUBIC)
    image = np.array(image, dtype='float32')
    img = torch.from_numpy(image.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
    return img


def show_preds(img, label, averageTime):
    x = 10
    y = 50

    font = cv2.FONT_HERSHEY_PLAIN

    pred = '{:20s}'.format(label[1])
    cv2.putText(img, pred, (x, y), font, 2, (0, 0, 240), 2, cv2.LINE_AA)
    y += 30
    timeShow = 'AvgTime: {:.4f}'.format(averageTime)
    cv2.putText(img, timeShow, (x, y), font, 2, (0, 0, 240), 2, cv2.LINE_AA)

    return img


def getVggLabelDic(class_file):
    with open(class_file, "r") as read_file:
        class_idx = json.load(read_file)
    labels = {int(key): value for key, value in class_idx.items()}
    return labels


def decodePrediction_vgg(res, labels):
    res = torch.autograd.Variable(res)
    label_index = torch.argmax(res).item()
    return labels[label_index]


def getActualDelay(action, model, preprocessed_image, totallayerNo, communication):
    if action == totallayerNo - 1:  # last action: process fully on the mobile device
        prediction = model(preprocessed_image.cuda())
        # Return the raw output tensor; .item() only works on one-element
        # tensors and would fail on a full prediction.
        return 0, prediction.data
    else:
        intermediate_output = model(preprocessed_image.cuda(), server=False, partition=action)

        data_to_server = [action, intermediate_output.data]
        del intermediate_output

        start_time = time.time()
        communication.send_msg(data_to_server)

        result = communication.receive_msg()

        communication.close_channel()
        end_time = time.time()

        return end_time - start_time, result


def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)
    print('OpenCV version: {}'.format(cv2.__version__))

    if args.dnn_model == 'vgg':
        model = vgg16()
        model.eval()
        frontEndDelay = load_obj('models/vgg16FrontEndDelay')
        labels = getVggLabelDic('models/imagenet_class_index.json')
        partitionInfo = vgg_info
    else:
        model = tinyYolo()
        model.eval()
        frontEndDelay = load_obj('models/yoloFrontEndDelay')
        labels = load_class_names('models/voc.names')
        partitionInfo = yolo_info

    model.cuda()
    Action_num = len(partitionInfo)

    # Named 'bandit' so the instance does not shadow the imported muLinUCB class.
    bandit = muLinUCB(mu=0.25, layerInfo=partitionInfo,
                      frontDelay=frontEndDelay)
    communication = clientCommunication(args.host, args.port)

    if args.use_rtsp:
        cap = open_cam_rtsp(args.rtsp_uri,
                            args.image_width,
                            args.image_height,
                            args.rtsp_latency)
    elif args.use_usb:
        cap = open_cam_usb(args.video_dev,
                           args.image_width,
                           args.image_height)
    else:  # by default, use the Jetson onboard camera
        cap = open_cam_onboard(args.image_width, args.image_height)
        # cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        sys.exit('Failed to open camera!')

    open_window(args.image_width, args.image_height)

    show_help = True
    full_scrn = False
    help_text = '"Esc" to Quit, "H" for Help, "F" to Toggle Fullscreen'
    font = cv2.FONT_HERSHEY_PLAIN

    total_time = 0
    total_frame_num = 0
    currentFrameNum = 0
    keyflag = False
    KeyFrame = KeyFrameDetection(threshold=0.8)

    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            # Check to see if the user has closed the window
            # If yes, terminate the program
            break
        _, img = cap.read()  # grab the next image frame from camera

        if args.dnn_model == 'vgg':
            preprocessed_image = prepare_image_vgg(img)
        else:
            preprocessed_image = prepare_image_yolo(img)

        # doubling trick is here.
        currentFrameNum = currentFrameNum + 1
        if bandit.updateDoublingTrickFrameNum(currentFrameNum):
            currentFrameNum = 0

        # key frame detection
        if total_frame_num == 0:
            keyflag = False
            old_frame = np.copy(img)
        else:
            keyflag = KeyFrame.compare_images(old_frame, img)
            old_frame = np.copy(img)

        # print('keyflag', keyflag)

        partitionPoint = bandit.getEstimationAction(keyflag, currentFrameNum)
        # print('partitionPoint', partitionPoint)

        end2endtime_start = time.time()
        actual_delay, res = getActualDelay(partitionPoint, model, preprocessed_image, Action_num, communication)

        end2endtime_end = time.time()

        total_frame_num = total_frame_num + 1
        total_time = total_time + (end2endtime_end - end2endtime_start)
        average_time = total_time/total_frame_num

        # update A and b
        bandit.updateA_b(partitionPoint, actual_delay)

        # print results on the screen
        if args.dnn_model == 'vgg':
            label = decodePrediction_vgg(res, labels)
            img = show_preds(img, label, average_time)
        else:
            boxes = get_boxes(res, model, conf_thresh=0.5, nms_thresh=0.5)
            img = plot_boxes_cv2(img, boxes, class_names=labels)

        if show_help:
            cv2.putText(img, help_text, (11, 20), font,
                        1.0, (32, 32, 32), 4, cv2.LINE_AA)
            cv2.putText(img, help_text, (10, 20), font,
                        1.0, (240, 240, 240), 1, cv2.LINE_AA)

        cv2.imshow(WINDOW_NAME, img)

        key = cv2.waitKey(10)
        if key == 27:  # ESC key: quit program
            break
        elif key == ord('H') or key == ord('h'):  # toggle help message
            show_help = not show_help
        elif key == ord('F') or key == ord('f'):  # toggle fullscreen
            full_scrn = not full_scrn
            if full_scrn:
                cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_FULLSCREEN)
            else:
                cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_NORMAL)

    cap.release()
    cv2.destroyAllWindows()
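The main loop only relies on three muLinUCB methods: `getEstimationAction`, `updateA_b`, and the doubling-trick reset, which appears to restart exploration in epochs of doubling length. muLinUCB.py itself is not reproduced in this excerpt, so the following is only a rough sketch of the textbook per-action LinUCB bookkeeping those names suggest (ridge estimate plus a confidence bonus scaled by `mu`), not the repo's implementation:

```python
# Rough sketch of per-action LinUCB bookkeeping; an illustration under
# stated assumptions, not the repo's muLinUCB.py. Contexts could be rows
# like vgg_info[a].
import numpy as np

class LinUCBSketch:
    def __init__(self, contexts, mu):
        self.contexts = contexts  # action -> feature vector x_a
        self.mu = mu              # exploration weight
        d = len(next(iter(contexts.values())))
        self.A = {a: np.eye(d) for a in contexts}    # A_a = I + sum of x x^T
        self.b = {a: np.zeros(d) for a in contexts}  # b_a = sum of reward * x

    def select(self):
        def score(a):
            x = np.asarray(self.contexts[a], dtype=float)
            A_inv = np.linalg.inv(self.A[a])
            theta = A_inv @ self.b[a]                # ridge estimate
            return theta @ x + self.mu * np.sqrt(x @ A_inv @ x)
        # Delay is a cost, so the real module presumably picks the minimizer.
        return min(self.contexts, key=score)

    def update(self, action, delay):
        x = np.asarray(self.contexts[action], dtype=float)
        self.A[action] += np.outer(x, x)
        self.b[action] += delay * x
```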
--------------------------------------------------------------------------------
/communication.py:
--------------------------------------------------------------------------------

import struct
import pickle
import socket

def recv_msg(sock):
    # Read message length and unpack it into an integer
    raw_msglen = recvall(sock, 4)
    if not raw_msglen:
        return None
    msglen = struct.unpack('>I', raw_msglen)[0]
    # Read the message data
    return recvall(sock, msglen)

def recvall(sock, n):
    # Helper function to recv n bytes or return None if EOF is hit
    data = b''
    while len(data) < n:
        packet = sock.recv(n - len(data))
        if not packet:
            return None
        data += packet
    return data

def send_msg(sock, msg):
    # Prefix each message with a 4-byte length (network byte order)
    msg = struct.pack('>I', len(msg)) + msg
    sock.sendall(msg)

def decode_msg(msg):
    res = pickle.loads(msg)
    return res

def encode_msg(data):
    msg = pickle.dumps(data)
    return msg

class clientCommunication():
    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    def send_msg(self, msg):
        # A fresh TCP connection is opened for every message; the client
        # closes it again via close_channel() after each reply.
        msg = encode_msg(msg)
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.s.connect((self.host, self.port))
        send_msg(self.s, msg)

    def receive_msg(self):
        received = recv_msg(self.s)
        received = decode_msg(received)
        return received

    def close_channel(self):
        self.s.close()

class serverCommunication():
    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.s.bind((self.host, self.port))
        self.s.listen()

    def send_msg(self, conn, msg):
        msg = encode_msg(msg)
        send_msg(conn, msg)

    def receive_msg(self, conn):
        received = recv_msg(conn)
        received = decode_msg(received)
        return received

    def accept_conn(self):
        conn, addr = self.s.accept()
        return conn, addr

    def close_channel(self):
        self.s.close()
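The wire format above is a 4-byte big-endian length prefix followed by a pickled payload. A quick local round-trip over a socketpair shows the framing end to end (illustrative snippet, not part of the repo; it uses only the module-level helpers defined above):

```python
# Illustration only: exercise the length-prefixed pickle framing locally.
import socket

a, b = socket.socketpair()
send_msg(a, encode_msg({'partition': 4, 'payload': [1, 2, 3]}))
print(decode_msg(recv_msg(b)))  # -> {'partition': 4, 'payload': [1, 2, 3]}
a.close()
b.close()
```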
--------------------------------------------------------------------------------
/edge_server_main.py:
--------------------------------------------------------------------------------

import argparse
import cv2
import torchvision.transforms as transforms
import torch

from PIL import Image

from models.vgg16 import vgg16
from models.tiny_yolo import tinyYolo
from communication import serverCommunication


WINDOW_NAME = 'CameraDemo'


def parse_args():
    # Parse input arguments
    desc = 'ANS in edge server side'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--dnn', dest='dnn_model',
                        help='vgg, yolo',
                        default='yolo', type=str)
    parser.add_argument('--host', dest='host',
                        help='IP address',
                        default='192.168.1.72', type=str)
    parser.add_argument('--port', dest='port',
                        help='IP port',
                        default=8080, type=int)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)
    print('OpenCV version: {}'.format(cv2.__version__))

    if args.dnn_model == 'vgg':
        model = vgg16()
        model.eval()
    else:
        model = tinyYolo()
        model.eval()

    model.cuda()

    communication = serverCommunication(args.host, args.port)

    while True:
        try:
            conn, addr = communication.accept_conn()
            with conn:
                recv_data = communication.receive_msg(conn)
                print('received data from mobile device!!!')
                partition_point = recv_data[0]
                data = recv_data[1]
                data = torch.autograd.Variable(data)
                prediction = model(data.cuda(), server=True, partition=partition_point)
                res = prediction.data

                communication.send_msg(conn, res)

        # 'except A or B or C' only catches A; the exceptions must be a tuple.
        except (KeyboardInterrupt, TypeError, OSError):
            communication.close_channel()
            break

--------------------------------------------------------------------------------
/keyFrameDetection.py:
--------------------------------------------------------------------------------

import numpy as np
from skimage.metrics import structural_similarity as ssim
import cv2

class KeyFrameDetection:
    def __init__(self, threshold):
        self.threshold = threshold

    def compare_images(self, imgA, imgB):
        imgA = cv2.cvtColor(imgA, cv2.COLOR_BGR2GRAY)
        imgB = cv2.cvtColor(imgB, cv2.COLOR_BGR2GRAY)
        s = ssim(imgA, imgB)
        # print('ssim:', s)
        # A frame counts as a key frame when it differs enough from the
        # previous one.
        return s < self.threshold


if __name__ == "__main__":
    keyFrameDetection = KeyFrameDetection(0.6)
    # The sample image lives in the models folder.
    imgA = cv2.imread("models/Golden_Retriever_Hund_Dog.jpg")
    imgB = cv2.imread("models/Golden_Retriever_Hund_Dog.jpg")

    flag = keyFrameDetection.compare_images(imgA, imgB)
    print(flag)

--------------------------------------------------------------------------------
/models/Golden_Retriever_Hund_Dog.jpg:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/Golden_Retriever_Hund_Dog.jpg

--------------------------------------------------------------------------------
/models/imagenet_class_index.json:
--------------------------------------------------------------------------------

{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714",
"American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": 
["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], 
"229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": 
["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", 
"basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": 
["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], 
"634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", 
"pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": 
["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", 
"spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} -------------------------------------------------------------------------------- /models/tiny_yolo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import torch 6 | import math 7 | from collections import OrderedDict 8 | 9 | 10 | def convert2cpu(gpu_matrix): 11 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 12 | 13 | def load_conv_bn(buf, start, conv_model, bn_model): 14 | num_w = conv_model.weight.numel() 15 | # print('num_w', num_w) 16 | num_b = bn_model.bias.numel() 17 | # print('num_b', num_b) 18 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 19 | start = start + num_b 20 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])) 21 | start = start + num_b 22 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])) 23 | start = start + num_b 24 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])) 25 | start = start + num_b 26 | conv_model.weight.data.copy_(torch.reshape(torch.from_numpy(buf[start:start + num_w]), ( 27 | conv_model.weight.shape[0], conv_model.weight.shape[1], conv_model.weight.shape[2], 28 | conv_model.weight.shape[3]))) 29 | start = start + num_w 30 | # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w 31 | return start 32 | 33 | def load_conv(buf, start, conv_model): 34 | num_w = 
conv_model.weight.numel() 35 | num_b = conv_model.bias.numel() 36 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b])); start = start + num_b 37 | conv_model.weight.data.copy_(torch.reshape(torch.from_numpy(buf[start:start + num_w]), ( 38 | conv_model.weight.shape[0], conv_model.weight.shape[1], conv_model.weight.shape[2], 39 | conv_model.weight.shape[3]))) 40 | start = start + num_w 41 | return start 42 | 43 | class MaxPoolStride1(nn.Module): 44 | def __init__(self): 45 | super(MaxPoolStride1, self).__init__() 46 | 47 | def forward(self, x): 48 | x = F.max_pool2d(F.pad(x, (0, 1, 0, 1), mode='replicate'), 2, stride=1) 49 | return x 50 | 51 | def bbox_iou(box1, box2, x1y1x2y2=True): 52 | if x1y1x2y2: 53 | mx = min(box1[0], box2[0]) 54 | Mx = max(box1[2], box2[2]) 55 | my = min(box1[1], box2[1]) 56 | My = max(box1[3], box2[3]) 57 | w1 = box1[2] - box1[0] 58 | h1 = box1[3] - box1[1] 59 | w2 = box2[2] - box2[0] 60 | h2 = box2[3] - box2[1] 61 | else: 62 | mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0) 63 | Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0) 64 | my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0) 65 | My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0) 66 | w1 = box1[2] 67 | h1 = box1[3] 68 | w2 = box2[2] 69 | h2 = box2[3] 70 | uw = Mx - mx 71 | uh = My - my 72 | cw = w1 + w2 - uw 73 | ch = h1 + h2 - uh 74 | if cw <= 0 or ch <= 0: 75 | return 0.0 76 | 77 | area1 = w1 * h1 78 | area2 = w2 * h2 79 | carea = cw * ch 80 | uarea = area1 + area2 - carea 81 | return carea/uarea 82 | 83 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True): 84 | if x1y1x2y2: 85 | mx = torch.min(boxes1[0], boxes2[0]) 86 | Mx = torch.max(boxes1[2], boxes2[2]) 87 | my = torch.min(boxes1[1], boxes2[1]) 88 | My = torch.max(boxes1[3], boxes2[3]) 89 | w1 = boxes1[2] - boxes1[0] 90 | h1 = boxes1[3] - boxes1[1] 91 | w2 = boxes2[2] - boxes2[0] 92 | h2 = boxes2[3] - boxes2[1] 93 | else: 94 | mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0) 95 | Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0) 96 | my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0) 97 | My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0) 98 | w1 = boxes1[2] 99 | h1 = boxes1[3] 100 | w2 = boxes2[2] 101 | h2 = boxes2[3] 102 | uw = Mx - mx 103 | uh = My - my 104 | cw = w1 + w2 - uw 105 | ch = h1 + h2 - uh 106 | mask = ((cw <= 0) + (ch <= 0) > 0) 107 | area1 = w1 * h1 108 | area2 = w2 * h2 109 | carea = cw * ch 110 | carea[mask] = 0 111 | uarea = area1 + area2 - carea 112 | return carea/uarea 113 | 114 | def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, sil_thresh, seen): 115 | nB = target.size(0) 116 | nA = num_anchors 117 | nC = num_classes 118 | anchor_step = len(anchors)/num_anchors 119 | conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale 120 | coord_mask = torch.zeros(nB, nA, nH, nW) 121 | cls_mask = torch.zeros(nB, nA, nH, nW) 122 | tx = torch.zeros(nB, nA, nH, nW) 123 | ty = torch.zeros(nB, nA, nH, nW) 124 | tw = torch.zeros(nB, nA, nH, nW) 125 | th = torch.zeros(nB, nA, nH, nW) 126 | tconf = torch.zeros(nB, nA, nH, nW) 127 | tcls = torch.zeros(nB, nA, nH, nW) 128 | 129 | nAnchors = nA*nH*nW 130 | nPixels = nH*nW 131 | for b in range(nB): 132 | cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t() 133 | cur_ious = torch.zeros(nAnchors) 134 | for t in range(50): 135 | if target[b][t*5+1] == 0: 136 | break 137 | gx = target[b][t*5+1]*nW 138 | gy = target[b][t*5+2]*nH 139 | gw = target[b][t*5+3]*nW 140 | gh 
= target[b][t*5+4]*nH 141 | cur_gt_boxes = torch.FloatTensor([gx,gy,gw,gh]).repeat(nAnchors,1).t() 142 | cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 143 | conf_mask[b][cur_ious>sil_thresh] = 0 144 | if seen < 12800: 145 | if anchor_step == 4: 146 | tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1,nA,1,1).repeat(nB,1,nH,nW) 147 | ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(1,nA,1,1).repeat(nB,1,nH,nW) 148 | else: 149 | tx.fill_(0.5) 150 | ty.fill_(0.5) 151 | tw.zero_() 152 | th.zero_() 153 | coord_mask.fill_(1) 154 | 155 | nGT = 0 156 | nCorrect = 0 157 | for b in range(nB): 158 | for t in range(50): 159 | if target[b][t*5+1] == 0: 160 | break 161 | nGT = nGT + 1 162 | best_iou = 0.0 163 | best_n = -1 164 | min_dist = 10000 165 | gx = target[b][t*5+1] * nW 166 | gy = target[b][t*5+2] * nH 167 | gi = int(gx) 168 | gj = int(gy) 169 | gw = target[b][t*5+3]*nW 170 | gh = target[b][t*5+4]*nH 171 | gt_box = [0, 0, gw, gh] 172 | for n in range(nA): 173 | aw = anchors[anchor_step*n] 174 | ah = anchors[anchor_step*n+1] 175 | anchor_box = [0, 0, aw, ah] 176 | iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False) 177 | if anchor_step == 4: 178 | ax = anchors[anchor_step*n+2] 179 | ay = anchors[anchor_step*n+3] 180 | dist = pow(((gi+ax) - gx), 2) + pow(((gj+ay) - gy), 2) 181 | if iou > best_iou: 182 | best_iou = iou 183 | best_n = n 184 | elif anchor_step==4 and iou == best_iou and dist < min_dist: 185 | best_iou = iou 186 | best_n = n 187 | min_dist = dist 188 | 189 | gt_box = [gx, gy, gw, gh] 190 | pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi] 191 | 192 | coord_mask[b][best_n][gj][gi] = 1 193 | cls_mask[b][best_n][gj][gi] = 1 194 | conf_mask[b][best_n][gj][gi] = object_scale 195 | tx[b][best_n][gj][gi] = target[b][t*5+1] * nW - gi 196 | ty[b][best_n][gj][gi] = target[b][t*5+2] * nH - gj 197 | tw[b][best_n][gj][gi] = math.log(gw/anchors[anchor_step*best_n]) 198 | th[b][best_n][gj][gi] = math.log(gh/anchors[anchor_step*best_n+1]) 199 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou 200 | tconf[b][best_n][gj][gi] = iou 201 | tcls[b][best_n][gj][gi] = target[b][t*5] 202 | if iou > 0.5: 203 | nCorrect = nCorrect + 1 204 | 205 | return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls 206 | 207 | class RegionLoss(nn.Module): 208 | def __init__(self, num_classes=0, anchors=[], num_anchors=1): 209 | super(RegionLoss, self).__init__() 210 | self.num_classes = num_classes 211 | self.anchors = anchors 212 | self.num_anchors = num_anchors 213 | self.anchor_step = len(anchors)/num_anchors 214 | self.coord_scale = 1 215 | self.noobject_scale = 1 216 | self.object_scale = 5 217 | self.class_scale = 1 218 | self.thresh = 0.6 219 | self.seen = 0 220 | 221 | def forward(self, output, target): 222 | #output : BxAs*(4+1+num_classes)*H*W 223 | nB = output.data.size(0) 224 | nA = self.num_anchors 225 | nC = self.num_classes 226 | nH = output.data.size(2) 227 | nW = output.data.size(3) 228 | 229 | output = output.view(nB, nA, (5+nC), nH, nW) 230 | x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW)) 231 | y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW)) 232 | w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW) 233 | h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, 
nW) 234 | conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW)) 235 | cls = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda())) 236 | cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC) 237 | 238 | pred_boxes = torch.cuda.FloatTensor(4, nB*nA*nH*nW) 239 | grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda() 240 | grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda() 241 | anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda() 242 | anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda() 243 | anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW) 244 | anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW) 245 | pred_boxes[0] = x.data + grid_x 246 | pred_boxes[1] = y.data + grid_y 247 | pred_boxes[2] = torch.exp(w.data) * anchor_w 248 | pred_boxes[3] = torch.exp(h.data) * anchor_h 249 | pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)) 250 | 251 | nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf,tcls = build_targets(pred_boxes, target.data, self.anchors, nA, nC, \ 252 | nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen) 253 | cls_mask = (cls_mask == 1) 254 | nProposals = int((conf > 0.25).sum().data[0]) 255 | 256 | tx = Variable(tx.cuda()) 257 | ty = Variable(ty.cuda()) 258 | tw = Variable(tw.cuda()) 259 | th = Variable(th.cuda()) 260 | tconf = Variable(tconf.cuda()) 261 | tcls = Variable(tcls.view(-1)[cls_mask].long().cuda()) 262 | 263 | coord_mask = Variable(coord_mask.cuda()) 264 | conf_mask = Variable(conf_mask.cuda().sqrt()) 265 | cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,nC).cuda()) 266 | cls = cls[cls_mask].view(-1, nC) 267 | 268 | loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x*coord_mask, tx*coord_mask)/2.0 269 | loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y*coord_mask, ty*coord_mask)/2.0 270 | loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w*coord_mask, tw*coord_mask)/2.0 271 | loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h*coord_mask, th*coord_mask)/2.0 272 | loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0 273 | loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) 274 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls 275 | 276 | return loss 277 | 278 | 279 | class TinyYoloNet(nn.Module): 280 | def __init__(self): 281 | super(TinyYoloNet, self).__init__() 282 | self.seen = 0 283 | self.num_classes = 20 284 | self.anchors = [1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52] 285 | self.num_anchors = len(self.anchors)/2 286 | self.num_output = (5+self.num_classes)*self.num_anchors 287 | 288 | self.loss = RegionLoss(self.num_classes, self.anchors, self.num_anchors) 289 | self.cnn = nn.Sequential(OrderedDict([ 290 | # conv1 291 | ('conv1', nn.Conv2d(3, 16, 3, 1, 1, bias=False)), 292 | ('bn1', nn.BatchNorm2d(16)), 293 | ('leaky1', nn.LeakyReLU(0.1, inplace=True)), 294 | ('pool1', nn.MaxPool2d(2, 2)), 295 | 296 | # conv2 297 | ('conv2', nn.Conv2d(16, 32, 3, 1, 1, bias=False)), 298 | ('bn2', nn.BatchNorm2d(32)), 299 | ('leaky2', nn.LeakyReLU(0.1, inplace=True)), 300 | ('pool2', nn.MaxPool2d(2, 2)), 301 | 302 | # conv3 303 | ('conv3', nn.Conv2d(32, 64, 
3, 1, 1, bias=False)), 304 | ('bn3', nn.BatchNorm2d(64)), 305 | ('leaky3', nn.LeakyReLU(0.1, inplace=True)), 306 | ('pool3', nn.MaxPool2d(2, 2)), 307 | 308 | # conv4 309 | ('conv4', nn.Conv2d(64, 128, 3, 1, 1, bias=False)), 310 | ('bn4', nn.BatchNorm2d(128)), 311 | ('leaky4', nn.LeakyReLU(0.1, inplace=True)), 312 | ('pool4', nn.MaxPool2d(2, 2)), 313 | 314 | # conv5 315 | ('conv5', nn.Conv2d(128, 256, 3, 1, 1, bias=False)), 316 | ('bn5', nn.BatchNorm2d(256)), 317 | ('leaky5', nn.LeakyReLU(0.1, inplace=True)), 318 | ('pool5', nn.MaxPool2d(2, 2)), 319 | 320 | # conv6 321 | ('conv6', nn.Conv2d(256, 512, 3, 1, 1, bias=False)), 322 | ('bn6', nn.BatchNorm2d(512)), 323 | ('leaky6', nn.LeakyReLU(0.1, inplace=True)), 324 | ('pool6', MaxPoolStride1()), 325 | 326 | # conv7 327 | ('conv7', nn.Conv2d(512, 1024, 3, 1, 1, bias=False)), 328 | ('bn7', nn.BatchNorm2d(1024)), 329 | ('leaky7', nn.LeakyReLU(0.1, inplace=True)), 330 | 331 | # conv8 332 | ('conv8', nn.Conv2d(1024, 1024, 3, 1, 1, bias=False)), 333 | ('bn8', nn.BatchNorm2d(1024)), 334 | ('leaky8', nn.LeakyReLU(0.1, inplace=True)), 335 | 336 | # output 337 | ('output', nn.Conv2d(1024, int(self.num_output), 1, 1, 0)), 338 | ])) 339 | 340 | def forward(self, x, server=True, partition=0): 341 | if server == True: 342 | if partition == 0: 343 | x = self.cnn(x) 344 | else: 345 | x = self.cnn[partition:](x) 346 | else: 347 | if partition == 0: 348 | x = x 349 | else: 350 | x = self.cnn[0:partition](x) 351 | return x 352 | 353 | def print_network(self): 354 | print(self) 355 | 356 | def load_weights(self, path): 357 | 358 | buf = np.fromfile(path, dtype = np.float32) 359 | start = 4  # skip the 16-byte darknet header (version info + seen counter) 360 | #print(buf.shape) 361 | start = load_conv_bn(buf, start, self.cnn[0], self.cnn[1]) 362 | start = load_conv_bn(buf, start, self.cnn[4], self.cnn[5]) 363 | start = load_conv_bn(buf, start, self.cnn[8], self.cnn[9]) 364 | start = load_conv_bn(buf, start, self.cnn[12], self.cnn[13]) 365 | start = load_conv_bn(buf, start, self.cnn[16], self.cnn[17]) 366 | start = load_conv_bn(buf, start, self.cnn[20], self.cnn[21]) 367 | 368 | start = load_conv_bn(buf, start, self.cnn[24], self.cnn[25]) 369 | start = load_conv_bn(buf, start, self.cnn[27], self.cnn[28]) 370 | start = load_conv(buf, start, self.cnn[30]) 371 | 372 | def tinyYolo(): 373 | m = TinyYoloNet() 374 | m.float() 375 | m.load_weights('models/yolov2-tiny-voc.weights')  # relative path; download the weights into models/ per the README 376 | return m 377 | 378 | 379 | if __name__ == '__main__': 380 | from PIL import Image 381 | from utils import * 382 | 383 | m = tinyYolo() 384 | m.eval() 385 | # print(m) 386 | 387 | use_cuda = 1 388 | if use_cuda: 389 | m.cuda() 390 | 391 | img = Image.open('Golden_Retriever_Hund_Dog.jpg').convert('RGB') 392 | sized = img.resize((416, 416)) 393 | 394 | boxes = do_detect(m, sized, 0.5, 0.5, use_cuda) 395 | 396 | class_names = load_class_names('voc.names') 397 | plot_boxes(img, boxes, 'predict1.jpg', class_names) 398 | 399 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | from PIL import Image, ImageDraw, ImageFont 8 | from torch.autograd import Variable 9 | 10 | import struct 11 | import imghdr 12 | 13 | def sigmoid(x): 14 | return 1.0/(math.exp(-x)+1.)
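# --- Editor's example (added; not part of the original file) ------------------
# The partitioned forward() in models/tiny_yolo.py above is the mechanism the
# README refers to: the device runs self.cnn[0:partition] and the edge server
# finishes with self.cnn[partition:]. A minimal local sketch of that hand-off,
# assuming yolov2-tiny-voc.weights sits in models/ as the README instructs;
# the socket transfer that communication.py performs in the real pipeline is
# elided, so both halves run in one process here.
def _partition_demo(partition=8):
    import torch
    from models.tiny_yolo import tinyYolo
    model = tinyYolo()
    model.eval()
    frame = torch.randn(1, 3, 416, 416)  # stand-in for a preprocessed camera frame
    with torch.no_grad():
        mid = model(frame, server=False, partition=partition)  # device side: cnn[0:partition]
        out = model(mid, server=True, partition=partition)     # server side: cnn[partition:]
    return out  # shape (1, 125, 13, 13): 5 anchors x (5 + 20 VOC classes) on a 13x13 grid
# ------------------------------------------------------------------------------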
15 | 16 | def softmax(x): 17 | x = torch.exp(x - torch.max(x)) 18 | x = x/x.sum() 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | if x1y1x2y2: 24 | mx = min(box1[0], box2[0]) 25 | Mx = max(box1[2], box2[2]) 26 | my = min(box1[1], box2[1]) 27 | My = max(box1[3], box2[3]) 28 | w1 = box1[2] - box1[0] 29 | h1 = box1[3] - box1[1] 30 | w2 = box2[2] - box2[0] 31 | h2 = box2[3] - box2[1] 32 | else: 33 | mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0) 34 | Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0) 35 | my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0) 36 | My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0) 37 | w1 = box1[2] 38 | h1 = box1[3] 39 | w2 = box2[2] 40 | h2 = box2[3] 41 | uw = Mx - mx 42 | uh = My - my 43 | cw = w1 + w2 - uw 44 | ch = h1 + h2 - uh 45 | carea = 0 46 | if cw <= 0 or ch <= 0: 47 | return 0.0 48 | 49 | area1 = w1 * h1 50 | area2 = w2 * h2 51 | carea = cw * ch 52 | uarea = area1 + area2 - carea 53 | return carea/uarea 54 | 55 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True): 56 | if x1y1x2y2: 57 | mx = torch.min(boxes1[0], boxes2[0]) 58 | Mx = torch.max(boxes1[2], boxes2[2]) 59 | my = torch.min(boxes1[1], boxes2[1]) 60 | My = torch.max(boxes1[3], boxes2[3]) 61 | w1 = boxes1[2] - boxes1[0] 62 | h1 = boxes1[3] - boxes1[1] 63 | w2 = boxes2[2] - boxes2[0] 64 | h2 = boxes2[3] - boxes2[1] 65 | else: 66 | mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0) 67 | Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0) 68 | my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0) 69 | My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0) 70 | w1 = boxes1[2] 71 | h1 = boxes1[3] 72 | w2 = boxes2[2] 73 | h2 = boxes2[3] 74 | uw = Mx - mx 75 | uh = My - my 76 | cw = w1 + w2 - uw 77 | ch = h1 + h2 - uh 78 | mask = ((cw <= 0) + (ch <= 0) > 0) 79 | area1 = w1 * h1 80 | area2 = w2 * h2 81 | carea = cw * ch 82 | carea[mask] = 0 83 | uarea = area1 + area2 - carea 84 | return carea/uarea 85 | 86 | def nms(boxes, nms_thresh): 87 | if len(boxes) == 0: 88 | return boxes 89 | 90 | det_confs = torch.zeros(len(boxes)) 91 | for i in range(len(boxes)): 92 | det_confs[i] = 1-boxes[i][4] 93 | 94 | _,sortIds = torch.sort(det_confs) 95 | out_boxes = [] 96 | for i in range(len(boxes)): 97 | box_i = boxes[sortIds[i]] 98 | if box_i[4] > 0: 99 | out_boxes.append(box_i) 100 | for j in range(i+1, len(boxes)): 101 | box_j = boxes[sortIds[j]] 102 | if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: 103 | #print(box_i, box_j, bbox_iou(box_i, box_j, x1y1x2y2=False)) 104 | box_j[4] = 0 105 | return out_boxes 106 | 107 | def convert2cpu(gpu_matrix): 108 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 109 | 110 | def convert2cpu_long(gpu_matrix): 111 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 112 | 113 | def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False): 114 | num_anchors = int(num_anchors) 115 | anchor_step = int(len(anchors)/num_anchors) 116 | if output.dim() == 3: 117 | output = output.unsqueeze(0) 118 | batch = output.size(0) 119 | assert(output.size(1) == (5+num_classes)*num_anchors) 120 | h = output.size(2) 121 | w = output.size(3) 122 | 123 | t0 = time.time() 124 | all_boxes = [] 125 | output = output.view(int(batch*num_anchors), int(5+num_classes), int(h*w)).transpose(0, 1).contiguous().view(int(5+num_classes), int(batch*num_anchors*h*w)) 126 | 127 | grid_x = torch.linspace(0, w-1, w).repeat(h,1).repeat(int(batch*num_anchors), 1, 
1).view(int(batch*num_anchors*h*w)).cuda() 128 | grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda() 129 | xs = torch.sigmoid(output[0]) + grid_x 130 | ys = torch.sigmoid(output[1]) + grid_y 131 | 132 | anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0])) 133 | anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1])) 134 | anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda() 135 | anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda() 136 | ws = torch.exp(output[2]) * anchor_w 137 | hs = torch.exp(output[3]) * anchor_h 138 | 139 | det_confs = torch.sigmoid(output[4]) 140 | 141 | cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0, 1))).data 142 | cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 143 | cls_max_confs = cls_max_confs.view(-1) 144 | cls_max_ids = cls_max_ids.view(-1) 145 | t1 = time.time() 146 | 147 | sz_hw = h*w 148 | sz_hwa = sz_hw*num_anchors 149 | det_confs = convert2cpu(det_confs) 150 | cls_max_confs = convert2cpu(cls_max_confs) 151 | cls_max_ids = convert2cpu_long(cls_max_ids) 152 | xs = convert2cpu(xs) 153 | ys = convert2cpu(ys) 154 | ws = convert2cpu(ws) 155 | hs = convert2cpu(hs) 156 | if validation: 157 | cls_confs = convert2cpu(cls_confs.view(-1, num_classes)) 158 | t2 = time.time() 159 | for b in range(batch): 160 | boxes = [] 161 | for cy in range(h): 162 | for cx in range(w): 163 | for i in range(num_anchors): 164 | ind = b*sz_hwa + i*sz_hw + cy*w + cx 165 | det_conf = det_confs[ind] 166 | if only_objectness: 167 | conf = det_confs[ind] 168 | else: 169 | conf = det_confs[ind] * cls_max_confs[ind] 170 | 171 | if conf > conf_thresh: 172 | bcx = xs[ind] 173 | bcy = ys[ind] 174 | bw = ws[ind] 175 | bh = hs[ind] 176 | cls_max_conf = cls_max_confs[ind] 177 | cls_max_id = cls_max_ids[ind] 178 | box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 179 | if (not only_objectness) and validation: 180 | for c in range(num_classes): 181 | tmp_conf = cls_confs[ind][c] 182 | if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh: 183 | box.append(tmp_conf) 184 | box.append(c) 185 | boxes.append(box) 186 | all_boxes.append(boxes) 187 | t3 = time.time() 188 | if False: 189 | print('---------------------------------') 190 | print('matrix computation : %f' % (t1-t0)) 191 | print(' gpu to cpu : %f' % (t2-t1)) 192 | print(' boxes filter : %f' % (t3-t2)) 193 | print('---------------------------------') 194 | return all_boxes 195 | 196 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 197 | import cv2 198 | colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]); 199 | def get_color(c, x, max_val): 200 | ratio = float(x)/max_val * 5 201 | i = int(math.floor(ratio)) 202 | j = int(math.ceil(ratio)) 203 | ratio = ratio - i 204 | r = (1-ratio) * colors[i][c] + ratio*colors[j][c] 205 | return int(r*255) 206 | 207 | width = img.shape[1] 208 | height = img.shape[0] 209 | for i in range(len(boxes)): 210 | box = boxes[i] 211 | x1 = int(((box[0] - box[2]/2.0) * width)) 212 | y1 = int(((box[1] - box[3]/2.0) * height)) 213 | x2 = int(((box[0] + box[2]/2.0) * width)) 214 | y2 = int(((box[1] + box[3]/2.0) * height)) 215 | 216 | if color: 217 | rgb = color 218 | else: 219 | rgb = (255, 0, 0) 220 | if len(box) >= 7 and class_names: 221 | cls_conf = box[5] 222 | 
cls_id = box[6] 223 | print('%s: %f' % (class_names[cls_id], cls_conf)) 224 | classes = len(class_names) 225 | offset = cls_id * 123457 % classes 226 | red = get_color(2, offset, classes) 227 | green = get_color(1, offset, classes) 228 | blue = get_color(0, offset, classes) 229 | if color is None: 230 | rgb = (red, green, blue) 231 | img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 240), 2) 232 | #img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, rgb, 2) 233 | #img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 2) 234 | img = cv2.rectangle(img, (x1,y1), (x2,y2), (0, 0, 240), 2) 235 | if savename: 236 | print("save plot results to %s" % savename) 237 | cv2.imwrite(savename, img) 238 | return img 239 | 240 | def plot_boxes(img, boxes, savename=None, class_names=None): 241 | colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]); 242 | def get_color(c, x, max_val): 243 | ratio = float(x)/max_val * 5 244 | i = int(math.floor(ratio)) 245 | j = int(math.ceil(ratio)) 246 | ratio = ratio - i 247 | r = (1-ratio) * colors[i][c] + ratio*colors[j][c] 248 | return int(r*255) 249 | 250 | width = img.width 251 | height = img.height 252 | draw = ImageDraw.Draw(img) 253 | for i in range(len(boxes)): 254 | box = boxes[i] 255 | x1 = (box[0] - box[2]/2.0) * width 256 | y1 = (box[1] - box[3]/2.0) * height 257 | x2 = (box[0] + box[2]/2.0) * width 258 | y2 = (box[1] + box[3]/2.0) * height 259 | 260 | rgb = (255, 0, 0) 261 | if len(box) >= 7 and class_names: 262 | cls_conf = box[5] 263 | cls_id = box[6] 264 | print('%s: %f' % (class_names[cls_id], cls_conf)) 265 | classes = len(class_names) 266 | offset = cls_id * 123457 % classes 267 | red = get_color(2, offset, classes) 268 | green = get_color(1, offset, classes) 269 | blue = get_color(0, offset, classes) 270 | rgb = (red, green, blue) 271 | draw.text((x1, y1), class_names[cls_id], fill=rgb) 272 | draw.rectangle([x1, y1, x2, y2], outline = rgb) 273 | if savename: 274 | print("save plot results to %s" % savename) 275 | img.save(savename) 276 | return img 277 | 278 | def read_truths(lab_path): 279 | if not os.path.exists(lab_path): 280 | return np.array([]) 281 | if os.path.getsize(lab_path): 282 | truths = np.loadtxt(lab_path) 283 | truths = truths.reshape(truths.size/5, 5) # to avoid single truth problem 284 | return truths 285 | else: 286 | return np.array([]) 287 | 288 | def read_truths_args(lab_path, min_box_scale): 289 | truths = read_truths(lab_path) 290 | new_truths = [] 291 | for i in range(truths.shape[0]): 292 | if truths[i][3] < min_box_scale: 293 | continue 294 | new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]]) 295 | return np.array(new_truths) 296 | 297 | def load_class_names(namesfile): 298 | class_names = [] 299 | with open(namesfile, 'r') as fp: 300 | lines = fp.readlines() 301 | for line in lines: 302 | line = line.rstrip() 303 | class_names.append(line) 304 | return class_names 305 | 306 | def image2torch(img): 307 | width = img.width 308 | height = img.height 309 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 310 | img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous() 311 | img = img.view(1, 3, height, width) 312 | img = img.float().div(255.0) 313 | return img 314 | 315 | def partirion_output(model, img, action): 316 | model.eval() 317 | img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0) 318 | img = Variable(img) 319 | with torch.no_grad(): 
320 | output = model(img.cuda(), server=False, partition=action)  # run the on-device front end up to the chosen partition point 321 | output = output.data 322 | del img 323 | return output 324 | 325 | def get_boxes(res, model, conf_thresh, nms_thresh): 326 | 327 | boxes = get_region_boxes(res, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0] 328 | 329 | boxes = nms(boxes, nms_thresh) 330 | 331 | return boxes 332 | 333 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1): 334 | model.eval() 335 | t0 = time.time() 336 | 337 | if isinstance(img, Image.Image): 338 | width = img.width 339 | height = img.height 340 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 341 | img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous() 342 | img = img.view(1, 3, height, width) 343 | img = img.float().div(255.0) 344 | elif type(img) == np.ndarray: # cv2 image 345 | img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0) 346 | else: 347 | print("unknown image type") 348 | exit(-1) 349 | 350 | t1 = time.time() 351 | 352 | if use_cuda: 353 | img = img.cuda() 354 | img = torch.autograd.Variable(img) 355 | t2 = time.time() 356 | 357 | output = model(img) 358 | output = output.data 359 | #for j in range(100): 360 | # sys.stdout.write('%f ' % (output.storage()[j])) 361 | #print('') 362 | t3 = time.time() 363 | 364 | boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0] 365 | #for j in range(len(boxes)): 366 | # print(boxes[j]) 367 | t4 = time.time() 368 | 369 | boxes = nms(boxes, nms_thresh) 370 | t5 = time.time() 371 | 372 | if False: 373 | print('-----------------------------------') 374 | print(' image to tensor : %f' % (t1 - t0)) 375 | print(' tensor to cuda : %f' % (t2 - t1)) 376 | print(' predict : %f' % (t3 - t2)) 377 | print('get_region_boxes : %f' % (t4 - t3)) 378 | print(' nms : %f' % (t5 - t4)) 379 | print(' total : %f' % (t5 - t0)) 380 | print('-----------------------------------') 381 | return boxes 382 | 383 | def read_data_cfg(datacfg): 384 | options = dict() 385 | options['gpus'] = '0,1,2,3' 386 | options['num_workers'] = '10' 387 | with open(datacfg, 'r') as fp: 388 | lines = fp.readlines() 389 | 390 | for line in lines: 391 | line = line.strip() 392 | if line == '': 393 | continue 394 | key,value = line.split('=') 395 | key = key.strip() 396 | value = value.strip() 397 | options[key] = value 398 | return options 399 | 400 | def scale_bboxes(bboxes, width, height): 401 | import copy 402 | dets = copy.deepcopy(bboxes) 403 | for i in range(len(dets)): 404 | dets[i][0] = dets[i][0] * width 405 | dets[i][1] = dets[i][1] * height 406 | dets[i][2] = dets[i][2] * width 407 | dets[i][3] = dets[i][3] * height 408 | return dets 409 | 410 | def file_lines(thefilepath): 411 | count = 0 412 | thefile = open(thefilepath, 'rb') 413 | while True: 414 | buffer = thefile.read(8192*1024) 415 | if not buffer: 416 | break 417 | count += buffer.count(b'\n')  # the file is opened in binary mode, so count the byte, not a str 418 | thefile.close() 419 | return count 420 | 421 | def get_image_size(fname): 422 | '''Determine the image type of fhandle and return its size.
423 | from draco''' 424 | with open(fname, 'rb') as fhandle: 425 | head = fhandle.read(24) 426 | if len(head) != 24: 427 | return 428 | if imghdr.what(fname) == 'png': 429 | check = struct.unpack('>i', head[4:8])[0] 430 | if check != 0x0d0a1a0a: 431 | return 432 | width, height = struct.unpack('>ii', head[16:24]) 433 | elif imghdr.what(fname) == 'gif': 434 | width, height = struct.unpack('<HH', head[6:10]) 435 | elif imghdr.what(fname) == 'jpeg': 436 | try: 437 | fhandle.seek(0) # Read 0xff next 438 | size = 2 439 | ftype = 0 440 | while not 0xc0 <= ftype <= 0xcf: 441 | fhandle.seek(size, 1) 442 | byte = fhandle.read(1) 443 | while ord(byte) == 0xff: 444 | byte = fhandle.read(1) 445 | ftype = ord(byte) 446 | size = struct.unpack('>H', fhandle.read(2))[0] - 2 447 | # We are at a SOFn block 448 | fhandle.seek(1, 1) # Skip `precision' byte. 449 | height, width = struct.unpack('>HH', fhandle.read(4)) 450 | except Exception: #IGNORE:W0703 451 | return 452 | else: 453 | return 454 | return width, height 455 | 456 | def logging(message): 457 | print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message)) 458 | -------------------------------------------------------------------------------- /models/vgg16.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | __all__ = ['Vgg16', 'vgg16'] 5 | 6 | class Vgg16(nn.Module): 7 | def __init__(self, num_classes=1000, init_weights=True): 8 | super(Vgg16, self).__init__() 9 | self.features = nn.Sequential( 10 | nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 0 11 | nn.ReLU(inplace=True), # 1 12 | nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 2 13 | nn.ReLU(inplace=True), # 3 14 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 4 15 | nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 5 16 | nn.ReLU(inplace=True), # 6 17 | nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 7 18 | nn.ReLU(inplace=True), # 8 19 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 9 20 | nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 10 21 | nn.ReLU(inplace=True), # 11 22 | nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 12 23 | nn.ReLU(inplace=True), # 13 24 | nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 14 25 | nn.ReLU(inplace=True), # 15 26 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 16 27 | nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 17 28 | nn.ReLU(inplace=True), # 18 29 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 19 30 | nn.ReLU(inplace=True), # 20 31 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 21 32 | nn.ReLU(inplace=True), # 22 33 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 23 34 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 24 35 | nn.ReLU(inplace=True), # 25 36 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 26 37 | nn.ReLU(inplace=True), # 27 38 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), # 28 39 | nn.ReLU(inplace=True), # 29 40 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), # 30 41 | ) 42 | self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) # 18 43 | # x = torch.flatten(x, 1) 44 | self.classifier = nn.Sequential( 45 | nn.Linear(512 * 7 * 7, 4096), # 19 46 | nn.ReLU(True), 47 | nn.Dropout(), 48 | nn.Linear(4096, 4096), # 20 49 | nn.ReLU(True), 50 | nn.Dropout(), 51 | nn.Linear(4096, num_classes), # 21 52 | ) 53 | 54 | if init_weights: 55 | self._initialize_weights() 56 | 57 | def forward(self,
x, server=True, partition=0): 58 | if server == True: 59 | if partition == 0: 60 | x = self.features(x) 61 | x = self.avgpool(x) 62 | x = torch.flatten(x, 1) 63 | x = self.classifier(x) 64 | elif partition == 1: 65 | x = self.features[2:](x) 66 | x = self.avgpool(x) 67 | x = torch.flatten(x, 1) 68 | x = self.classifier(x) 69 | elif partition == 2: 70 | x = self.features[4:](x) 71 | x = self.avgpool(x) 72 | x = torch.flatten(x, 1) 73 | x = self.classifier(x) 74 | elif partition == 3: 75 | x = self.features[5:](x) 76 | x = self.avgpool(x) 77 | x = torch.flatten(x, 1) 78 | x = self.classifier(x) 79 | elif partition == 4: 80 | x = self.features[7:](x) 81 | x = self.avgpool(x) 82 | x = torch.flatten(x, 1) 83 | x = self.classifier(x) 84 | elif partition == 5: 85 | x = self.features[9:](x) 86 | x = self.avgpool(x) 87 | x = torch.flatten(x, 1) 88 | x = self.classifier(x) 89 | elif partition == 6: 90 | x = self.features[10:](x) 91 | x = self.avgpool(x) 92 | x = torch.flatten(x, 1) 93 | x = self.classifier(x) 94 | elif partition == 7: 95 | x = self.features[12:](x) 96 | x = self.avgpool(x) 97 | x = torch.flatten(x, 1) 98 | x = self.classifier(x) 99 | elif partition == 8: 100 | x = self.features[14:](x) 101 | x = self.avgpool(x) 102 | x = torch.flatten(x, 1) 103 | x = self.classifier(x) 104 | elif partition == 9: 105 | x = self.features[16:](x) 106 | x = self.avgpool(x) 107 | x = torch.flatten(x, 1) 108 | x = self.classifier(x) 109 | elif partition == 10: 110 | x = self.features[17:](x) 111 | x = self.avgpool(x) 112 | x = torch.flatten(x, 1) 113 | x = self.classifier(x) 114 | elif partition == 11: 115 | x = self.features[19:](x) 116 | x = self.avgpool(x) 117 | x = torch.flatten(x, 1) 118 | x = self.classifier(x) 119 | elif partition == 12: 120 | x = self.features[21:](x) 121 | x = self.avgpool(x) 122 | x = torch.flatten(x, 1) 123 | x = self.classifier(x) 124 | elif partition == 13: 125 | x = self.features[23:](x) 126 | x = self.avgpool(x) 127 | x = torch.flatten(x, 1) 128 | x = self.classifier(x) 129 | elif partition == 14: 130 | x = self.features[24:](x) 131 | x = self.avgpool(x) 132 | x = torch.flatten(x, 1) 133 | x = self.classifier(x) 134 | elif partition == 15: 135 | x = self.features[26:](x) 136 | x = self.avgpool(x) 137 | x = torch.flatten(x, 1) 138 | x = self.classifier(x) 139 | elif partition == 16: 140 | x = self.features[28:](x) 141 | x = self.avgpool(x) 142 | x = torch.flatten(x, 1) 143 | x = self.classifier(x) 144 | elif partition == 17: 145 | x = self.features[30:](x) 146 | x = self.avgpool(x) 147 | x = torch.flatten(x, 1) 148 | x = self.classifier(x) 149 | elif partition == 18: 150 | x = self.avgpool(x) 151 | x = torch.flatten(x, 1) 152 | x = self.classifier(x) 153 | elif partition == 19: 154 | x = self.classifier(x) 155 | elif partition == 20: 156 | x = self.classifier[3:](x) 157 | elif partition == 21: 158 | x = self.classifier[6:](x) 159 | elif partition == 22: 160 | x = x 161 | else: 162 | print('Please give the right partition point.') 163 | else: 164 | if partition == 0: 165 | x = x 166 | elif partition == 1: 167 | x = self.features[0:2](x) 168 | elif partition == 2: 169 | x = self.features[0:4](x) 170 | elif partition == 3: 171 | x = self.features[0:5](x) 172 | elif partition == 4: 173 | x = self.features[0:7](x) 174 | elif partition == 5: 175 | x = self.features[0:9](x) 176 | elif partition == 6: 177 | x = self.features[0:10](x) 178 | elif partition == 7: 179 | x = self.features[0:12](x) 180 | elif partition == 8: 181 | x = self.features[0:14](x) 182 | elif partition == 9: 183 
| x = self.features[0:16](x) 184 | elif partition == 10: 185 | x = self.features[0:17](x) 186 | elif partition == 11: 187 | x = self.features[0:19](x) 188 | elif partition == 12: 189 | x = self.features[0:21](x) 190 | elif partition == 13: 191 | x = self.features[0:23](x) 192 | elif partition == 14: 193 | x = self.features[0:24](x) 194 | elif partition == 15: 195 | x = self.features[0:26](x) 196 | elif partition == 16: 197 | x = self.features[0:28](x) 198 | elif partition == 17: 199 | x = self.features[0:30](x) 200 | elif partition == 18: 201 | x = self.features(x) 202 | elif partition == 19: 203 | x = self.features(x) 204 | x = self.avgpool(x) 205 | x = torch.flatten(x, 1) 206 | elif partition == 20: 207 | x = self.features(x) 208 | x = self.avgpool(x) 209 | x = torch.flatten(x, 1) 210 | x = self.classifier[0:3](x) 211 | elif partition == 21: 212 | x = self.features(x) 213 | x = self.avgpool(x) 214 | x = torch.flatten(x, 1) 215 | x = self.classifier[0:6](x) 216 | else: 217 | x = self.features(x) 218 | x = self.avgpool(x) 219 | x = torch.flatten(x, 1) 220 | x = self.classifier(x) 221 | return x 222 | 223 | def _initialize_weights(self): 224 | for m in self.modules(): 225 | if isinstance(m, nn.Conv2d): 226 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 227 | if m.bias is not None: 228 | nn.init.constant_(m.bias, 0) 229 | elif isinstance(m, nn.BatchNorm2d): 230 | nn.init.constant_(m.weight, 1) 231 | nn.init.constant_(m.bias, 0) 232 | elif isinstance(m, nn.Linear): 233 | nn.init.normal_(m.weight, 0, 0.01) 234 | nn.init.constant_(m.bias, 0) 235 | 236 | 237 | def vgg16(num_classes=1000, pretrained=True, progress=True): 238 | file = 'https://download.pytorch.org/models/vgg16-397923af.pth' 239 | model = Vgg16(num_classes) 240 | if pretrained: 241 | state_dict = torch.hub.load_state_dict_from_url(file, progress=progress) 242 | model.load_state_dict(state_dict) 243 | 244 | return model 245 | 246 | if __name__ == '__main__': 247 | print('test partition points in vgg16!!!') 248 | 249 | import json 250 | import torchvision.transforms as transforms 251 | from PIL import Image 252 | 253 | with open("imagenet_class_index.json", "r") as read_file: 254 | class_idx = json.load(read_file) 255 | labels = {int(key): value for key, value in class_idx.items()} 256 | 257 | model = vgg16() 258 | model.eval() 259 | if torch.cuda.is_available(): 260 | model.cuda() 261 | 262 | min_img_size = 224 263 | transform_pipeline = transforms.Compose([transforms.Resize((min_img_size, min_img_size)), 264 | transforms.ToTensor(), 265 | transforms.Normalize(mean=[0.485, 0.456, 0.406], 266 | std=[0.229, 0.224, 0.225])]) 267 | 268 | img = Image.open('Golden_Retriever_Hund_Dog.jpg') 269 | img = transform_pipeline(img) 270 | img = img.unsqueeze(0) 271 | 272 | for partition in range(23): 273 | with torch.no_grad(): 274 | intermediate = model(img.cuda(), server=False, partition=partition) 275 | prediction = model(intermediate, server=True, partition=partition) 276 | 277 | prediction = torch.argmax(prediction) 278 | 279 | print('partition point ', partition, labels[prediction.item()]) 280 | -------------------------------------------------------------------------------- /models/vgg16FrontEndDelay.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/vgg16FrontEndDelay.pkl -------------------------------------------------------------------------------- /models/voc.names: 
-------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /models/yoloFrontEndDelay.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/letian-zhang/ANS/6895eedc0f66c46fed87658d4dc0d34b432a0ec1/models/yoloFrontEndDelay.pkl -------------------------------------------------------------------------------- /muLinUCB.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def fillThetaContext(layerInfo, theta_context_dim): 4 | Action_num = len(layerInfo) 5 | x_theta = np.zeros((theta_context_dim, Action_num)) 6 | actionList = [] 7 | for i in range(Action_num): 8 | x_theta[0][i] = layerInfo[i][3] 9 | x_theta[1][i] = layerInfo[i][0] 10 | 11 | x_theta[2][i] = layerInfo[i][4] 12 | x_theta[3][i] = layerInfo[i][1] 13 | 14 | x_theta[4][i] = layerInfo[i][5] 15 | x_theta[5][i] = layerInfo[i][2] 16 | 17 | x_theta[6][i] = layerInfo[i][6] 18 | actionList.append(layerInfo[i][7]) 19 | return x_theta, actionList 20 | 21 | def getCx(x_theta, Action_num): 22 | listC_x = [] 23 | for i in range(Action_num): 24 | temp = np.sqrt(np.matmul(x_theta[:, [i]].T, x_theta[:, [i]])) 25 | listC_x.append(temp[0][0]) 26 | Cx = pow(max(listC_x), 2) 27 | return Cx 28 | 29 | class muLinUCB(): 30 | def __init__(self, mu, layerInfo, frontDelay): 31 | self.mu = mu 32 | self.numOfAction = len(layerInfo) 33 | self.thetaContextDim = 7 34 | self.x_theta, self.actionList = fillThetaContext(layerInfo, self.thetaContextDim) 35 | self.C_x = getCx(self.x_theta, self.numOfAction) 36 | self.frontDelay = frontDelay 37 | 38 | self.frameNum = 200 39 | self.delta = 0.1 40 | self.C_noise = 0.05 41 | self.l_key = 0.8 42 | self.l_nonkey = 0.2 43 | self.C_theta = 1 44 | self.A = np.diag(np.random.randint(1, 9, size=self.thetaContextDim)) 45 | self.b = np.zeros((self.thetaContextDim, 1)) 46 | self.alpha = (self.C_theta + np.sqrt(np.log((1 + self.frameNum * self.C_x * self.C_x)/self.delta) * self.thetaContextDim)*self.C_noise)/(1 - self.l_key) 47 | 48 | self.forceSamplingRate = 0.25 49 | self.forceSampleFrame = np.ceil(np.power(self.frameNum, self.forceSamplingRate)) 50 | print('forceSampleFrame:', self.forceSampleFrame) 51 | 52 | def updateDoublingTrickFrameNum(self, current_frame): 53 | if current_frame > self.frameNum: 54 | self.frameNum = self.frameNum * 2 55 | self.alpha = (self.C_theta + np.sqrt(np.log((1 + self.frameNum * self.C_x * self.C_x) / self.delta) * self.thetaContextDim) * self.C_noise) / (1 - self.l_key) 56 | self.forceSampleFrame = np.ceil(np.power(self.frameNum, self.forceSamplingRate)) 57 | return True 58 | return False 59 | 60 | def getEstimationAction(self, key_frame, current_frame): 61 | A_inv = np.linalg.inv(self.A) 62 | theta = np.matmul(A_inv, self.b) 63 | 64 | if key_frame: 65 | L = self.l_key 66 | else: 67 | L = self.l_nonkey 68 | 69 | estimate_delay = [] 70 | 71 | for action_index in range(self.numOfAction): 72 | x_1 = np.copy(self.x_theta[:, [action_index]]) 73 | x_2 = np.copy(self.x_theta[:, [action_index]]) 74 | 75 | temp_1 = np.matmul(x_1.T, theta) 76 | temp_2 = self.alpha * np.sqrt((1 - L) * np.matmul(np.matmul(x_1.T, A_inv), x_2)) 77 | 78 | 
estimate_delay.append(temp_1 - temp_2 + self.frontDelay[action_index]) 79 | 80 | if current_frame % self.forceSampleFrame == 0: 81 | estimate_action = estimate_delay.index(min(estimate_delay[0:-1])) 82 | else: 83 | estimate_action = estimate_delay.index(min(estimate_delay)) 84 | return estimate_action 85 | 86 | def updateA_b(self, estimate_action, actual_delay): 87 | if estimate_action != self.numOfAction - 1: 88 | self.A = self.A + np.matmul(self.x_theta[:, [estimate_action]], self.x_theta[:, [estimate_action]].T) 89 | self.b = self.b + self.x_theta[:, [estimate_action]] * actual_delay 90 | 91 | 92 | if __name__ == '__main__': 93 | partitionInfo = {  # each row ends with its partition point (index 7), which fillThetaContext reads 94 | 0: [13, 3, 24, 15346630656, 123633664, 26208256, 4818272, 0], 95 | 1: [12, 3, 23, 15259926528, 123633664, 22996992, 102761824, 1], 96 | 2: [11, 3, 22, 13410238464, 123633664, 19785728, 102761824, 2], 97 | 3: [11, 3, 21, 13410238464, 123633664, 16574464, 25691488, 3], 98 | 4: [10, 3, 20, 12485394432, 123633664, 13363200, 51381600, 4], 99 | 5: [9, 3, 19, 10635706368, 123633664, 10151936, 51381600, 5], 100 | 6: [9, 3, 18, 10635706368, 123633664, 8546304, 12846432, 6], 101 | 7: [8, 3, 17, 9710862336, 123633664, 6940672, 25691496, 7], 102 | 8: [7, 3, 16, 7861174272, 123633664, 5335040, 25691496, 8], 103 | 9: [6, 3, 15, 6011486208, 123633664, 4532224, 25691496, 9], 104 | 10: [6, 3, 14, 6011486208, 123633664, 3729408, 6423912, 10], 105 | 11: [5, 3, 13, 5086642176, 123633664, 2926592, 12846440, 11], 106 | 12: [4, 3, 12, 3236954112, 123633664, 2123776, 12846440, 12], 107 | 13: [3, 3, 11, 1387266048, 123633664, 1320960, 12846440, 13], 108 | 14: [3, 3, 10, 1387266048, 123633664, 919552, 3212648, 14], 109 | 15: [2, 3, 9, 924844032, 123633664, 518144, 3212648, 15], 110 | 16: [1, 3, 8, 462422016, 123633664, 417792, 3212648, 16], 111 | 17: [0, 3, 7, 0, 123633664, 317440, 3212648, 17], 112 | 18: [0, 3, 6, 0, 123633664, 217088, 3212648, 18], 113 | 19: [0, 3, 4, 0, 123633664, 16384, 804200, 19], 114 | 20: [0, 2, 2, 0, 20873216, 12288, 804200, 20], 115 | 21: [0, 1, 0, 0, 4096000, 0, 132416, 21], 116 | 22: [0, 0, 0, 0, 0, 0, 0, 22] 117 | } 118 | 119 | frontDelay = [0 for index in range(len(partitionInfo))] 120 | ans = muLinUCB(0.25, partitionInfo, frontDelay)  # renamed so the instance does not shadow the muLinUCB class 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /yolo_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | from PIL import Image, ImageDraw, ImageFont 8 | from torch.autograd import Variable 9 | 10 | import struct 11 | import imghdr 12 | 13 | def sigmoid(x): 14 | return 1.0/(math.exp(-x)+1.)
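# --- Editor's note (added; not part of the original file) ---------------------
# The scalar sigmoid above overflows for large negative inputs: math.exp(-x)
# raises OverflowError once -x exceeds roughly 709. A numerically safe variant
# keeps the exponent non-positive by branching on the sign; a sketch only,
# nothing else in this file depends on it.
def sigmoid_stable(x):
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    z = math.exp(x)  # x < 0 here, so exp(x) lies in (0, 1) and cannot overflow
    return z / (1.0 + z)
# ------------------------------------------------------------------------------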
15 | 16 | def softmax(x): 17 | x = torch.exp(x - torch.max(x)) 18 | x = x/x.sum() 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | if x1y1x2y2: 24 | mx = min(box1[0], box2[0]) 25 | Mx = max(box1[2], box2[2]) 26 | my = min(box1[1], box2[1]) 27 | My = max(box1[3], box2[3]) 28 | w1 = box1[2] - box1[0] 29 | h1 = box1[3] - box1[1] 30 | w2 = box2[2] - box2[0] 31 | h2 = box2[3] - box2[1] 32 | else: 33 | mx = min(box1[0]-box1[2]/2.0, box2[0]-box2[2]/2.0) 34 | Mx = max(box1[0]+box1[2]/2.0, box2[0]+box2[2]/2.0) 35 | my = min(box1[1]-box1[3]/2.0, box2[1]-box2[3]/2.0) 36 | My = max(box1[1]+box1[3]/2.0, box2[1]+box2[3]/2.0) 37 | w1 = box1[2] 38 | h1 = box1[3] 39 | w2 = box2[2] 40 | h2 = box2[3] 41 | uw = Mx - mx 42 | uh = My - my 43 | cw = w1 + w2 - uw 44 | ch = h1 + h2 - uh 45 | carea = 0 46 | if cw <= 0 or ch <= 0: 47 | return 0.0 48 | 49 | area1 = w1 * h1 50 | area2 = w2 * h2 51 | carea = cw * ch 52 | uarea = area1 + area2 - carea 53 | return carea/uarea 54 | 55 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True): 56 | if x1y1x2y2: 57 | mx = torch.min(boxes1[0], boxes2[0]) 58 | Mx = torch.max(boxes1[2], boxes2[2]) 59 | my = torch.min(boxes1[1], boxes2[1]) 60 | My = torch.max(boxes1[3], boxes2[3]) 61 | w1 = boxes1[2] - boxes1[0] 62 | h1 = boxes1[3] - boxes1[1] 63 | w2 = boxes2[2] - boxes2[0] 64 | h2 = boxes2[3] - boxes2[1] 65 | else: 66 | mx = torch.min(boxes1[0]-boxes1[2]/2.0, boxes2[0]-boxes2[2]/2.0) 67 | Mx = torch.max(boxes1[0]+boxes1[2]/2.0, boxes2[0]+boxes2[2]/2.0) 68 | my = torch.min(boxes1[1]-boxes1[3]/2.0, boxes2[1]-boxes2[3]/2.0) 69 | My = torch.max(boxes1[1]+boxes1[3]/2.0, boxes2[1]+boxes2[3]/2.0) 70 | w1 = boxes1[2] 71 | h1 = boxes1[3] 72 | w2 = boxes2[2] 73 | h2 = boxes2[3] 74 | uw = Mx - mx 75 | uh = My - my 76 | cw = w1 + w2 - uw 77 | ch = h1 + h2 - uh 78 | mask = ((cw <= 0) + (ch <= 0) > 0) 79 | area1 = w1 * h1 80 | area2 = w2 * h2 81 | carea = cw * ch 82 | carea[mask] = 0 83 | uarea = area1 + area2 - carea 84 | return carea/uarea 85 | 86 | def nms(boxes, nms_thresh): 87 | if len(boxes) == 0: 88 | return boxes 89 | 90 | det_confs = torch.zeros(len(boxes)) 91 | for i in range(len(boxes)): 92 | det_confs[i] = 1-boxes[i][4] 93 | 94 | _,sortIds = torch.sort(det_confs) 95 | out_boxes = [] 96 | for i in range(len(boxes)): 97 | box_i = boxes[sortIds[i]] 98 | if box_i[4] > 0: 99 | out_boxes.append(box_i) 100 | for j in range(i+1, len(boxes)): 101 | box_j = boxes[sortIds[j]] 102 | if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: 103 | #print(box_i, box_j, bbox_iou(box_i, box_j, x1y1x2y2=False)) 104 | box_j[4] = 0 105 | return out_boxes 106 | 107 | def convert2cpu(gpu_matrix): 108 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 109 | 110 | def convert2cpu_long(gpu_matrix): 111 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 112 | 113 | def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False): 114 | num_anchors = int(num_anchors) 115 | anchor_step = int(len(anchors)/num_anchors) 116 | if output.dim() == 3: 117 | output = output.unsqueeze(0) 118 | batch = output.size(0) 119 | assert(output.size(1) == (5+num_classes)*num_anchors) 120 | h = output.size(2) 121 | w = output.size(3) 122 | 123 | t0 = time.time() 124 | all_boxes = [] 125 | output = output.view(int(batch*num_anchors), int(5+num_classes), int(h*w)).transpose(0, 1).contiguous().view(int(5+num_classes), int(batch*num_anchors*h*w)) 126 | 127 | grid_x = torch.linspace(0, w-1, w).repeat(h,1).repeat(int(batch*num_anchors), 1, 
1).view(int(batch*num_anchors*h*w)).cuda() 128 | grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(int(batch*num_anchors), 1, 1).view(int(batch*num_anchors*h*w)).cuda() 129 | xs = torch.sigmoid(output[0]) + grid_x 130 | ys = torch.sigmoid(output[1]) + grid_y 131 | 132 | anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([0])) 133 | anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([1])) 134 | anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda() 135 | anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h*w).view(batch*num_anchors*h*w).cuda() 136 | ws = torch.exp(output[2]) * anchor_w 137 | hs = torch.exp(output[3]) * anchor_h 138 | 139 | det_confs = torch.sigmoid(output[4]) 140 | 141 | cls_confs = torch.nn.Softmax()(Variable(output[5:5+num_classes].transpose(0, 1))).data 142 | cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 143 | cls_max_confs = cls_max_confs.view(-1) 144 | cls_max_ids = cls_max_ids.view(-1) 145 | t1 = time.time() 146 | 147 | sz_hw = h*w 148 | sz_hwa = sz_hw*num_anchors 149 | det_confs = convert2cpu(det_confs) 150 | cls_max_confs = convert2cpu(cls_max_confs) 151 | cls_max_ids = convert2cpu_long(cls_max_ids) 152 | xs = convert2cpu(xs) 153 | ys = convert2cpu(ys) 154 | ws = convert2cpu(ws) 155 | hs = convert2cpu(hs) 156 | if validation: 157 | cls_confs = convert2cpu(cls_confs.view(-1, num_classes)) 158 | t2 = time.time() 159 | for b in range(batch): 160 | boxes = [] 161 | for cy in range(h): 162 | for cx in range(w): 163 | for i in range(num_anchors): 164 | ind = b*sz_hwa + i*sz_hw + cy*w + cx 165 | det_conf = det_confs[ind] 166 | if only_objectness: 167 | conf = det_confs[ind] 168 | else: 169 | conf = det_confs[ind] * cls_max_confs[ind] 170 | 171 | if conf > conf_thresh: 172 | bcx = xs[ind] 173 | bcy = ys[ind] 174 | bw = ws[ind] 175 | bh = hs[ind] 176 | cls_max_conf = cls_max_confs[ind] 177 | cls_max_id = cls_max_ids[ind] 178 | box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 179 | if (not only_objectness) and validation: 180 | for c in range(num_classes): 181 | tmp_conf = cls_confs[ind][c] 182 | if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh: 183 | box.append(tmp_conf) 184 | box.append(c) 185 | boxes.append(box) 186 | all_boxes.append(boxes) 187 | t3 = time.time() 188 | if False: 189 | print('---------------------------------') 190 | print('matrix computation : %f' % (t1-t0)) 191 | print(' gpu to cpu : %f' % (t2-t1)) 192 | print(' boxes filter : %f' % (t3-t2)) 193 | print('---------------------------------') 194 | return all_boxes 195 | 196 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 197 | import cv2 198 | colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]); 199 | def get_color(c, x, max_val): 200 | ratio = float(x)/max_val * 5 201 | i = int(math.floor(ratio)) 202 | j = int(math.ceil(ratio)) 203 | ratio = ratio - i 204 | r = (1-ratio) * colors[i][c] + ratio*colors[j][c] 205 | return int(r*255) 206 | 207 | width = img.shape[1] 208 | height = img.shape[0] 209 | for i in range(len(boxes)): 210 | box = boxes[i] 211 | x1 = int(((box[0] - box[2]/2.0) * width)) 212 | y1 = int(((box[1] - box[3]/2.0) * height)) 213 | x2 = int(((box[0] + box[2]/2.0) * width)) 214 | y2 = int(((box[1] + box[3]/2.0) * height)) 215 | 216 | if color: 217 | rgb = color 218 | else: 219 | rgb = (255, 0, 0) 220 | if len(box) >= 7 and class_names: 221 | cls_conf = box[5] 222 | 
cls_id = box[6] 223 | print('%s: %f' % (class_names[cls_id], cls_conf)) 224 | classes = len(class_names) 225 | offset = cls_id * 123457 % classes 226 | red = get_color(2, offset, classes) 227 | green = get_color(1, offset, classes) 228 | blue = get_color(0, offset, classes) 229 | if color is None: 230 | rgb = (red, green, blue) 231 | img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 240), 2) 232 | #img = cv2.putText(img, class_names[cls_id], (x1,y1), cv2.FONT_HERSHEY_SIMPLEX, 2, rgb, 2) 233 | #img = cv2.rectangle(img, (x1,y1), (x2,y2), rgb, 2) 234 | img = cv2.rectangle(img, (x1,y1), (x2,y2), (0, 0, 240), 2) 235 | if savename: 236 | print("save plot results to %s" % savename) 237 | cv2.imwrite(savename, img) 238 | return img 239 | 240 | def plot_boxes(img, boxes, savename=None, class_names=None): 241 | colors = torch.FloatTensor([[1,0,1],[0,0,1],[0,1,1],[0,1,0],[1,1,0],[1,0,0]]); 242 | def get_color(c, x, max_val): 243 | ratio = float(x)/max_val * 5 244 | i = int(math.floor(ratio)) 245 | j = int(math.ceil(ratio)) 246 | ratio = ratio - i 247 | r = (1-ratio) * colors[i][c] + ratio*colors[j][c] 248 | return int(r*255) 249 | 250 | width = img.width 251 | height = img.height 252 | draw = ImageDraw.Draw(img) 253 | for i in range(len(boxes)): 254 | box = boxes[i] 255 | x1 = (box[0] - box[2]/2.0) * width 256 | y1 = (box[1] - box[3]/2.0) * height 257 | x2 = (box[0] + box[2]/2.0) * width 258 | y2 = (box[1] + box[3]/2.0) * height 259 | 260 | rgb = (255, 0, 0) 261 | if len(box) >= 7 and class_names: 262 | cls_conf = box[5] 263 | cls_id = box[6] 264 | print('%s: %f' % (class_names[cls_id], cls_conf)) 265 | classes = len(class_names) 266 | offset = cls_id * 123457 % classes 267 | red = get_color(2, offset, classes) 268 | green = get_color(1, offset, classes) 269 | blue = get_color(0, offset, classes) 270 | rgb = (red, green, blue) 271 | draw.text((x1, y1), class_names[cls_id], fill=rgb) 272 | draw.rectangle([x1, y1, x2, y2], outline = rgb) 273 | if savename: 274 | print("save plot results to %s" % savename) 275 | img.save(savename) 276 | return img 277 | 278 | def read_truths(lab_path): 279 | if not os.path.exists(lab_path): 280 | return np.array([]) 281 | if os.path.getsize(lab_path): 282 | truths = np.loadtxt(lab_path) 283 | truths = truths.reshape(truths.size/5, 5) # to avoid single truth problem 284 | return truths 285 | else: 286 | return np.array([]) 287 | 288 | def read_truths_args(lab_path, min_box_scale): 289 | truths = read_truths(lab_path) 290 | new_truths = [] 291 | for i in range(truths.shape[0]): 292 | if truths[i][3] < min_box_scale: 293 | continue 294 | new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]]) 295 | return np.array(new_truths) 296 | 297 | def load_class_names(namesfile): 298 | class_names = [] 299 | with open(namesfile, 'r') as fp: 300 | lines = fp.readlines() 301 | for line in lines: 302 | line = line.rstrip() 303 | class_names.append(line) 304 | return class_names 305 | 306 | def image2torch(img): 307 | width = img.width 308 | height = img.height 309 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 310 | img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous() 311 | img = img.view(1, 3, height, width) 312 | img = img.float().div(255.0) 313 | return img 314 | 315 | def partirion_output(model, img, action): 316 | model.eval() 317 | img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0) 318 | img = Variable(img) 319 | with torch.no_grad(): 
320 | output = model(img.cuda(), server=False, partition=action)  # run the on-device front end up to the chosen partition point 321 | output = output.data 322 | del img 323 | return output 324 | 325 | def get_boxes(res, model, conf_thresh, nms_thresh): 326 | boxes = get_region_boxes(res, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0] 327 | boxes = nms(boxes, nms_thresh) 328 | return boxes 329 | 330 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1): 331 | model.eval() 332 | t0 = time.time() 333 | 334 | if isinstance(img, Image.Image): 335 | width = img.width 336 | height = img.height 337 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 338 | img = img.view(height, width, 3).transpose(0,1).transpose(0,2).contiguous() 339 | img = img.view(1, 3, height, width) 340 | img = img.float().div(255.0) 341 | elif type(img) == np.ndarray: # cv2 image 342 | img = torch.from_numpy(img.transpose(2,0,1)).float().div(255.0).unsqueeze(0) 343 | else: 344 | print("unknown image type") 345 | exit(-1) 346 | 347 | t1 = time.time() 348 | 349 | if use_cuda: 350 | img = img.cuda() 351 | img = torch.autograd.Variable(img) 352 | t2 = time.time() 353 | 354 | output = model(img) 355 | output = output.data 356 | #for j in range(100): 357 | # sys.stdout.write('%f ' % (output.storage()[j])) 358 | #print('') 359 | t3 = time.time() 360 | 361 | boxes = get_region_boxes(output, conf_thresh, model.num_classes, model.anchors, model.num_anchors)[0] 362 | #for j in range(len(boxes)): 363 | # print(boxes[j]) 364 | t4 = time.time() 365 | 366 | boxes = nms(boxes, nms_thresh) 367 | t5 = time.time() 368 | 369 | if False: 370 | print('-----------------------------------') 371 | print(' image to tensor : %f' % (t1 - t0)) 372 | print(' tensor to cuda : %f' % (t2 - t1)) 373 | print(' predict : %f' % (t3 - t2)) 374 | print('get_region_boxes : %f' % (t4 - t3)) 375 | print(' nms : %f' % (t5 - t4)) 376 | print(' total : %f' % (t5 - t0)) 377 | print('-----------------------------------') 378 | return boxes 379 | 380 | def read_data_cfg(datacfg): 381 | options = dict() 382 | options['gpus'] = '0,1,2,3' 383 | options['num_workers'] = '10' 384 | with open(datacfg, 'r') as fp: 385 | lines = fp.readlines() 386 | 387 | for line in lines: 388 | line = line.strip() 389 | if line == '': 390 | continue 391 | key,value = line.split('=') 392 | key = key.strip() 393 | value = value.strip() 394 | options[key] = value 395 | return options 396 | 397 | def scale_bboxes(bboxes, width, height): 398 | import copy 399 | dets = copy.deepcopy(bboxes) 400 | for i in range(len(dets)): 401 | dets[i][0] = dets[i][0] * width 402 | dets[i][1] = dets[i][1] * height 403 | dets[i][2] = dets[i][2] * width 404 | dets[i][3] = dets[i][3] * height 405 | return dets 406 | 407 | def file_lines(thefilepath): 408 | count = 0 409 | thefile = open(thefilepath, 'rb') 410 | while True: 411 | buffer = thefile.read(8192*1024) 412 | if not buffer: 413 | break 414 | count += buffer.count(b'\n')  # the file is opened in binary mode, so count the byte, not a str 415 | thefile.close() 416 | return count 417 | 418 | def get_image_size(fname): 419 | '''Determine the image type of fhandle and return its size.
420 | from draco''' 421 | with open(fname, 'rb') as fhandle: 422 | head = fhandle.read(24) 423 | if len(head) != 24: 424 | return 425 | if imghdr.what(fname) == 'png': 426 | check = struct.unpack('>i', head[4:8])[0] 427 | if check != 0x0d0a1a0a: 428 | return 429 | width, height = struct.unpack('>ii', head[16:24]) 430 | elif imghdr.what(fname) == 'gif': 431 | width, height = struct.unpack('<HH', head[6:10]) 432 | elif imghdr.what(fname) == 'jpeg': 433 | try: 434 | fhandle.seek(0) # Read 0xff next 435 | size = 2 436 | ftype = 0 437 | while not 0xc0 <= ftype <= 0xcf: 438 | fhandle.seek(size, 1) 439 | byte = fhandle.read(1) 440 | while ord(byte) == 0xff: 441 | byte = fhandle.read(1) 442 | ftype = ord(byte) 443 | size = struct.unpack('>H', fhandle.read(2))[0] - 2 444 | # We are at a SOFn block 445 | fhandle.seek(1, 1) # Skip `precision' byte. 446 | height, width = struct.unpack('>HH', fhandle.read(4)) 447 | except Exception: #IGNORE:W0703 448 | return 449 | else: 450 | return 451 | return width, height 452 | 453 | def logging(message): 454 | print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message)) 455 | --------------------------------------------------------------------------------
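The `__main__` block in muLinUCB.py constructs the learner but never drives it, so the per-frame loop that ANS actually runs is easy to miss: estimate the cheapest partition point, play it, measure the resulting end-to-end delay, and feed the observation back. The sketch below makes that loop explicit. It is an illustration, not repo code: `run_online_loop` is a hypothetical helper, the uniform random `measured_delay` stands in for the latency that client_camera_main.py measures, and `partition_info` is any table shaped like the one in muLinUCB.py's `__main__` (eight fields per row, ending with the partition point).

```python
import numpy as np
from muLinUCB import muLinUCB

def run_online_loop(partition_info, num_frames=500, mu=0.25):
    front_delay = [0.0] * len(partition_info)           # front-end delay estimate per action
    bandit = muLinUCB(mu, partition_info, front_delay)
    for frame in range(1, num_frames + 1):
        bandit.updateDoublingTrickFrameNum(frame)       # doubling trick: grow the horizon, refresh alpha
        action = bandit.getEstimationAction(key_frame=True, current_frame=frame)
        measured_delay = np.random.uniform(0.05, 0.20)  # placeholder for the measured delay (seconds)
        bandit.updateA_b(action, measured_delay)        # rank-one update of A and b for the played arm
    return bandit
```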
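Similarly, the long if/elif chains in Vgg16.forward (models/vgg16.py) hard-code, for each partition point, how far into `features` the client runs before shipping the activation. A behavior-preserving way to read them is as a lookup table; the sketch below reproduces the client half that way. `FEATURE_CUT` and `client_forward` are hypothetical names introduced here, with the slice indices copied verbatim from the branches above.

```python
import torch

# End index of the features slice the client runs, for partition points 1..18
# (18 runs all 31 feature modules); copied from the elif chain in Vgg16.forward.
FEATURE_CUT = {1: 2, 2: 4, 3: 5, 4: 7, 5: 9, 6: 10, 7: 12, 8: 14, 9: 16,
               10: 17, 11: 19, 12: 21, 13: 23, 14: 24, 15: 26, 16: 28,
               17: 30, 18: 31}

def client_forward(model, x, partition):
    # equivalent to model(x, server=False, partition=partition)
    if partition == 0:
        return x                                       # everything runs on the server
    if partition in FEATURE_CUT:
        return model.features[:FEATURE_CUT[partition]](x)
    x = model.features(x)                              # partitions 19-22 finish every conv layer
    x = model.avgpool(x)
    x = torch.flatten(x, 1)
    if partition == 19:
        return x
    if partition == 20:
        return model.classifier[0:3](x)
    if partition == 21:
        return model.classifier[0:6](x)
    return model.classifier(x)                         # partition 22: the whole model runs on the device
```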