├── .gitignore
├── README.md
├── assign_detection_to_trackers.py
├── attack
│   ├── __init__.py
│   └── attack_v2.py
├── data
│   ├── move-in-zoomed
│   │   ├── 01.mp4
│   │   ├── 02.mp4
│   │   ├── 03.mp4
│   │   ├── 04.mp4
│   │   ├── 05.mp4
│   │   ├── 06.mp4
│   │   ├── 07.mp4
│   │   ├── 08.mp4
│   │   ├── 09.mp4
│   │   └── 10.mp4
│   └── move-out-zoomed
│       ├── 01.mp4
│       ├── 02.mp4
│       ├── 03.mp4
│       ├── 04.mp4
│       ├── 05.mp4
│       ├── 06.mp4
│       ├── 07.mp4
│       ├── 08.mp4
│       ├── 09.mp4
│       └── 10.mp4
├── main.py
├── models
│   ├── yolov3
│   │   ├── image_utils.py
│   │   ├── keras_utils.py
│   │   ├── model_data
│   │   │   ├── FiraMono-Medium.otf
│   │   │   ├── coco_classes.txt
│   │   │   └── yolov3_anchors.txt
│   │   ├── yolov3_model.py
│   │   └── yolov3_wrapper.py
│   └── yolov3_wrapper.py
├── pipeline_center.py
├── tracker
│   └── kalman_filter.py
└── utils
    ├── file_utils.py
    ├── image_utils.py
    ├── keras_utils.py
    ├── load_DETRAC.py
    └── utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
out/
*.py[cod]
*$py.class

# large model data
*.npy
*.h5

# macOS
.DS_Store

# C extensions
*.so

# Distribution / packaging
.Python
output/*.png
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Tracker Hijacking Attack
First, download the YOLOv3 weight file:
```
wget https://perceptron-benchmark.s3-us-west-1.amazonaws.com/models/coco/yolov3.h5 -P ./models/yolov3/model_data/
```
Then run `main.py`:
```
python3 main.py
```
The output is the number of frames required to launch a successful tracker hijacking attack, together with the position of the fabricated adversarial bounding box in each attack frame.
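For reference, a run prints per-frame progress roughly like the following (illustrative values, not from a real run):
```
Attack starts at frame 3
Target bbox location in the original frame 3: [188, 284, 247, 309]
Fabricated bbox location [193 280 252 305] at frame 3
Fabricated bbox location [198 276 257 301] at frame 4
Attack finished with 2 attacks.
```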

--------------------------------------------------------------------------------
/assign_detection_to_trackers.py:
--------------------------------------------------------------------------------
from utils import utils

import cv2
import numpy as np
import math

try:
    from sklearn.utils.linear_assignment_ import linear_assignment
except ImportError:
    # sklearn.utils.linear_assignment_ was removed in scikit-learn >= 0.23;
    # fall back to the equivalent SciPy solver with the same return layout.
    from scipy.optimize import linear_sum_assignment

    def linear_assignment(cost_matrix):
        rows, cols = linear_sum_assignment(cost_matrix)
        return np.stack([rows, cols], axis=1)

weight_same_camera = {
    'appearance' : 0.45,
    'motion' : 0.4,
    'shape' : 0.15,
    'overlap' : 0.05,
}

def assign_detections_to_trackers(trackers_obj, detections_obj, iou_thrd=0.3):
    '''
    From the current list of trackers and new detections, output matched
    detections, unmatched trackers, and unmatched detections.
    '''

    trackers = [temp_obj['bbox'] for temp_obj in trackers_obj]
    detections = [temp_obj['bbox'] for temp_obj in detections_obj]

    IOU_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Motion_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Shape_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for t, trk in enumerate(trackers):
        #trk = convert_to_cv2bbox(trk)
        for d, det in enumerate(detections):
            # det = convert_to_cv2bbox(det)
            IOU_mat[t, d] = utils.box_iou(trk, det)
            Motion_mat[t, d] = get_motion_score(trk, det)
            Shape_mat[t, d] = get_shape_score(trk, det)

    # Produce matches by solving the maximize-the-sum-of-IOU assignment
    # problem with the Hungarian algorithm (also known as the Munkres
    # algorithm).

    matched_idx = linear_assignment(-IOU_mat)

    unmatched_trackers, unmatched_detections = [], []
    for t, trk in enumerate(trackers):
        if t not in matched_idx[:, 0]:
            unmatched_trackers.append(t)

    for d, det in enumerate(detections):
        if d not in matched_idx[:, 1]:
            unmatched_detections.append(d)

    matches = []

    # For creating trackers we consider any detection with an overlap less
    # than iou_thrd to signify the existence of an untracked object.

    for m in matched_idx:
        if IOU_mat[m[0], m[1]] < iou_thrd:
            unmatched_trackers.append(m[0])
            unmatched_detections.append(m[1])
        else:
            matches.append(m.reshape(1, 2))

    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(
        unmatched_detections), np.array(unmatched_trackers)


def _gaussian(x, mu, sigma):
    return math.exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))


def get_motion_score(trk, det):
    # Box centers; note the y component is (top + bottom) / 2. The original
    # code computed (bottom - top) / 2 here, i.e. half the box height.
    center_det = [(det[2] + det[0]) / 2, (det[3] + det[1]) / 2]
    center_trk = [(trk[2] + trk[0]) / 2, (trk[3] + trk[1]) / 2]
    width_trk = trk[2] - trk[0] + 1
    height_trk = trk[3] - trk[1] + 1
    s = _gaussian(center_trk[0], center_det[0], width_trk) * \
        _gaussian(center_trk[1], center_det[1], height_trk)
    return s

def get_shape_score(trk, det):
    width_trk = trk[2] - trk[0] + 1
    height_trk = trk[3] - trk[1] + 1
    width_det = det[2] - det[0] + 1
    height_det = det[3] - det[1] + 1

    s = (height_det - height_trk) * (width_det - width_trk) / \
        (width_det * height_det)
    return -1 * abs(s)
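
# Illustrative sketch (not in the original file): how the Hungarian solver
# turns the IoU cost matrix into (tracker, detection) pairs. Uses the
# linear_assignment defined above; the values are made up.
def _assignment_example():
    iou = np.array([[0.8, 0.1, 0.0],
                    [0.2, 0.6, 0.1]])  # rows: trackers, cols: detections
    matched_idx = linear_assignment(-iou)  # negate to maximize total IoU
    # matched_idx == [[0, 0], [1, 1]]; detection 2 is left unmatched and
    # would seed a new tracker.
    return matched_idx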
--------------------------------------------------------------------------------
/attack/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/attack/__init__.py

--------------------------------------------------------------------------------
/attack/attack_v2.py:
--------------------------------------------------------------------------------
'''apply only one fabrication attack'''
import sys
sys.path.append("../")
from models.yolov3_wrapper import YOLOv3
from pipeline_center import pipeline
from utils.utils import letterbox_image, box_iou
from PIL import Image

import cv2
import glob
import numpy as np
from keras import backend as K
import skvideo.io
import copy
import math
import os
from tqdm import tqdm

class KerasYOLOv3Model_plus(YOLOv3):
    def detect_image(self, image):
        """Determine the locations of the cars in the image.

        Args:
            image: numpy array

        Returns:
            detected objects with: bbox, confidence score, class index
            [
                dictionary {
                    bbox: np.array([left, up, right, down])
                    score: confidence_score
                    class_idx: class_idx
                    class_name: class name category
                }
            ]

        """
        pred_dic = self.predict(image)
        pred_list = self._dic2list(pred_dic)
        return pred_list

    def _dic2list(self, pred_dic):
        pred_list = []
        for temp_class, temp_score, temp_bbox in zip(pred_dic['classes'], pred_dic['scores'], pred_dic['boxes']):
            temp_dic = {}
            temp_dic['class_idx'] = temp_class
            # predict() returns boxes as [top, left, bottom, right]; reorder
            # to [left, top, right, bottom].
            temp_dic['score'] = temp_score
            temp_dic['bbox'] = [temp_bbox[1], temp_bbox[0], temp_bbox[3], temp_bbox[2]]
            try:
                # YOLOv3 exposes the label list as `class_names`; the original
                # code looked up a non-existent `_class_names` attribute and
                # therefore always fell through to 'None'.
                temp_dic['class_name'] = self.class_names[temp_class]
            except (AttributeError, IndexError):
                temp_dic['class_name'] = 'None'
            pred_list.append(temp_dic)
        return pred_list


def calculate_translation_center(bbox1, bbox2):
    '''
    Calculate the center translation vector from bbox1 to bbox2.
    bbox : np.ndarray or list
        [left, top, right, bottom]
    '''
    bbox1 = np.array(bbox1).astype(float)
    bbox2 = np.array(bbox2).astype(float)
    center_1 = np.array([(bbox1[2] + bbox1[0]) / 2, (bbox1[3] + bbox1[1]) / 2])
    center_2 = np.array([(bbox2[2] + bbox2[0]) / 2, (bbox2[3] + bbox2[1]) / 2])
    return center_2 - center_1

def is_match(target_trk_id, target_det_id, match_info):
    match_list = match_info[0]
    for match_trk, match_det in match_list:
        if match_trk == target_trk_id and match_det == target_det_id:
            return True
    return False

def find_det_id_by_match_info(target_trk_id, match_info):
    match_list = match_info[0]
    unmatched_dets = match_info[1]
    unmatched_trks = match_info[2]
    if target_trk_id in unmatched_trks:
        raise ValueError('Target tracker is not matched to any detection.')
    for match_trk, match_det in match_list:
        if match_trk == target_trk_id:
            return match_det
    raise ValueError('Target tracker is not in tracker list.')

def bgr2rgb(bgr_array):
    temp = []
    temp.append(bgr_array[:,:,2])
    temp.append(bgr_array[:,:,1])
    temp.append(bgr_array[:,:,0])
    return np.transpose(np.array(temp), (1, 2, 0))

def find_match_trk(match_info, det_id):
    # Match pairs are (tracker_idx, detection_idx), consistent with is_match
    # and find_det_id_by_match_info above; the original code compared det_id
    # against the tracker column and returned the detection column.
    match_info_pair = match_info[0]
    for temp_pair in match_info_pair:
        if temp_pair[1] == det_id:
            return temp_pair[0]
    return None

def sort_bbox_by_area(detected_objects_list):
    if not detected_objects_list:
        return detected_objects_list
    area_list = []
    for temp_det in detected_objects_list:
        temp_bbox = temp_det['bbox']
        temp_area = (temp_bbox[2] - temp_bbox[0]) * (temp_bbox[3] - temp_bbox[1])
        area_list.append(temp_area)
    sorted_idx = [i[0] for i in sorted(enumerate(area_list), key=lambda x: x[1], reverse=True)]
    ret_list = []
    for temp_idx in sorted_idx:
        ret_list.append(detected_objects_list[temp_idx])
    return ret_list
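
# Illustrative helper (not in the original file): the match_info tuple used
# throughout follows assign_detections_to_trackers' return layout of
# (matches, unmatched_detections, unmatched_trackers), where each row of
# matches is a (tracker_idx, detection_idx) pair.
def _match_info_example():
    matches = np.array([[0, 2], [1, 0]])
    match_info = (matches, np.array([1]), np.array([2]))
    assert is_match(0, 2, match_info)
    assert find_det_id_by_match_info(1, match_info) == 0
    return match_info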
def _box_area(bbox):
    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])

def nms_fine_tune(detected_objects_list, th=0.5):
    ret_list = []
    for i in range(len(detected_objects_list)):
        is_append = True
        for j in range(len(detected_objects_list)):
            if i == j:
                continue
            iou = box_iou(detected_objects_list[i]['bbox'], detected_objects_list[j]['bbox'])
            if iou > th and _box_area(detected_objects_list[i]['bbox']) <= _box_area(detected_objects_list[j]['bbox']):
                is_append = False
                break
        if is_append:
            ret_list.append(detected_objects_list[i])
    return ret_list

def is_missing_detection(detected_objects_list, target_bbox, target_id=0):
    temp_bbox = detected_objects_list[target_id]['bbox']
    temp_area = _box_area(temp_bbox)
    target_area = _box_area(target_bbox)
    if float(temp_area) / float(target_area) < 0.3:
        return True
    return False

def tracker_bbox_list(tracker_list):
    ret = []
    for tracker in tracker_list:
        ret.append(tracker.obj)
    return ret

def attack_video(params, video_path=None, attack_det_id_dict=None, patch_bbox=None, moving_direction=None, verbose=0, is_return=False):

    detector = KerasYOLOv3Model_plus(sess=K.get_session())

    n_attacks = None

    videogen = skvideo.io.FFmpegReader(video_path)
    virtual_attack = False
    detected_objects_list_prev = None
    match_info_prev = None

    cal_dx_dy_flag = True
    attack_frame_list = [*attack_det_id_dict]
    attack_frame_list.sort()

    attacking_flag = False
    attack_count_idx = 0

    is_init = True
    params_min_hits = params['min_hits']
    for frame_count, image in enumerate(videogen.nextFrame()):
        if frame_count > 1:
            is_init = False

        image_yolo, _ = letterbox_image(image, shape=(416, 416), data_format='channels_last')
        image = bgr2rgb((image_yolo * 255).astype(np.uint8))
        image_yolo_pil = Image.fromarray((image_yolo * 255).astype(np.uint8))
        detected_objects_list = detector.detect_image(image_yolo_pil)
        detected_objects_list = nms_fine_tune(detected_objects_list)

        detected_objects_list = sort_bbox_by_area(detected_objects_list)
        if len(detected_objects_list) != 0:
            nat_detected_objects_list = copy.deepcopy(detected_objects_list)

        if frame_count in attack_frame_list or attacking_flag:
            target_det_id = attack_det_id_dict[frame_count - attack_count_idx][attack_count_idx]

            if attack_count_idx == 0:
                attacking_flag = True
                target_trk_id = find_match_trk(match_info_prev, target_det_id)
                target_init_bbox = detected_objects_list[target_det_id]['bbox']
                target_init_trk_bbox = (params_prev['tracker_list'][target_trk_id].obj)['bbox']
                print("Attack starts at frame {}".format(frame_count))
                print("Target bbox location in the original frame {}: {}".format(frame_count, target_init_bbox))
            if attack_count_idx != 0:
                _, _, match_info_nat = pipeline(image, nat_detected_objects_list, frame_count, params_prev, detect_output=True, verbose=0, virtual_attack=virtual_attack, return_match_info=True)
                attacking_flag = is_match(target_trk_id, target_det_id, match_info_nat)
                if not attacking_flag:
                    detection_missing = is_missing_detection(nat_detected_objects_list, target_init_bbox, target_det_id)
                    tracking_missing = is_missing_detection(tracker_bbox_list(params_prev['tracker_list']), target_init_trk_bbox, target_trk_id)
                    if detection_missing and not tracking_missing:
                        attacking_flag = True
                    else:
                        print('Attack finished with {0} attacks.'.format(attack_count_idx))
                        n_attacks = attack_count_idx
                        cal_dx_dy_flag = True
                        attack_count_idx = 0
                        return n_attacks

            if attacking_flag:
                temp_attack_obj = detected_objects_list_prev[target_det_id]
                target_det_prev = temp_attack_obj
                target_trk_prev = params_prev['tracker_list'][target_trk_id].obj
                translation_vector_center = calculate_translation_center(target_trk_prev['bbox'], target_det_prev['bbox'])

                attack_bbox = temp_attack_obj['bbox']
                attack_param = params_prev
                L = 5  # bbox moving pixel length per attack step

                if cal_dx_dy_flag and moving_direction is None:
                    if translation_vector_center[0] == 0.:
                        ratio = 1000.0
                    else:
                        ratio = abs(translation_vector_center[1] / translation_vector_center[0])
                    dx = L / math.sqrt(1 + ratio * ratio)
                    dy = dx * ratio
                    if translation_vector_center[0] > 0:
                        dx *= -1
                    if translation_vector_center[1] > 0:
                        dy *= -1
                    cal_dx_dy_flag = False

                if attack_count_idx == 0:
                    for sub_attack_count in range(100):
                        if moving_direction is None:
                            fake_det_bbox = (target_trk_prev['bbox'] + np.array([dx, dy, dx, dy]) * (sub_attack_count + 1)).astype(int)
                        else:
                            fake_det_bbox = (target_trk_prev['bbox'] + np.array(moving_direction) * (sub_attack_count + 1)).astype(int)

                        detected_objects_list[target_det_id]['bbox'] = fake_det_bbox
                        _, param_attack, match_info = pipeline(image, detected_objects_list, frame_count, params, detect_output=True, verbose=0, virtual_attack=virtual_attack, return_match_info=True)
                        if is_match(target_trk_id, target_det_id, match_info):
                            attack_bbox = fake_det_bbox
                            attack_param = param_attack
                            if box_iou(patch_bbox, fake_det_bbox) <= 0.0:
                                break
                        else:
                            break
                    detected_objects_list[target_det_id]['bbox'] = attack_bbox
                else:
                    del detected_objects_list[target_det_id]

                print("Fabricated bbox location {} at frame {}".format(attack_bbox, frame_count))
                image_yolo_pil.save('./output/' + 'ori_' + str(frame_count) + '.png')
                attack_count_idx += 1

        image_track, params, match_info = pipeline(image, detected_objects_list, frame_count, params, detect_output=True, verbose=verbose, virtual_attack=virtual_attack, return_match_info=True, is_init=is_init)

        cv2.imwrite('./output/track/' + str(frame_count) + '.png', image_track)

        match_info_prev = copy.deepcopy(match_info)
        detected_objects_list_prev = copy.deepcopy(nat_detected_objects_list)
        params_prev = copy.deepcopy(params)

    return n_attacks

def cal_success_rate(input_list):
    results = []
    total_num = len(input_list)
    xs = [1, 2, 3, 4, 5, 6, 7, 8]
    for x in xs:
        count = 0
        for ret in input_list:
            if ret <= x:
                count += 1
        results.append(float(count) / float(total_num))
    return results
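
# Worked example (illustrative): with per-video attack lengths [1, 3, 9],
# cal_success_rate returns, for each budget x in 1..8, the fraction of
# videos hijacked within x attack frames: [1/3, 1/3, 2/3, 2/3, 2/3, 2/3,
# 2/3, 2/3]. Note attack_video returns None when the hijack never
# completes; filter those out before calling cal_success_rate.
if __name__ == "__main__":
    print(cal_success_rate([1, 3, 9]))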

--------------------------------------------------------------------------------
/data/move-in-zoomed/01.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/01.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/02.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/02.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/03.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/03.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/04.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/04.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/05.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/05.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/06.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/06.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/07.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/07.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/08.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/08.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/09.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/09.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/10.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/01.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/01.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/02.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/02.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/03.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/03.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/04.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/04.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/05.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/05.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/06.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/06.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/07.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/07.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/08.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/08.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/09.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/09.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/10.mp4

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import sys
import os
sys.path.append(os.path.abspath('./'))
from attack.attack_v2 import attack_video
from collections import deque
import numpy as np
from tqdm import tqdm
from attack.attack_v2 import cal_success_rate

if __name__ == "__main__":
    videos_info = [
        # video file name, attack start frame, patch bbox [left, top, right, bottom],
        # moving direction [dx, dy, dx, dy] (None lets attack_video derive it)
        ('move-in-zoomed/01.mp4', 3, [188, 284, 247, 309], None),
        ('move-in-zoomed/02.mp4', 3, [195, 233, 289, 264], None),
        ('move-in-zoomed/03.mp4', 3, [238, 194, 319, 256], None),
        ('move-in-zoomed/04.mp4', 3, [229, 231, 305, 267], None),
        ('move-in-zoomed/05.mp4', 3, [202, 172, 277, 222], None),
        ('move-in-zoomed/06.mp4', 3, [236, 230, 334, 298], None),
        ('move-in-zoomed/07.mp4', 3, [195, 203, 252, 260], None),
        ('move-in-zoomed/08.mp4', 3, [136, 193, 247, 280], None),
        ('move-in-zoomed/09.mp4', 3, [246, 210, 373, 340], None),
        ('move-in-zoomed/10.mp4', 5, [196, 205, 300, 287], None),
        ('move-out-zoomed/01.mp4', 3, [192, 213, 310, 273], [5, 0, 5, 0]),
        ('move-out-zoomed/02.mp4', 3, [143, 222, 303, 302], [5, 0, 5, 0]),
        ('move-out-zoomed/03.mp4', 3, [158, 192, 300, 283], [-5, 0, -5, 0]),
        ('move-out-zoomed/04.mp4', 3, [154, 230, 281, 289], [-5, 0, -5, 0]),
        ('move-out-zoomed/05.mp4', 3, [194, 167, 297, 249], [5, 0, 5, 0]),
        ('move-out-zoomed/06.mp4', 3, [174, 166, 326, 280], [-5, 0, -5, 0]),
        ('move-out-zoomed/07.mp4', 3, [182, 211, 304, 271], [5, 0, 5, 0]),
        ('move-out-zoomed/08.mp4', 3, [100, 131, 304, 307], [-5, 0, -5, 0]),
        ('move-out-zoomed/09.mp4', 3, [144, 188, 293, 310], [-5, 0, -5, 0]),
        ('move-out-zoomed/10.mp4', 3, [171, 159, 264, 238], [5, 0, 5, 0]),
    ]

    dir_path = './data/'
    results = []
    for idx, video_info in enumerate(tqdm(videos_info)):
        print(video_info[0])
        (video_path, temp_attack_frame, patch_bbox, moving_direction) = video_info
        video_path = os.path.join(dir_path, video_path)
        temp_attack_frame_id_list = []
        for _ in range(100):
            temp_attack_frame_id_list.append(0)
        attack_det_id_dict = {temp_attack_frame : temp_attack_frame_id_list}

        params = {
            'max_age' : 60,  # 4
            'min_hits' : 6,  # 1
            'tracker_list' : [],
        }
        id_list = []
        for i in range(100):  # renamed from idx to avoid shadowing the outer loop index
            id_list.append(str(i))
        params['track_id_list'] = deque(id_list)

        ret = attack_video(params, video_path=video_path, attack_det_id_dict=attack_det_id_dict, patch_bbox=patch_bbox, moving_direction=moving_direction, is_return=True)
        results.append(ret)

    # Report the success rate over attack budgets of 1..8 frames (assumed
    # completion of the unused cal_success_rate import); attack_video returns
    # None for videos where the hijack never completed, so drop those first.
    finished = [r for r in results if r is not None]
    if finished:
        print(cal_success_rate(finished))

--------------------------------------------------------------------------------
/models/yolov3/image_utils.py:
--------------------------------------------------------------------------------
from PIL import Image
import numpy as np

#Debug
import tensorflow as tf
from tensorflow.image import ResizeMethod

def letterbox_image(image, size):
    """ Resize image with unchanged aspect ratio using padding.
10 | 11 | Args: 12 | image: PIL.Image.Image (Jpeg or PNG) 13 | size: Tuple (416, 416) 14 | 15 | Returns: 16 | new_image: PIL.Image.Image 17 | """ 18 | iw, ih = image.size 19 | w, h = size 20 | scale = min(w / iw, h / ih) 21 | nw = int(iw * scale) 22 | nh = int(ih * scale) 23 | 24 | image = image.resize((nw, nh), Image.BICUBIC) 25 | new_image = Image.new('RGB', size, (128, 128, 128)) 26 | # new_image = Image.new('RGB', size, (0, 0, 0)) 27 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 28 | return new_image 29 | 30 | def letterbox_image_tf_dynamic(image, size, resize_method=ResizeMethod.BILINEAR): 31 | """ Letterbox image that handles dynamic Tensor type """ 32 | if len(image.get_shape()) == 4: 33 | ih, iw = tf.shape(image)[1], tf.shape(image)[2] 34 | images = image 35 | else: 36 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 37 | images = [image] 38 | w, h = tf.constant(size[0]), tf.constant(size[1]) 39 | scale = tf.minimum(w / iw, h / ih) 40 | nw = tf.cast(tf.cast(iw, tf.float64) * scale, tf.int32) 41 | nh = tf.cast(tf.cast(ih, tf.float64) * scale, tf.int32) 42 | 43 | image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True) 44 | 45 | h_pad = tf.cast((h-nh)//2, tf.int32) 46 | w_pad = tf.cast((w-nw)//2, tf.int32) 47 | c_pad = 0 48 | if len(image_tensor.shape) == 4: 49 | paddings = [[0,0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 50 | else: 51 | paddings = [[h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 52 | 53 | image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.) 54 | return image_tensor 55 | 56 | 57 | 58 | def letterbox_image_tf_static(image, raw_size, tgt_size, resize_method=ResizeMethod.BILINEAR): 59 | """ Letterbox image that only handles static shape, but more efficiently.""" 60 | if len(image.shape) == 4: 61 | images = image 62 | else: 63 | images = [image] 64 | 65 | iw, ih = raw_size 66 | w, h = tgt_size 67 | scale = min(w / iw, h / ih) 68 | nw = int(iw * scale) 69 | nh = int(ih * scale) 70 | 71 | h_pad, w_pad, c_pad = (h - nh) // 2, (w - nw) // 2, 0 72 | 73 | image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True) 74 | paddings = [[0,0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 75 | 76 | image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.) 77 | return image_tensor 78 | 79 | 80 | def image_to_ndarray(image, expand_dims=True): 81 | """ Convert PIL Image to numpy.ndarray and add batch dimension 82 | 83 | Args: 84 | image: PIL.Image.Image 85 | 86 | Returns: 87 | image_data: numpy.ndarray (1, 416, 416, 3) or (416, 416, 3) 88 | 89 | """ 90 | image_data = np.array(image, dtype='float32') 91 | image_data /= 255. 92 | if expand_dims == True: 93 | image_data = np.expand_dims(image_data, 0) 94 | if image_data.shape[-1] == 4: 95 | image_data = image_data[...,0:-1] 96 | return image_data 97 | 98 | def ndarray_to_image(image_data): 99 | if len(image_data.shape) == 4: 100 | image_data = np.squeeze(image_data, axis=0) 101 | image_data = (image_data * 255).astype("uint8") 102 | return Image.fromarray(image_data) 103 | 104 | def load_yolov3_image(img_fpath): 105 | """ Load and resize an image for yolo3. """ 106 | model_image_size = (416, 416) 107 | image = Image.open(img_fpath) 108 | boxed_image = letterbox_image(image, tuple(reversed(model_image_size))) 109 | image_data = np.array(boxed_image, dtype='float32') 110 | image_data /= 255. 111 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
112 | return image_data 113 | 114 | def l1_diff(image1, image2): 115 | diff = np.abs(image1 - image2) 116 | return np.sum(diff) 117 | 118 | def l0_diff(image1, image2): 119 | diff = np.abs(image1 - image2) 120 | return np.count_nonzero(diff) 121 | 122 | def l_inf_diff(image1, image2): 123 | diff = np.abs(image1 - image2) 124 | return np.max(diff) 125 | 126 | def main(): 127 | image = Image.open('images/cat.jpg') 128 | 129 | boxed_image = letterbox_image(image, tuple(reversed((416,416)))) 130 | image_data_pil = image_to_ndarray(boxed_image, expand_dims=False) 131 | x_img_pil = tf.placeholder(tf.float32, shape=(416, 416, 3)) 132 | 133 | image_data_tf_dynamic = image_to_ndarray(image, expand_dims=False) 134 | x_img_tf_large = tf.placeholder(tf.float32, shape=(None,None, 3)) 135 | x_img_tf = letterbox_image_tf_dynamic(x_img_tf_large, (416,416)) 136 | 137 | image_data_tf_static = image_to_ndarray(image, expand_dims=False) 138 | x_img_tf_large_static = tf.placeholder(tf.float32, shape=(1080,1920, 3)) 139 | x_img_tf_static = letterbox_image_tf_static(x_img_tf_large_static, (1920, 1080), (416, 416)) 140 | 141 | with tf.Session() as sess: 142 | image_resized_pil = sess.run(x_img_pil, feed_dict={x_img_pil: image_data_pil}) 143 | image_resized_tf = sess.run(x_img_tf, feed_dict={x_img_tf_large: image_data_tf_dynamic}) 144 | image_resized_tf = np.squeeze(image_resized_tf, axis=0) 145 | image_resized_tf_static = sess.run(x_img_tf_static, feed_dict={x_img_tf_large_static: image_data_tf_static}) 146 | 147 | 148 | l1 = l1_diff(image_resized_tf, image_resized_tf_static) 149 | l0 = l0_diff(image_resized_tf, image_resized_tf_static) 150 | l_inf = l_inf_diff(image_resized_tf, image_resized_tf_static) 151 | 152 | print("l1 %f, l0 %d, l_inf %f" % (l1, l0, l_inf)) 153 | image_pil = ndarray_to_image(image_resized_pil) 154 | image_tf = ndarray_to_image(image_resized_tf) 155 | image_tf_static = ndarray_to_image(image_resized_tf_static) 156 | 157 | 158 | image_tf.save('tf.png') 159 | image_pil.save('pil.png') 160 | image_tf_static.save('tf_static.png') 161 | 162 | if __name__ == "__main__": 163 | main() -------------------------------------------------------------------------------- /models/yolov3/keras_utils.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | 4 | def compose(*funcs): 5 | """Compose arbitrarily many functions, evaluated left to right. 
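    Example: compose(f, g, h)(x) == h(g(f(x))).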
6 | 7 | Reference: https://mathieularose.com/function-composition-in-python/ 8 | """ 9 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 10 | if funcs: 11 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 12 | else: 13 | raise ValueError('Composition of empty sequence not supported.') 14 | -------------------------------------------------------------------------------- /models/yolov3/model_data/FiraMono-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/models/yolov3/model_data/FiraMono-Medium.otf -------------------------------------------------------------------------------- /models/yolov3/model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /models/yolov3/model_data/yolov3_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 2 | -------------------------------------------------------------------------------- /models/yolov3/yolov3_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Baidu Inc. All Rights Reserved 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ======================================================================== 15 | 16 | """YOLO_v3 Model Defined in Keras.""" 17 | 18 | from functools import wraps 19 | 20 | import pdb 21 | import numpy as np 22 | import tensorflow as tf 23 | from keras import backend as K 24 | from keras.layers import Conv2D, Add,\ 25 | ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D 26 | from keras.layers.advanced_activations import LeakyReLU 27 | from keras.layers.normalization import BatchNormalization 28 | from keras.models import Model 29 | from keras.regularizers import l2 30 | 31 | from models.yolov3.keras_utils import compose 32 | 33 | 34 | @wraps(Conv2D) 35 | def DarknetConv2D(*args, **kwargs): 36 | """ Wrapper to set Darknet parameters for Convolution2D. 37 | 38 | Args: 39 | args: Non-keyword variable length argument list from upper layer function. 40 | kwargs: Keyworded variable length of arguments from upper layer function. 41 | 42 | Returns: 43 | 4D tensor with shape: (batch, channels, rows, cols) if 44 | data_format is "channels_first" or 4D tensor with shape: 45 | (batch, rows, cols, channels) if data_format is "channels_last". 46 | """ 47 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 48 | darknet_conv_kwargs['padding'] =\ 49 | 'valid' if kwargs.get('strides') == (2, 2) else 'same' 50 | darknet_conv_kwargs.update(kwargs) 51 | return Conv2D(*args, **darknet_conv_kwargs) 52 | 53 | 54 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 55 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 56 | no_bias_kwargs = {'use_bias': False} 57 | no_bias_kwargs.update(kwargs) 58 | return compose( 59 | DarknetConv2D(*args, **no_bias_kwargs), 60 | BatchNormalization(), 61 | LeakyReLU(alpha=0.1)) 62 | 63 | 64 | def resblock_body(x, num_filters, num_blocks): 65 | '''A series of resblocks starting with a downsampling Convolution2D''' 66 | # Darknet uses left and top padding instead of 'same' mode 67 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) 68 | x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x) 69 | for i in range(num_blocks): 70 | y = compose( 71 | DarknetConv2D_BN_Leaky(num_filters//2, (1, 1)), 72 | DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x) 73 | x = Add()([x, y]) 74 | return x 75 | 76 | 77 | def darknet_body(x): 78 | """ Darknet layers. 79 | 80 | Darknet have 52 Convolution2D layers 81 | 82 | Args: 83 | Tensor object passing through each layer 84 | """ 85 | x = DarknetConv2D_BN_Leaky(32, (3, 3))(x) 86 | x = resblock_body(x, 64, 1) 87 | x = resblock_body(x, 128, 2) 88 | x = resblock_body(x, 256, 8) 89 | x = resblock_body(x, 512, 8) 90 | x = resblock_body(x, 1024, 4) 91 | return x 92 | 93 | 94 | def make_last_layers(x, num_filters, out_filters): 95 | """ Last few layers for detecting objects with different sizes. 96 | 6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer. 97 | www.cyberailab.com/home/a-closer-look-at-yolov3 98 | """ 99 | x = compose( 100 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)), 101 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 102 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)), 103 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 104 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x) 105 | y = compose( 106 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 107 | DarknetConv2D(out_filters, (1, 1)))(x) 108 | return x, y 109 | 110 | 111 | def yolo_body(inputs, num_anchors, num_classes): 112 | """ Create YOLOv3 model CNN body in Keras. 113 | y1, y2, y3 for detecting small, medium, and large objects. 
114 | 115 | Args: 116 | inputs: Tensor model.inputs [1, 416, 416, 3]. 117 | num_anchors: anchors. 118 | num_classes: number of classes. 119 | 120 | Returns: 121 | model: Keras model, output shape is: 122 | [(1, 13, 13, 255), (1, 13, 13, 255), (1, 13, 13, 255)]. 123 | 255 = 85 (80 classes, 1 logits, 4 box parameters) * 3 (anchor boxes). 124 | 3 elements corresponding to 3 object size (small, medium, large). 125 | """ 126 | darknet = Model(inputs, darknet_body(inputs)) 127 | x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) 128 | 129 | x = compose( 130 | DarknetConv2D_BN_Leaky(256, (1, 1)), 131 | UpSampling2D(2))(x) 132 | x = Concatenate()([x, darknet.layers[152].output]) 133 | x, y2 = make_last_layers(x, 256, num_anchors * (num_classes + 5)) 134 | 135 | x = compose( 136 | DarknetConv2D_BN_Leaky(128, (1, 1)), 137 | UpSampling2D(2))(x) 138 | x = Concatenate()([x, darknet.layers[92].output]) 139 | x, y3 = make_last_layers(x, 128, num_anchors*(num_classes + 5)) 140 | 141 | return Model(inputs, [y1, y2, y3]) 142 | 143 | 144 | def tiny_yolo_body(inputs, num_anchors, num_classes): 145 | '''Create Tiny YOLO_v3 model CNN body in keras.''' 146 | x1 = compose( 147 | DarknetConv2D_BN_Leaky(16, (3, 3)), 148 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 149 | DarknetConv2D_BN_Leaky(32, (3, 3)), 150 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 151 | DarknetConv2D_BN_Leaky(64, (3, 3)), 152 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 153 | DarknetConv2D_BN_Leaky(128, (3, 3)), 154 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 155 | DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs) 156 | x2 = compose( 157 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 158 | DarknetConv2D_BN_Leaky(512, (3, 3)), 159 | MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'), 160 | DarknetConv2D_BN_Leaky(1024, (3, 3)), 161 | DarknetConv2D_BN_Leaky(256, (1, 1)))(x1) 162 | y1 = compose( 163 | DarknetConv2D_BN_Leaky(512, (3, 3)), 164 | DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2) 165 | 166 | x2 = compose( 167 | DarknetConv2D_BN_Leaky(128, (1, 1)), 168 | UpSampling2D(2))(x2) 169 | y2 = compose( 170 | Concatenate(), 171 | DarknetConv2D_BN_Leaky(256, (3, 3)), 172 | DarknetConv2D(num_anchors * (num_classes+5), (1, 1)))([x2, x1]) 173 | 174 | return Model(inputs, [y1, y2]) 175 | 176 | 177 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 178 | """ Convert final layer features to bounding box parameters. No threshold 179 | or nms applied yet. 180 | 181 | Args: 182 | feats: Elements in the output list from K.model.output: 183 | shape = (N, 13, 13, 255) 184 | anchors: anchors. 185 | num_classes: num of classes. 186 | input_shape: input shape obtained from model output grid information. 187 | 188 | Returns: 189 | Breaking the 85 output logits into box_xy, box_wh, box_confidence, and 190 | box_class_probs. 191 | """ 192 | 193 | num_anchors = len(anchors) 194 | # Reshape to batch, height, width, num_anchors, box_params. 
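    # Decoding below follows the YOLOv3 formulation: for raw outputs
    # (tx, ty, tw, th) at grid cell (cx, cy) with anchor (pw, ph),
    #   bx = (sigmoid(tx) + cx) / grid_w,   by = (sigmoid(ty) + cy) / grid_h
    #   bw = pw * exp(tw) / input_w,        bh = ph * exp(th) / input_h
    # so box_xy and box_wh come out normalized to the model input size.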
195 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 196 | 197 | grid_shape = K.shape(feats)[1:3] # height, width 198 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 199 | [1, grid_shape[1], 1, 1]) 200 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 201 | [grid_shape[0], 1, 1, 1]) 202 | grid = K.concatenate([grid_x, grid_y]) 203 | grid = K.cast(grid, K.dtype(feats)) 204 | 205 | feats = K.reshape( 206 | feats, 207 | [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 208 | 209 | # The last dimension: x,y,w,h,objectness,80_class_conf 210 | # Adjust preditions to each spatial grid point and anchor size. 211 | box_xy = (K.sigmoid(feats[..., :2]) + grid) /\ 212 | K.cast(grid_shape[::-1], K.dtype(feats)) 213 | 214 | box_wh = K.exp(feats[..., 2:4]) *\ 215 | anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) 216 | box_confidence = K.sigmoid(feats[..., 4:5]) 217 | box_class_probs = K.sigmoid(feats[..., 5:]) 218 | 219 | box_coord_logits = feats[..., :4] 220 | box_confidence_logits = feats[..., 4:5] 221 | box_class_probs_logits = feats[..., 5:] 222 | 223 | if calc_loss is True: 224 | return grid, feats, box_xy, box_wh 225 | return box_xy, box_wh, box_confidence,\ 226 | box_class_probs, box_coord_logits,\ 227 | box_confidence_logits, box_class_probs_logits 228 | 229 | 230 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 231 | """ Scale boxes to original image shape. 232 | 233 | Args: 234 | input_shape: shape of model input (416, 416) 235 | 236 | Taking (1920, 1080) pic as an example 237 | 238 | """ 239 | box_yx = box_xy[..., ::-1] # reverse last dimension list order 240 | box_hw = box_wh[..., ::-1] 241 | input_shape = K.cast(input_shape, K.dtype(box_yx)) # (416, 416) 242 | image_shape = K.cast(image_shape, K.dtype(box_yx)) # (416, 416) 243 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 244 | offset = (input_shape-new_shape)/2./input_shape 245 | scale = input_shape/new_shape 246 | box_yx = (box_yx - offset) * scale # rescale to [1080, 1920] 247 | box_hw *= scale 248 | 249 | box_mins = box_yx - (box_hw / 2.) 250 | box_maxes = box_yx + (box_hw / 2.) 251 | boxes = K.concatenate([ 252 | box_mins[..., 0:1], # y_min 253 | box_mins[..., 1:2], # x_min 254 | box_maxes[..., 0:1], # y_max 255 | box_maxes[..., 1:2] # x_max 256 | ]) 257 | 258 | # Scale boxes back to original image shape. 259 | boxes *= K.concatenate([image_shape, image_shape]) 260 | return boxes 261 | 262 | 263 | def yolo_boxes_and_scores(feats, anchors, 264 | num_classes, input_shape, image_shape): 265 | """ Convert Conv layer output to boxes 266 | 267 | Multiply box_confidence with class_confidence to get real box_scores for each class 268 | 269 | Args: 270 | feats: Elements in the output list from K.model.output: 271 | shape = (N, 13, 13, 255) 272 | anchors: anchors. 273 | num_classes: num of classes. 274 | input_shape: input shape obtained from model output grid information. 275 | image_shape: placeholder for ORIGINAL image data shape. 
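        Returns:
            boxes: (num_boxes, 4) tensor, rescaled to the original image shape.
            box_scores: (num_boxes, num_classes) tensor,
                box_confidence * box_class_probs.
            box_coord_logits, box_confidence_logits, box_class_probs_logits:
                the corresponding raw logits, flattened the same way.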
276 | 277 | """ 278 | box_xy, box_wh, box_confidence, box_class_probs,\ 279 | box_coord_logits, box_confidence_logits,\ 280 | box_class_probs_logits = yolo_head( 281 | feats, anchors, num_classes, input_shape) 282 | 283 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 284 | 285 | boxes = K.reshape(boxes, [-1, 4]) 286 | 287 | box_scores = box_confidence * box_class_probs 288 | box_scores = K.reshape(box_scores, [-1, num_classes]) 289 | 290 | box_coord_logits = K.reshape(box_coord_logits, [-1, 4]) 291 | box_confidence_logits = K.reshape(box_confidence_logits, [-1]) 292 | box_class_probs_logits =\ 293 | K.reshape(box_class_probs_logits, [-1, num_classes]) 294 | return boxes, box_scores, box_coord_logits,\ 295 | box_confidence_logits, box_class_probs_logits 296 | 297 | 298 | def yolo_eval(yolo_outputs, 299 | anchors, 300 | num_classes, 301 | image_shape, 302 | max_boxes=20, 303 | score_threshold=.6, 304 | iou_threshold=.5): 305 | """Evaluate YOLO model on given input and return filtered boxes.""" 306 | num_layers = len(yolo_outputs) 307 | # import pdb 308 | # pdb.set_trace() 309 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 310 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 311 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 312 | boxes = [] 313 | box_scores = [] 314 | 315 | box_coord_logits = [] 316 | box_confidence_logits = [] 317 | box_class_probs_logits = [] 318 | 319 | for l in range(num_layers): 320 | _boxes, _box_scores, _box_coord_logits,\ 321 | _box_confidence_logits, _box_class_probs_logits =\ 322 | yolo_boxes_and_scores( 323 | yolo_outputs[l], 324 | anchors[anchor_mask[l]], 325 | num_classes, input_shape, image_shape) 326 | boxes.append(_boxes) 327 | box_scores.append(_box_scores) 328 | 329 | box_coord_logits.append(_box_coord_logits) 330 | box_confidence_logits.append(_box_confidence_logits) 331 | box_class_probs_logits.append(_box_class_probs_logits) 332 | 333 | boxes = K.concatenate(boxes, axis=0) 334 | box_scores = K.concatenate(box_scores, axis=0) 335 | 336 | box_coord_logits = K.concatenate(box_coord_logits, axis=0) 337 | box_confidence_logits = K.concatenate(box_confidence_logits, axis=0) 338 | box_class_probs_logits = K.concatenate(box_class_probs_logits, axis=0) 339 | 340 | mask = box_scores >= score_threshold 341 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 342 | boxes_ = [] 343 | scores_ = [] 344 | classes_ = [] 345 | for c in range(num_classes): 346 | # TODO: use keras backend instead of tf. 347 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 348 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 349 | nms_index = tf.image.non_max_suppression( 350 | class_boxes, 351 | class_box_scores, 352 | max_boxes_tensor, 353 | iou_threshold=iou_threshold) 354 | class_boxes = K.gather(class_boxes, nms_index) 355 | class_box_scores = K.gather(class_box_scores, nms_index) 356 | classes = K.ones_like(class_box_scores, 'int32') * c 357 | boxes_.append(class_boxes) 358 | scores_.append(class_box_scores) 359 | classes_.append(classes) 360 | boxes_ = K.concatenate(boxes_, axis=0) 361 | scores_ = K.concatenate(scores_, axis=0) 362 | classes_ = K.concatenate(classes_, axis=0) 363 | 364 | return boxes_, scores_, classes_ 365 | 366 | 367 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): 368 | '''Preprocess true boxes to training input format 369 | 370 | Parameters 371 | ---------- 372 | true_boxes: array, shape=(m, T, 5) 373 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. 
374 | input_shape: array-like, hw, multiples of 32 375 | anchors: array, shape=(N, 2), wh 376 | num_classes: integer 377 | 378 | Returns 379 | ------- 380 | y_true: list of array, shape like yolo_outputs, xywh are reletive value 381 | 382 | ''' 383 | assert (true_boxes[..., 4] < num_classes).all(),\ 384 | 'class id must be less than num_classes' 385 | num_layers = len(anchors)//3 # default setting 386 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 387 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 388 | 389 | true_boxes = np.array(true_boxes, dtype='float32') 390 | input_shape = np.array(input_shape, dtype='int32') 391 | boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2 392 | boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2] 393 | true_boxes[..., 0:2] = boxes_xy / input_shape[::-1] 394 | true_boxes[..., 2:4] = boxes_wh / input_shape[::-1] 395 | 396 | m = true_boxes.shape[0] 397 | grid_shapes = [input_shape//{0: 32, 1: 16, 2: 8}[l] 398 | for l in range(num_layers)] 399 | y_true = [np.zeros((m, grid_shapes[l][0], 400 | grid_shapes[l][1], 401 | len(anchor_mask[l]), 402 | 5 + num_classes), 403 | dtype='float32') 404 | for l in range(num_layers)] 405 | 406 | # Expand dim to apply broadcasting. 407 | anchors = np.expand_dims(anchors, 0) 408 | anchor_maxes = anchors / 2. 409 | anchor_mins = -anchor_maxes 410 | valid_mask = boxes_wh[..., 0] > 0 411 | 412 | for b in range(m): 413 | # Discard zero rows. 414 | wh = boxes_wh[b, valid_mask[b]] 415 | if len(wh) == 0: 416 | continue 417 | # Expand dim to apply broadcasting. 418 | wh = np.expand_dims(wh, -2) 419 | box_maxes = wh / 2. 420 | box_mins = -box_maxes 421 | 422 | intersect_mins = np.maximum(box_mins, anchor_mins) 423 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 424 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 425 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 426 | box_area = wh[..., 0] * wh[..., 1] 427 | anchor_area = anchors[..., 0] * anchors[..., 1] 428 | iou = intersect_area / (box_area + anchor_area - intersect_area) 429 | 430 | # Find best anchor for each true box 431 | best_anchor = np.argmax(iou, axis=-1) 432 | 433 | for t, n in enumerate(best_anchor): 434 | for l in range(num_layers): 435 | if n in anchor_mask[l]: 436 | i = np.floor(true_boxes[b, t, 0] * 437 | grid_shapes[l][1]).astype('int32') 438 | j = np.floor(true_boxes[b, t, 1] * 439 | grid_shapes[l][0]).astype('int32') 440 | k = anchor_mask[l].index(n) 441 | c = true_boxes[b, t, 4].astype('int32') 442 | y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4] 443 | y_true[l][b, j, i, k, 4] = 1 444 | y_true[l][b, j, i, k, 5 + c] = 1 445 | 446 | return y_true 447 | 448 | 449 | def box_iou(b1, b2): 450 | '''Return iou tensor 451 | 452 | Parameters 453 | ---------- 454 | b1: tensor, shape=(i1,...,iN, 4), xywh 455 | b2: tensor, shape=(j, 4), xywh 456 | 457 | Returns 458 | ------- 459 | iou: tensor, shape=(i1,...,iN, j) 460 | 461 | ''' 462 | 463 | # Expand dim to apply broadcasting. 464 | b1 = K.expand_dims(b1, -2) 465 | b1_xy = b1[..., :2] 466 | b1_wh = b1[..., 2:4] 467 | b1_wh_half = b1_wh/2. 468 | b1_mins = b1_xy - b1_wh_half 469 | b1_maxes = b1_xy + b1_wh_half 470 | 471 | # Expand dim to apply broadcasting. 472 | b2 = K.expand_dims(b2, 0) 473 | b2_xy = b2[..., :2] 474 | b2_wh = b2[..., 2:4] 475 | b2_wh_half = b2_wh/2. 
476 | b2_mins = b2_xy - b2_wh_half 477 | b2_maxes = b2_xy + b2_wh_half 478 | 479 | intersect_mins = K.maximum(b1_mins, b2_mins) 480 | intersect_maxes = K.minimum(b1_maxes, b2_maxes) 481 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 482 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 483 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 484 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 485 | iou = intersect_area / (b1_area + b2_area - intersect_area) 486 | 487 | return iou 488 | 489 | 490 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): 491 | '''Return yolo_loss tensor 492 | 493 | Parameters 494 | ---------- 495 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body 496 | y_true: list of array, the output of preprocess_true_boxes 497 | anchors: array, shape=(N, 2), wh 498 | num_classes: integer 499 | ignore_thresh: float, the iou threshold whether 500 | to ignore object confidence loss 501 | 502 | Returns 503 | ------- 504 | loss: tensor, shape=(1,) 505 | 506 | ''' 507 | num_layers = len(anchors)//3 # default setting 508 | yolo_outputs = args[:num_layers] 509 | y_true = args[num_layers:] 510 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 511 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 512 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 513 | 32, K.dtype(y_true[0])) 514 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], 515 | K.dtype(y_true[0])) for l in range(num_layers)] 516 | loss = 0 517 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor 518 | mf = K.cast(m, K.dtype(yolo_outputs[0])) 519 | 520 | for l in range(num_layers): 521 | object_mask = y_true[l][..., 4:5] 522 | true_class_probs = y_true[l][..., 5:] 523 | 524 | grid, raw_pred, pred_xy, pred_wh = yolo_head( 525 | yolo_outputs[l], 526 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) 527 | pred_box = K.concatenate([pred_xy, pred_wh]) 528 | 529 | # Darknet raw box to calculate loss. 530 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid 531 | raw_true_wh = K.log(y_true[l][..., 2:4] / 532 | anchors[anchor_mask[l]] * input_shape[::-1]) 533 | raw_true_wh = K.switch(object_mask, 534 | raw_true_wh, K.zeros_like(raw_true_wh)) 535 | box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4] 536 | 537 | # Find ignore mask, iterate over each of batch. 538 | ignore_mask = tf.TensorArray( 539 | K.dtype(y_true[0]), 540 | size=1, 541 | dynamic_size=True) 542 | object_mask_bool = K.cast(object_mask, 'bool') 543 | 544 | def loop_body(b, ignore_mask): 545 | true_box = tf.boolean_mask( 546 | y_true[l][b, ..., 0:4], 547 | object_mask_bool[b, ..., 0]) 548 | iou = box_iou(pred_box[b], true_box) 549 | best_iou = K.max(iou, axis=-1) 550 | ignore_mask = ignore_mask.write( 551 | b, 552 | K.cast(best_iou < ignore_thresh, K.dtype(true_box))) 553 | return b+1, ignore_mask 554 | _, ignore_mask = K.control_flow_ops.while_loop( 555 | lambda b, 556 | *args: b < m, 557 | loop_body, 558 | [0, ignore_mask]) 559 | ignore_mask = ignore_mask.stack() 560 | ignore_mask = K.expand_dims(ignore_mask, -1) 561 | 562 | # K.binary_crossentropy is helpful to avoid exp overflow. 
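        # Four loss terms per scale, each masked and averaged over the batch:
        #   xy_loss         - BCE on sigmoid-space center offsets, weighted by
        #                     box_loss_scale = 2 - w*h so small boxes count more
        #   wh_loss         - squared error on log-space width/height
        #   confidence_loss - BCE on objectness; background cells contribute
        #                     only where best IoU < ignore_thresh (ignore_mask)
        #   class_loss      - BCE over the per-class probabilities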
563 |         xy_loss = object_mask * box_loss_scale *\
564 |             K.binary_crossentropy(
565 |                 raw_true_xy,
566 |                 raw_pred[..., 0:2],
567 |                 from_logits=True)
568 |         wh_loss = object_mask * box_loss_scale * 0.5\
569 |             * K.square(raw_true_wh-raw_pred[..., 2:4])
570 |         confidence_loss = object_mask *\
571 |             K.binary_crossentropy(
572 |                 object_mask,
573 |                 raw_pred[..., 4:5],
574 |                 from_logits=True) +\
575 |             (1-object_mask) *\
576 |             K.binary_crossentropy(
577 |                 object_mask,
578 |                 raw_pred[..., 4:5],
579 |                 from_logits=True) *\
580 |             ignore_mask
581 |         class_loss = object_mask * K.binary_crossentropy(
582 |             true_class_probs,
583 |             raw_pred[..., 5:],
584 |             from_logits=True)
585 | 
586 |         xy_loss = K.sum(xy_loss) / mf
587 |         wh_loss = K.sum(wh_loss) / mf
588 |         confidence_loss = K.sum(confidence_loss) / mf
589 |         class_loss = K.sum(class_loss) / mf
590 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
591 |         if print_loss:
592 |             loss = tf.Print(
593 |                 loss,
594 |                 [loss,
595 |                  xy_loss,
596 |                  wh_loss,
597 |                  confidence_loss,
598 |                  class_loss,
599 |                  K.sum(ignore_mask)],
600 |                 message='loss: ')
601 |     return loss
602 | 
--------------------------------------------------------------------------------
/models/yolov3/yolov3_wrapper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("/home/yantao/workspace/projects/baidu/bbox_std")
3 | 
4 | import tensorflow as tf
5 | import os
6 | import numpy as np
7 | import colorsys
8 | import logging
9 | import json
10 | import pickle
11 | 
12 | from PIL import Image, ImageFont, ImageDraw
13 | from collections import defaultdict
14 | from keras import backend as K
15 | from keras.models import Model
16 | from keras.layers import Input, Lambda
17 | from models.yolov3.yolov3_model import yolo_body, yolo_eval
18 | from models.yolov3.image_utils import letterbox_image, image_to_ndarray, letterbox_image_tf_dynamic
19 | 
20 | import pdb
21 | 
22 | class YOLOv3(object):
23 |     _defaults = {
24 |         "model_path": 'models/yolov3/model_data/yolov3.h5',
25 |         "anchors_path": 'models/yolov3/model_data/yolov3_anchors.txt',
26 |         "classes_path": 'models/yolov3/model_data/coco_classes.txt',
27 |         "box_score_threshold": 0.3,
28 |         "nms_iou_threshold": 0.45,
29 |         "mAP_iou_threshold": 0.5,
30 |         "model_image_size": (416, 416),
31 |         "gpu_num": 1,
32 |     }
33 |     @classmethod
34 |     def get_defaults(cls, n):
35 |         if n in cls._defaults:
36 |             return cls._defaults[n]
37 |         else:
38 |             return "Unrecognized attribute name '" + n + "'"
39 | 
40 |     def _get_class(self):
41 |         classes_path = os.path.expanduser(self.classes_path)
42 |         with open(classes_path) as f:
43 |             class_names = f.readlines()
44 |         class_names = [c.strip() for c in class_names]
45 |         return class_names
46 | 
47 |     def _get_anchors(self):
48 |         anchors_path = os.path.expanduser(self.anchors_path)
49 |         with open(anchors_path) as f:
50 |             anchors = f.readline()
51 |         anchors = [float(x) for x in anchors.split(',')]
52 |         return np.array(anchors).reshape(-1, 2)
53 | 
54 |     def __init__(self, **kwargs):
55 |         self.__dict__.update(self._defaults)
56 |         self.__dict__.update(kwargs)
57 |         K.set_session(self.sess)  # expects a tf session passed as sess=...
58 |         self.logger = logging.getLogger(self.__class__.__name__)
59 |         self.class_names = self._get_class()
60 |         self.num_classes = len(self.class_names)
61 |         self.anchors = self._get_anchors()
62 |         self.logger.info("Loading %s model ...", self.__class__.__name__)
63 |         self.model = self.create_model()
64 |         self.logger.info("Model loaded.")
65 |         self.input_image_shape = K.placeholder(shape=(2,))
66 |         self.boxes, self.scores, self.classes = yolo_eval(self.model.output,
67 |                                                           self.anchors, self.num_classes,
68 |                                                           self.input_image_shape,
69 |                                                           score_threshold=self.box_score_threshold,
70 |                                                           iou_threshold=self.nms_iou_threshold)
71 | 
72 |     def create_model(self):
73 | 
74 |         self.input_image = tf.placeholder(tf.float32, (None, None, None, 3))
75 |         boxed_image = letterbox_image_tf_dynamic(self.input_image, (416, 416))
76 |         input = Input(tensor=boxed_image)
77 |         model = yolo_body(input, len(self.anchors)//3, len(self.class_names))
78 | 
79 |         model.load_weights(self.model_path)
80 |         return model
81 | 
82 |     def _feed_forward(self, image):
83 |         image_data = image_to_ndarray(image)
84 |         image_shape = [image.size[1], image.size[0]]  # Original image dimensions (h, w)
85 |         out_boxes, out_scores, out_classes = self.sess.run(
86 |             [self.boxes, self.scores, self.classes],
87 |             feed_dict={
88 |                 self.input_image: image_data,
89 |                 self.input_image_shape: image_shape,
90 |                 K.learning_phase(): 0
91 |             })
92 |         return out_boxes, out_scores, out_classes
93 | 
94 |     def predict(self, image, show_image=False):
95 |         '''
96 |         Return a dictionary of lists.
97 | 
98 |         Output:
99 |         {
100 |             'boxes' : [[top, left, bottom, right], ...]
101 |             'scores' : [float, ...]
102 |             'classes' : [int, ...]
103 |         }
104 |         '''
105 | 
106 |         out_boxes, out_scores, out_classes = self._feed_forward(image)
107 |         prediction = {}
108 |         prediction['boxes'] = []
109 |         prediction['scores'] = []
110 |         prediction['classes'] = []
111 |         for temp_box, temp_score, temp_class in zip(out_boxes, out_scores, out_classes):
112 |             prediction['boxes'].append(temp_box.tolist())
113 |             prediction['scores'].append(temp_score)
114 |             prediction['classes'].append(temp_class)
115 | 
116 |         return prediction
117 | 
118 | def main():
119 |     image = Image.open('images/cat.jpg')
120 |     model = YOLOv3(sess=K.get_session())
121 | 
122 | if __name__ == "__main__":
123 |     main()
--------------------------------------------------------------------------------
/models/yolov3_wrapper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import tensorflow as tf
4 | import os
5 | import numpy as np
6 | import colorsys
7 | import logging
8 | import json
9 | import pickle
10 | 
11 | from PIL import Image, ImageFont, ImageDraw
12 | from collections import defaultdict
13 | from keras import backend as K
14 | from keras.models import Model
15 | from keras.layers import Input, Lambda
16 | from models.yolov3.yolov3_model import yolo_body, yolo_eval
17 | from models.yolov3.image_utils import letterbox_image, image_to_ndarray, letterbox_image_tf_dynamic
18 | 
19 | import pdb
20 | 
21 | class YOLOv3(object):
22 |     _defaults = {
23 |         "model_path": 'models/yolov3/model_data/yolov3.h5',
24 |         "anchors_path": 'models/yolov3/model_data/yolov3_anchors.txt',
25 |         "classes_path": 'models/yolov3/model_data/coco_classes.txt',
26 |         "box_score_threshold": 0.3,
27 |         "nms_iou_threshold": 0.45,
28 |         "mAP_iou_threshold": 0.5,
29 |         "model_image_size": (416, 416),
30 |         "gpu_num": 1,
31 |     }
32 |     @classmethod
33 |     def get_defaults(cls, n):
34 |         if n in cls._defaults:
35 |             return cls._defaults[n]
36 |         else:
37 |             return "Unrecognized attribute name '" + n + "'"
38 | 
39 |     def _get_class(self):
40 |         classes_path = os.path.expanduser(self.classes_path)
41 |         with open(classes_path) as f:
42 |             class_names = f.readlines()
43 |         class_names = [c.strip() for c in class_names]
44 |         return class_names
45 | 
46 |     def _get_anchors(self):
47 |         anchors_path = os.path.expanduser(self.anchors_path)
48 |         with open(anchors_path) as f:
49 |             anchors = f.readline()
50 |         anchors = [float(x) for x in anchors.split(',')]
51 |         return np.array(anchors).reshape(-1, 2)
52 | 
53 |     def __init__(self, **kwargs):
54 |         self.__dict__.update(self._defaults)
55 |         self.__dict__.update(kwargs)
56 |         K.set_session(self.sess)  # expects a tf session passed as sess=...
57 |         self.logger = logging.getLogger(self.__class__.__name__)
58 |         self.class_names = self._get_class()
59 |         self.num_classes = len(self.class_names)
60 |         self.anchors = self._get_anchors()
61 |         self.logger.info("Loading %s model ...", self.__class__.__name__)
62 |         self.model = self.create_model()
63 |         self.logger.info("Model loaded.")
64 |         self.input_image_shape = K.placeholder(shape=(2,))
65 |         self.boxes, self.scores, self.classes = yolo_eval(self.model.output,
66 |                                                           self.anchors, self.num_classes,
67 |                                                           self.input_image_shape,
68 |                                                           score_threshold=self.box_score_threshold,
69 |                                                           iou_threshold=self.nms_iou_threshold)
70 | 
71 |     def create_model(self):
72 | 
73 |         self.input_image = tf.placeholder(tf.float32, (None, None, None, 3))
74 |         boxed_image = letterbox_image_tf_dynamic(self.input_image, (416, 416))
75 |         input = Input(tensor=boxed_image)
76 |         model = yolo_body(input, len(self.anchors)//3, len(self.class_names))
77 | 
78 |         model.load_weights(self.model_path)
79 |         return model
80 | 
81 |     def _feed_forward(self, image):
82 |         image_data = image_to_ndarray(image)
83 |         image_shape = [image.size[1], image.size[0]]  # Original image dimensions (h, w)
84 |         out_boxes, out_scores, out_classes = self.sess.run(
85 |             [self.boxes, self.scores, self.classes],
86 |             feed_dict={
87 |                 self.input_image: image_data,
88 |                 self.input_image_shape: image_shape,
89 |                 K.learning_phase(): 0
90 |             })
91 |         return out_boxes, out_scores, out_classes
92 | 
93 |     def predict(self, image, show_image=False):
94 |         '''
95 |         Return a dictionary of lists.
96 | 
97 |         Output:
98 |         {
99 |             'boxes' : [[top, left, bottom, right], ...]
100 |             'scores' : [float, ...]
101 |             'classes' : [int, ...]
102 |         }
103 |         '''
104 | 
105 |         out_boxes, out_scores, out_classes = self._feed_forward(image)
106 |         prediction = {}
107 |         prediction['boxes'] = []
108 |         prediction['scores'] = []
109 |         prediction['classes'] = []
110 |         for temp_box, temp_score, temp_class in zip(out_boxes, out_scores, out_classes):
111 |             prediction['boxes'].append(temp_box.tolist())
112 |             prediction['scores'].append(temp_score)
113 |             prediction['classes'].append(temp_class)
114 | 
115 |         return prediction
116 | 
117 | def main():
118 |     model = YOLOv3(sess=K.get_session())
119 | 
120 | if __name__ == "__main__":
121 |     main()
--------------------------------------------------------------------------------
/pipeline_center.py:
--------------------------------------------------------------------------------
1 | from tracker.kalman_filter import Tracker_center as Tracker
2 | from utils import utils
3 | from assign_detection_to_trackers import assign_detections_to_trackers
4 | 
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | import copy
9 | 
10 | import pdb
11 | 
12 | def pipeline(img, det, frame_count, params_ori, is_init=False, detect_output=False, verbose=1, virtual_attack=False, return_match_info=False):
13 |     '''
14 |     Pipeline function for detection and tracking
15 |     Args:
16 |         img : nparray
17 |             input image array
18 |         det : object or list
19 |             detector or detection results
20 |         frame_count : int
21 |             frame index
22 |         params_ori : dict
23 |             parameters used for tracking
24 |         detect_output : bool
25 |             If True, det is detection results
26 |         verbose : int
27 |             verbosity level (1 prints per-frame debug output)
28 |         virtual_attack : bool
29 |             If True, simulate the attack by dropping all detections.
30 |     '''
31 |     params = copy.deepcopy(params_ori)
32 |     if detect_output:
33 |         assert isinstance(det, list) or det is None
34 | 
35 |     tracker_list = params['tracker_list']
36 |     max_age = params['max_age']
37 |     min_hits = params['min_hits']
38 |     track_id_list = params['track_id_list']
39 | 
40 |     frame_count += 1
41 |     if detect_output:
42 |         detected_objects_list = det
43 |     else:
44 |         detected_objects_list = det.detect_image(img)
45 | 
46 |     if virtual_attack:
47 |         detected_objects_list = []
48 | 
49 |     if verbose == 1:
50 |         print('Frame:', frame_count)
51 |         print('Detected objects: ', detected_objects_list)
52 | 
53 |     x_obj = []
54 |     img_bbox = img.copy()
55 |     for idx, detected_object in enumerate(detected_objects_list):
56 |         img_bbox = utils.draw_box_label(img_bbox, detected_object, box_color=(255, 0, 0), thickness=10)
57 | 
58 |     if len(tracker_list) > 0:
59 |         for trk in tracker_list:
60 |             x_obj.append(trk.obj)
61 | 
62 |     z_obj = [obj for obj in detected_objects_list]
63 | 
64 |     matched, unmatched_dets, unmatched_trks = assign_detections_to_trackers(x_obj, z_obj, iou_thrd=0.5)  # 0.3
65 | 
66 |     if verbose == 1:
67 |         print('Detection: ', z_obj)
68 |         print('x_obj: ', x_obj)
69 |         print('matched:', matched)
70 |         print('unmatched_det:', unmatched_dets)
71 |         print('unmatched_trks:', unmatched_trks)
72 | 
73 |     # Deal with matched detections
74 |     if matched.size > 0:
75 |         for trk_idx, det_idx in matched:
76 |             z = z_obj[det_idx]['bbox']
77 | 
78 |             z_center = np.array([(z[0] + z[2]) / 2, (z[1] + z[3]) / 2])
79 |             z_center = np.expand_dims(z_center, axis=0).T
80 |             z_wh = np.array([z[2] - z[0] + 1, z[3] - z[1] + 1])
81 |             tmp_trk = tracker_list[trk_idx]
82 |             tmp_trk.kalman_filter(z_center, z_wh)
83 |             xx_state = tmp_trk.get_x_state().T[0].tolist()
84 |             xx_center = [xx_state[0], xx_state[2]]
85 |             xx_wh = tmp_trk.whRCF.get_state()
86 | 
87 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2]).astype('int')
88 |             x_obj[trk_idx]['bbox'] = temp_bbox
89 |             tmp_trk.obj['bbox'] = temp_bbox
90 |             x_obj[trk_idx]['score'] = z_obj[det_idx]['score']
91 |             tmp_trk.obj['score'] = z_obj[det_idx]['score']
92 |             x_obj[trk_idx]['class_idx'] = z_obj[det_idx]['class_idx']
93 |             tmp_trk.obj['class_idx'] = z_obj[det_idx]['class_idx']
94 |             x_obj[trk_idx]['class_name'] = z_obj[det_idx]['class_name']
95 |             tmp_trk.obj['class_name'] = z_obj[det_idx]['class_name']
96 | 
97 |             if not is_init:
98 |                 tmp_trk.hits += 1
99 |             else:
100 |                 tmp_trk.hits = params_ori['min_hits']
101 |             tmp_trk.no_losses = 0
102 | 
103 |     # Deal with unmatched detections
104 |     if len(unmatched_dets) > 0:
105 |         for idx in unmatched_dets:
106 |             z = z_obj[idx]['bbox']
107 | 
108 |             z_center = np.array([(z[0] + z[2]) / 2, (z[1] + z[3]) / 2])
109 |             z_center = np.expand_dims(z_center, axis=0).T
110 |             z_wh = np.array([z[2] - z[0] + 1, z[3] - z[1] + 1])
111 |             tmp_trk = Tracker()  # Create a new tracker
112 |             x = np.array([[z_center[0], 0, z_center[1], 0]]).T
113 |             tmp_trk.Init(x, z_wh)
114 |             tmp_trk.predict_only()
115 |             xx_state = tmp_trk.get_x_state()
116 |             xx_state = xx_state.T[0].tolist()
117 |             xx_center = [xx_state[0], xx_state[2]]
118 |             xx_wh = tmp_trk.whRCF.get_state()
119 | 
120 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2]).astype('int')
121 |             tmp_trk.obj['bbox'] = temp_bbox
122 |             tmp_trk.obj['score'] = z_obj[idx]['score']
123 |             tmp_trk.obj['class_idx'] = z_obj[idx]['class_idx']
124 |             tmp_trk.obj['class_name'] = z_obj[idx]['class_name']
125 | 
126 |             tmp_trk.id = track_id_list.popleft()  # assign an ID for the tracker
127 |             tracker_list.append(tmp_trk)
128 |             x_obj.append(tmp_trk.obj)
129 | 
130 |     # Deal with unmatched tracks
131 |     if len(unmatched_trks) > 0:
132 |         for trk_idx in unmatched_trks:
133 |             tmp_trk = tracker_list[trk_idx]
134 |             tmp_trk.no_losses += 1
135 |             tmp_trk.predict_only()
136 |             xx_state = tmp_trk.get_x_state()
137 |             xx_state = xx_state.T[0].tolist()
138 |             xx_center = [xx_state[0], xx_state[2]]
139 |             xx_wh = tmp_trk.whRCF.get_state()
140 | 
141 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2])
142 |             tmp_trk.obj['bbox'] = temp_bbox
143 |             x_obj[trk_idx]['bbox'] = temp_bbox
144 | 
145 |     img_bbox_track = img_bbox.copy()
146 |     # The list of tracks to be annotated
147 |     good_tracker_list = []
148 |     for trk in tracker_list:
149 |         if ((trk.hits >= min_hits) and (trk.no_losses <= max_age)):
150 |             good_tracker_list.append(trk)
151 |             x_cv2 = trk.obj['bbox']
152 |             if verbose == 1:
153 |                 print('updated box: ', x_cv2)
154 |             img_bbox_track = utils.draw_box_label(img_bbox, trk.obj)  # Draw the bounding boxes on the image
155 | 
156 |     # Book keeping
157 |     deleted_tracks = filter(lambda x: x.no_losses > max_age, tracker_list)
158 | 
159 |     for trk in deleted_tracks:
160 |         track_id_list.append(trk.id)
161 | 
162 |     tracker_list = [x for x in tracker_list if x.no_losses <= max_age]
163 |     if verbose == 1:
164 |         print('Ending tracker_list: ', len(tracker_list))
165 |         print('Ending good tracker_list: ', len(good_tracker_list))
166 | 
167 |     params_new = {}
168 |     params_new['tracker_list'] = tracker_list
169 |     params_new['max_age'] = max_age
170 |     params_new['min_hits'] = min_hits
171 |     params_new['track_id_list'] = track_id_list
172 | 
173 |     if return_match_info:
174 |         return img_bbox_track, params_new, (matched, unmatched_dets, unmatched_trks)
175 |     else:
176 |         return img_bbox_track, params_new
--------------------------------------------------------------------------------
/tracker/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import dot
3 | from scipy.linalg import inv, block_diag
4 | 
5 | import pdb
6 | 
7 | class FirstOrderRCLowPassFilter():
8 |     def __init__(self):
9 |         self.alpha_ = 0.0
10 |         self.inited_ = False
11 |         self.state_ = np.array([0.0, 0.0])
12 | 
13 |     def SetAlpha(self, alpha):
14 |         self.alpha_ = alpha
15 |         self.inited_ = False
16 | 
17 |     def AddMeasure(self, z):
18 |         if self.inited_:
19 |             self.state_ = z + self.alpha_ * (self.state_ - z)
20 |         else:
21 |             self.state_ = z
22 |             self.inited_ = True
23 | 
24 |     def AddMeasure_noinput(self):
25 |         z = self.state_
26 |         self.AddMeasure(z)
27 | 
28 | 
29 |     def get_state(self):
30 |         return self.state_
31 | 
32 |     def isInited(self):
33 |         return self.inited_
34 | 
35 | 
36 | class Tracker_center():  # Kalman filter that only tracks the center of the bbox
37 |     def __init__(self):
38 |         self.inited_ = False
39 |         self.id = 0  # tracker's id
40 |         self.obj = {}  # object information
41 | 
42 |         self.hits = 0  # number of detection matches
43 |         self.no_losses = 0  # number of unmatched tracks (track loss)
44 | 
45 |         # Initialize parameters for Kalman Filtering
46 |         # The state is the (x, y) coordinates of the center of detection box
47 |         # state: [center_c, center_c_dot, center_r, center_r_dot]
48 | 
49 |         self.x_state_ = []
50 |         self.whRCF = FirstOrderRCLowPassFilter()
51 |         self.whRCF.SetAlpha(0.5)
52 |         self.dt = 1
53 | 
54 |         # state transition matrix F
55 |         self.F = np.array([[1, 0, 0, 0],
56 |                            [0, 1, 0, 0],
57 |                            [0, 0, 1, 0],
58 |                            [0, 0, 0, 1]])
59 |         self.F[0, 1] = self.dt
60 |         self.F[2, 3] = self.dt
61 | 
62 |         # Measurement matrix H, assuming we can only measure the coordinates
63 |         self.H = np.array([[1, 0, 0, 0],
64 |                            [0, 0, 1, 0]])
65 | 
66 |         # Initialize the state covariance P
67 |         self.L = 10.0  # 10.0 # no change
68 |         self.P = np.diag(self.L * np.ones(4))
69 | 
70 |         # Initialize the process covariance
71 |         self.Q_comp_mat = np.array([[self.dt**4/4., self.dt**3/2.],
72 |                                     [self.dt**3/2., self.dt**2]])
73 |         self.Q = block_diag(self.Q_comp_mat, self.Q_comp_mat)
74 | 
75 |         # Initialize the measurement covariance
76 |         self.R_scaler = 1.0  # 1.0
77 |         self.R_diag_array = self.R_scaler * np.array([self.L, self.L])
78 |         self.R = np.diag(self.R_diag_array)
79 | 
80 |     def Init(self, x, wh):
81 |         self.x_state_ = x
82 |         self.inited_ = True
83 |         self.whRCF.AddMeasure(wh)
84 | 
85 |     def update_R(self):
86 |         R_diag_array = self.R_scaler * np.array([self.L, self.L])
87 |         self.R = np.diag(R_diag_array)
88 | 
89 |     def isInited(self):
90 |         if not self.inited_:
91 |             return False
92 |         if not self.whRCF.isInited():
93 |             return False
94 |         return True
95 | 
96 |     def get_x_state(self):
97 |         if not self.isInited():
98 |             raise ValueError('tracker not initialized.')
99 | 
100 |         return self.x_state_
101 | 
102 |     def kalman_filter(self, z, wh):
103 |         '''
104 |         Implement the Kalman Filter, including the predict and the update stages,
105 |         with the measurement z
106 |         '''
107 |         if not self.isInited():
108 |             raise ValueError('tracker not initialized.')
109 | 
110 |         x = self.x_state_.astype('float')
111 |         # Predict
112 |         x = dot(self.F, x)
113 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
114 | 
115 |         # Update
116 |         S = dot(self.H, self.P).dot(self.H.T) + self.R
117 |         K = dot(self.P, self.H.T).dot(inv(S))  # Kalman gain
118 |         y = z - dot(self.H, x)  # residual
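        # Standard Kalman update: S is the innovation covariance and
        # K = P H^T S^-1 the gain; the state moves toward the measurement
        # by K*y and the covariance contracts to (I - K H) P.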
119 | 
120 |         x += dot(K, y)
121 |         self.P = self.P - dot(K, self.H).dot(self.P)
122 |         self.x_state_ = x.astype(int)  # convert to integer coordinates
123 |         # (pixel values)
124 | 
125 |         self.whRCF.AddMeasure(wh)
126 | 
127 |     def predict_only(self):
128 |         '''
129 |         Implement only the predict stage. This is used for unmatched detections and
130 |         unmatched tracks
131 |         '''
132 |         if not self.isInited():
133 |             raise ValueError('tracker not initialized.')
134 |         x = self.x_state_
135 |         # Predict
136 |         x = dot(self.F, x)
137 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
138 |         self.x_state_ = x.astype(int)
139 | 
140 |         self.whRCF.AddMeasure_noinput()
141 | 
142 | class Tracker():  # class for Kalman Filter-based tracker
143 |     def __init__(self):
144 |         # Initialize parameters for tracker (history)
145 |         self.id = 0  # tracker's id
146 |         self.obj = {}
147 | 
148 |         self.hits = 0  # number of detection matches
149 |         self.no_losses = 0  # number of unmatched tracks (track loss)
150 | 
151 |         # Initialize parameters for Kalman Filtering
152 |         # The state is the (x, y) coordinates of the detection box
153 |         # state: [left, left_dot, up, up_dot, right, right_dot, down, down_dot]
154 |         # or [left, left_dot, up, up_dot, width, width_dot, height, height_dot]
155 |         self.x_state = []
156 |         self.dt = 1.  # time interval
157 | 
158 |         # Process matrix, assuming constant velocity model
159 |         self.F = np.array([[1, self.dt, 0, 0, 0, 0, 0, 0],
160 |                            [0, 1, 0, 0, 0, 0, 0, 0],
161 |                            [0, 0, 1, self.dt, 0, 0, 0, 0],
162 |                            [0, 0, 0, 1, 0, 0, 0, 0],
163 |                            [0, 0, 0, 0, 1, self.dt, 0, 0],
164 |                            [0, 0, 0, 0, 0, 1, 0, 0],
165 |                            [0, 0, 0, 0, 0, 0, 1, self.dt],
166 |                            [0, 0, 0, 0, 0, 0, 0, 1]])
167 | 
168 |         # Measurement matrix, assuming we can only measure the coordinates
169 | 
170 |         self.H = np.array([[1, 0, 0, 0, 0, 0, 0, 0],
171 |                            [0, 0, 1, 0, 0, 0, 0, 0],
172 |                            [0, 0, 0, 0, 1, 0, 0, 0],
173 |                            [0, 0, 0, 0, 0, 0, 1, 0]])
174 | 
175 | 
176 |         # Initialize the state covariance
177 |         self.L = 10.0
178 |         self.P = np.diag(self.L*np.ones(8))
179 | 
180 | 
181 |         # Initialize the process covariance
182 |         self.Q_comp_mat = np.array([[self.dt**4/4., self.dt**3/2.],
183 |                                     [self.dt**3/2., self.dt**2]])
184 |         self.Q = block_diag(self.Q_comp_mat, self.Q_comp_mat,
185 |                             self.Q_comp_mat, self.Q_comp_mat)
186 | 
187 |         # Initialize the measurement covariance
188 |         self.R_scaler = 1.0
189 |         self.R_diag_array = self.R_scaler * np.array([self.L, self.L, self.L, self.L])
190 |         self.R = np.diag(self.R_diag_array)
191 | 
192 | 
193 |     def update_R(self):
194 |         R_diag_array = self.R_scaler * np.array([self.L, self.L, self.L, self.L])
195 |         self.R = np.diag(R_diag_array)
196 | 
197 | 
198 | 
199 | 
200 |     def kalman_filter(self, z):
201 |         '''
202 |         Implement the Kalman Filter, including the predict and the update stages,
203 |         with the measurement z
204 |         '''
205 |         x = self.x_state
206 |         # Predict
207 |         x = dot(self.F, x)
208 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
209 | 
210 |         # Update
211 |         S = dot(self.H, self.P).dot(self.H.T) + self.R
212 |         K = dot(self.P, self.H.T).dot(inv(S))  # Kalman gain
213 |         y = z - dot(self.H, x)  # residual
214 |         x += dot(K, y)
215 |         self.P = self.P - dot(K, self.H).dot(self.P)
216 |         self.x_state = x.astype(int)  # convert to integer coordinates
217 |         # (pixel values)
218 | 
219 |     def predict_only(self):
220 |         '''
221 |         Implement only the predict stage. This is used for unmatched detections and
222 |         unmatched tracks
223 |         '''
224 |         x = self.x_state
225 |         # Predict
226 |         x = dot(self.F, x)
227 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
228 |         self.x_state = x.astype(int)
--------------------------------------------------------------------------------
/utils/file_utils.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import shutil
4 | import numpy as np
5 | 
6 | folder_name = '/home/yunhan/Documents/data/apollo/'
7 | 
8 | filenames = []
9 | count = 0
10 | for filename in glob.iglob('/home/yunhan/Documents/data/apollo/output_highway/images/**/*.jpg', recursive=True):
11 |     print(count)
12 |     shutil.move(filename, '/home/yunhan/Documents/data/detection/%05d.jpg' % count)
13 |     count = count + 1
14 | 
--------------------------------------------------------------------------------
/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import numpy as np
3 | 
4 | # Debug
5 | import tensorflow as tf
6 | from tensorflow.image import ResizeMethod
7 | 
8 | def letterbox_image(image, size):
9 |     """ Resize image with unchanged aspect ratio using padding.
10 | 
11 |     Args:
12 |         image: PIL.Image.Image (Jpeg or PNG)
13 |         size: Tuple (416, 416)
14 | 
15 |     Returns:
16 |         new_image: PIL.Image.Image
17 |     """
18 |     iw, ih = image.size
19 |     w, h = size
20 |     scale = min(w / iw, h / ih)
21 |     nw = int(iw * scale)
22 |     nh = int(ih * scale)
23 | 
24 |     image = image.resize((nw, nh), Image.BICUBIC)
25 |     new_image = Image.new('RGB', size, (128, 128, 128))
26 |     # new_image = Image.new('RGB', size, (0, 0, 0))
27 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
28 |     return new_image
29 | 
30 | def letterbox_image_tf_dynamic(image, size, resize_method=ResizeMethod.BILINEAR):
31 |     """ Letterbox image that handles dynamic Tensor shapes """
32 |     if len(image.get_shape()) == 4:
33 |         ih, iw = tf.shape(image)[1], tf.shape(image)[2]
34 |         images = image
35 |     else:
36 |         ih, iw = tf.shape(image)[0], tf.shape(image)[1]
37 |         images = [image]
38 |     w, h = tf.constant(size[0]), tf.constant(size[1])
39 |     scale = tf.minimum(w / iw, h / ih)
40 |     nw = tf.cast(tf.cast(iw, tf.float64) * scale, tf.int32)
41 |     nh = tf.cast(tf.cast(ih, tf.float64) * scale, tf.int32)
42 | 
43 |     image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True)
44 | 
45 |     h_pad = tf.cast((h-nh)//2, tf.int32)
46 |     w_pad = tf.cast((w-nw)//2, tf.int32)
47 |     c_pad = 0
48 |     if len(image_tensor.shape) == 4:
49 |         paddings = [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
50 |     else:
51 |         paddings = [[h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
52 | 
53 |     image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.)
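    # Pad with mid-gray (128/255 after the [0, 1] scaling) so the TF path
    # matches the PIL letterbox_image fill of (128, 128, 128). Note that the
    # symmetric [h_pad, h_pad] padding can fall one pixel short of the target
    # size whenever h - nh (or w - nw) is odd.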
54 |     return image_tensor
55 | 
56 | 
57 | 
58 | def letterbox_image_tf_static(image, raw_size, tgt_size, resize_method=ResizeMethod.BILINEAR):
59 |     """ Letterbox image that only handles static shapes, but more efficiently. """
60 |     if len(image.shape) == 4:
61 |         images = image
62 |     else:
63 |         images = [image]
64 | 
65 |     iw, ih = raw_size
66 |     w, h = tgt_size
67 |     scale = min(w / iw, h / ih)
68 |     nw = int(iw * scale)
69 |     nh = int(ih * scale)
70 | 
71 |     h_pad, w_pad, c_pad = (h - nh) // 2, (w - nw) // 2, 0
72 | 
73 |     image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True)
74 |     paddings = [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
75 | 
76 |     image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.)
77 |     return image_tensor
78 | 
79 | 
80 | def image_to_ndarray(image, expand_dims=True):
81 |     """ Convert PIL Image to numpy.ndarray and add batch dimension
82 | 
83 |     Args:
84 |         image: PIL.Image.Image
85 | 
86 |     Returns:
87 |         image_data: numpy.ndarray (1, 416, 416, 3) or (416, 416, 3)
88 | 
89 |     """
90 |     image_data = np.array(image, dtype='float32')
91 |     image_data /= 255.
92 |     if expand_dims:
93 |         image_data = np.expand_dims(image_data, 0)
94 |     if image_data.shape[-1] == 4:
95 |         image_data = image_data[..., 0:-1]  # drop the alpha channel
96 |     return image_data
97 | 
98 | def ndarray_to_image(image_data):
99 |     if len(image_data.shape) == 4:
100 |         image_data = np.squeeze(image_data, axis=0)
101 |     image_data = (image_data * 255).astype("uint8")
102 |     return Image.fromarray(image_data)
103 | 
104 | def load_yolov3_image(img_fpath):
105 |     """ Load and resize an image for YOLOv3. """
106 |     model_image_size = (416, 416)
107 |     image = Image.open(img_fpath)
108 |     boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
109 |     image_data = np.array(boxed_image, dtype='float32')
110 |     image_data /= 255.
111 |     image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
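    # image_data is now float32 with shape (1, 416, 416, 3), scaled to
    # [0, 1] -- the layout the YOLOv3 input placeholder expects.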
112 |     return image_data
113 | 
114 | def l1_diff(image1, image2):
115 |     diff = np.abs(image1 - image2)
116 |     return np.sum(diff)
117 | 
118 | def l0_diff(image1, image2):
119 |     diff = np.abs(image1 - image2)
120 |     return np.count_nonzero(diff)
121 | 
122 | def l_inf_diff(image1, image2):
123 |     diff = np.abs(image1 - image2)
124 |     return np.max(diff)
125 | 
126 | def main():
127 |     image = Image.open('images/cat.jpg')
128 | 
129 |     boxed_image = letterbox_image(image, tuple(reversed((416, 416))))
130 |     image_data_pil = image_to_ndarray(boxed_image, expand_dims=False)
131 |     x_img_pil = tf.placeholder(tf.float32, shape=(416, 416, 3))
132 | 
133 |     image_data_tf_dynamic = image_to_ndarray(image, expand_dims=False)
134 |     x_img_tf_large = tf.placeholder(tf.float32, shape=(None, None, 3))
135 |     x_img_tf = letterbox_image_tf_dynamic(x_img_tf_large, (416, 416))
136 | 
137 |     image_data_tf_static = image_to_ndarray(image, expand_dims=False)
138 |     x_img_tf_large_static = tf.placeholder(tf.float32, shape=(1080, 1920, 3))
139 |     x_img_tf_static = letterbox_image_tf_static(x_img_tf_large_static, (1920, 1080), (416, 416))
140 | 
141 |     with tf.Session() as sess:
142 |         image_resized_pil = sess.run(x_img_pil, feed_dict={x_img_pil: image_data_pil})
143 |         image_resized_tf = sess.run(x_img_tf, feed_dict={x_img_tf_large: image_data_tf_dynamic})
144 |         image_resized_tf = np.squeeze(image_resized_tf, axis=0)
145 |         image_resized_tf_static = sess.run(x_img_tf_static, feed_dict={x_img_tf_large_static: image_data_tf_static})
146 | 
147 | 
148 |     l1 = l1_diff(image_resized_tf, image_resized_tf_static)
149 |     l0 = l0_diff(image_resized_tf, image_resized_tf_static)
150 |     l_inf = l_inf_diff(image_resized_tf, image_resized_tf_static)
151 | 
152 |     print("l1 %f, l0 %d, l_inf %f" % (l1, l0, l_inf))
153 |     image_pil = ndarray_to_image(image_resized_pil)
154 |     image_tf = ndarray_to_image(image_resized_tf)
155 |     image_tf_static = ndarray_to_image(image_resized_tf_static)
156 | 
157 | 
158 |     image_tf.save('tf.png')
159 |     image_pil.save('pil.png')
160 |     image_tf_static.save('tf_static.png')
161 | 
162 | if __name__ == "__main__":
163 |     main()
--------------------------------------------------------------------------------
/utils/keras_utils.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | 
3 | 
4 | def compose(*funcs):
5 |     """Compose arbitrarily many functions, evaluated left to right.
6 | 
7 |     Reference: https://mathieularose.com/function-composition-in-python/
8 |     """
9 |     # return lambda x: reduce(lambda v, f: f(v), funcs, x)
10 |     if funcs:
11 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
12 |     else:
13 |         raise ValueError('Composition of empty sequence not supported.')
14 | 
--------------------------------------------------------------------------------
/utils/load_DETRAC.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import glob
4 | import xml.etree.ElementTree as ET
5 | 
6 | import pdb
7 | 
8 | def get_ids(dir_path):
9 |     img_dir = os.path.join(dir_path, 'Insight-MVT_Annotation_Train')
10 |     id_list = os.listdir(img_dir)
11 |     id_list.sort()
12 |     return id_list
13 | 
14 | def load_from_id(dir_path, id_name):
15 |     img_dir = os.path.join(dir_path, 'Insight-MVT_Annotation_Train')
16 |     det_dir = os.path.join(dir_path, 'R-CNN')
17 |     gt_dir = os.path.join(dir_path, 'DETRAC-Train-Annotations-XML')
18 |     imgs_path = glob.glob(os.path.join(img_dir, id_name, '*.jpg'))
19 |     imgs_path.sort()
20 |     num_frames = len(imgs_path)
21 |     det_list = _load_det(os.path.join(det_dir, id_name + '_Det_R-CNN.txt'), num_frames)
22 |     gt_list = _load_gt(os.path.join(gt_dir, id_name + '.xml'), num_frames)
23 | 
24 |     return imgs_path, det_list, gt_list
25 | 
26 | def _load_det(path, num_frames):
27 |     """Loads the per-frame R-CNN detections for one DETRAC sequence
28 | 
29 |     Args:
30 |         path: detection txt file; num_frames: number of frames in the sequence
31 | 
32 |     Returns:
33 |         detected objects with: bbox, confidence score, class index
34 |         [
35 |             dictionary {
36 |                 bbox: np.array([left, up, right, down])
37 |                 score: confidence_score
38 |                 class_idx: class_idx
39 |                 class_name: class name category
40 |             }
41 |         ]
42 | 
43 |     """
44 | 
45 |     with open(path, 'r') as f:
46 |         lines = f.readlines()
47 | 
48 |     result_dic = {}
49 |     for line in lines:
50 |         line_list = line[:-1].split(',')
51 |         frame_id = int(line_list[0])
52 |         bbox_id = int(line_list[1])
53 |         bbox = np.array([float(line_list[2]), float(line_list[3]), float(line_list[2]) + float(line_list[4]), float(line_list[3]) + float(line_list[5])])
54 |         confidence_score = float(line_list[-1])
55 | 
56 |         temp_dic = {
57 |             'bbox' : bbox,
58 |             'score' : confidence_score,
59 |             'class_idx' : 0,
60 |             'class_name' : 'object',
61 |         }
62 | 
63 |         if frame_id not in result_dic:
64 |             result_dic[frame_id] = []
65 |         result_dic[frame_id].append(temp_dic)
66 |     result = []
67 |     start_idx = 1
68 |     while start_idx <= num_frames:
69 |         if start_idx in result_dic:
70 |             result.append(result_dic[start_idx])
71 |         else:
72 |             result.append([])
73 |         start_idx += 1
74 |     return result
75 | 
76 | def _load_gt(path, num_frames):
77 |     tree = ET.parse(path)
78 |     root = tree.getroot()
79 |     result_dic = {}
80 |     for frame in root.findall('frame'):
81 |         temp_list = []
82 |         frame_id = int(frame.attrib['num'])
83 | 
84 |         for target in frame[0]:
85 |             temp_dic = {}
86 |             target_id = int(target.attrib['id'])
87 |             bbox_dic = target.find('box').attrib
88 |             bbox = np.array([float(bbox_dic['left']), float(bbox_dic['top']), float(bbox_dic['left']) + float(bbox_dic['width']), float(bbox_dic['top']) + float(bbox_dic['height'])])
89 |             class_name = target.find('attribute').attrib['vehicle_type']
90 |             temp_dic['bbox'] = bbox
91 |             temp_dic['score'] = 1.0
92 |             temp_dic['class_idx'] = 0
93 |             temp_dic['class_name'] = class_name
94 | 
95 |             # used for single class detection
96 |             temp_dic['class_name'] = 'object'
97 | 
98 |             temp_list.append(temp_dic)
99 |         result_dic[frame_id] = temp_list
100 |     result = []
101 |     start_idx = 1
102 |     while start_idx <= num_frames:
103 |         if start_idx in result_dic:
104 |             result.append(result_dic[start_idx])
105 |         else:
106 |             result.append([])
107 |         start_idx += 1
108 |     return result
109 | 
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from PIL import Image
5 | 
6 | import pdb
7 | 
8 | def draw_box_label(img, detected_object, box_color=(0, 255, 255), thickness=4):
9 |     '''
10 |     Helper function for drawing the bounding boxes and the labels
11 |     bbox_cv2 = [left, top, right, bottom]
12 |     '''
13 |     bbox_cv2 = detected_object['bbox']
14 |     bbox_cv2 = np.array(bbox_cv2).astype('int')
15 |     # box_color = (0, 255, 255)
16 |     font = cv2.FONT_HERSHEY_SIMPLEX
17 |     font_size = 0.7
18 |     font_color = (0, 0, 0)
19 |     left, top, right, bottom = bbox_cv2[0], bbox_cv2[1], bbox_cv2[2], bbox_cv2[3]
20 | 
21 |     # Draw the bounding box
22 |     cv2.rectangle(img, (left, top), (right, bottom), box_color, thickness)
23 | 
24 |     # Draw a filled box on top of the bounding box (as the background for the labels)
25 |     cv2.rectangle(img, (left - 2, top - 45), (right + 2, top), box_color, -1, 1)
26 | 
27 |     # Output the labels that show the detection's score and class index.
28 |     text_score = str(detected_object['score'])
29 |     cv2.putText(img, text_score, (left, top - 25), font, font_size, font_color, 1, cv2.LINE_AA)
30 |     text_class_name = str(detected_object['class_idx'])
31 |     cv2.putText(img, text_class_name, (left, top - 5), font, font_size, font_color, 1, cv2.LINE_AA)
32 | 
33 |     return img
34 | 
35 | def box_iou(bb1, bb2):
36 |     '''
37 |     Calculate IoU of two bounding boxes: bb=[left, up, right, down]
38 |     input:
39 |         bb1, bb2: 1*4 array or list
40 |     output:
41 |         scalar value
42 |     '''
43 |     for idx in range(4):
44 |         bb1[idx] = float(bb1[idx])
45 |         bb2[idx] = float(bb2[idx])
46 |     bi = [max(bb1[0], bb2[0]), max(bb1[1], bb2[1]), min(bb1[2], bb2[2]), min(bb1[3], bb2[3])]
47 |     iw = bi[2] - bi[0] + 1
48 |     ih = bi[3] - bi[1] + 1
49 |     if iw > 0 and ih > 0:
50 |         ua = (bb1[2] - bb1[0] + 1) * (bb1[3] - bb1[1] + 1) + (bb2[2] - bb2[0] + 1) * (bb2[3] - bb2[1] + 1) - iw * ih
51 |         iou = iw * ih / ua
52 |     else:
53 |         iou = 0.0
54 | 
55 |     return iou
56 | 
57 | def det4eval(det, file_id, dir_path='./det', tofile=False):
58 |     result_list = []
59 |     file_path = os.path.join(dir_path, file_id + '.txt')
60 |     if tofile:
61 |         with open(file_path, 'w') as f:
62 |             for temp_dic in det:
63 |                 left, top, right, bottom = temp_dic['bbox'].astype(int)
64 |                 line = temp_dic['class_name'] + ' ' + str(temp_dic['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
65 |                 f.write(line)
66 |     for temp_dic in det:
67 |         left, top, right, bottom = temp_dic['bbox'].astype(int)
68 |         line = temp_dic['class_name'] + ' ' + str(temp_dic['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
69 |         result_list.append(line)
70 |     return result_list
71 | 
72 | 
73 | def gt4eval(gt, file_id, dir_path='./gt', tofile=False):
74 |     result_list = []
75 |     file_path = os.path.join(dir_path, file_id + '.txt')
76 |     if tofile:
77 |         with open(file_path, 'w') as f:
78 |             for temp_gt in gt:
79 |                 left, top, right, bottom = temp_gt['bbox'].astype('int')
80 |                 line = temp_gt['class_name'] + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
81 |                 f.write(line)
82 |     for temp_gt in gt:
83 |         left, top, right, bottom = temp_gt['bbox'].astype('int')
84 |         line = temp_gt['class_name'] + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
85 |         result_list.append(line)
86 |     return result_list
87 | 
88 | 
89 | def trk4eval(trk, min_hits, file_id, dir_path='./trk', tofile=False):
90 |     result_list = []
91 |     file_path = os.path.join(dir_path, file_id + '.txt')
92 |     if tofile:
93 |         with open(file_path, 'w') as f:
94 |             for temp_trk in trk:
95 |                 if temp_trk.hits < min_hits:
96 |                     continue
97 |                 temp_obj = temp_trk.obj
98 |                 left, top, right, bottom = temp_obj['bbox'].astype(int)
99 |                 line = temp_obj['class_name'] + ' ' + str(temp_obj['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
100 |                 f.write(line)
101 |     for temp_trk in trk:
102 |         if temp_trk.hits < min_hits:
103 |             continue
104 |         temp_obj = temp_trk.obj
105 |         left, top, right, bottom = temp_obj['bbox'].astype(int)
106 |         line = temp_obj['class_name'] + ' ' + str(temp_obj['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
107 |         result_list.append(line)
108 |     return result_list
109 | 
110 | 
111 | def letterbox_image(
112 |         img_np, shape=(416, 416), data_format='channels_last'):
113 |     """Returns a letterboxed version of the input image array.
114 | 
115 |     Parameters
116 |     ----------
117 |     shape : list of integers
118 |         The shape of the returned image (h, w).
119 |     data_format : str
120 |         "channels_first" or "channels_last".
121 | 
122 |     Returns
123 |     -------
124 |     image : array_like
125 |         The letterboxed image, scaled to [0, 1].
126 | 
127 |     """
128 |     assert len(shape) == 2
129 |     assert data_format in ['channels_first', 'channels_last']
130 |     image = Image.fromarray(img_np)
131 |     iw, ih = image.size
132 |     h, w = shape
133 |     scale = min(w / iw, h / ih)
134 |     nw = int(iw * scale)
135 |     nh = int(ih * scale)
136 | 
137 |     image = image.resize((nw, nh), Image.BICUBIC)
138 |     new_image = Image.new('RGB', shape, (128, 128, 128))
139 |     new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
140 | 
141 |     image = np.asarray(new_image, dtype=np.float32)
142 |     image /= 255.
143 |     image = image[:, :, :3]
144 |     assert image.shape == shape + (3,)
145 |     if data_format == 'channels_first':
146 |         image = np.transpose(image, (2, 0, 1))
147 |     return image, (h, w)
--------------------------------------------------------------------------------
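A minimal usage sketch for the helpers in `utils/utils.py`. It assumes the repo root is on `PYTHONPATH`; the two boxes are made-up values, and `01.mp4` is one of the clips bundled under `data/move-in-zoomed/`:
```python
import cv2
import numpy as np

from utils.utils import box_iou, letterbox_image, draw_box_label

# Two [left, up, right, down] boxes; this is the same IoU score that
# assign_detections_to_trackers uses to match trackers to detections.
bb1 = [100, 100, 200, 200]
bb2 = [150, 150, 250, 250]
print('IoU: %.3f' % box_iou(bb1, bb2))  # 0.146 for these boxes

# Grab one frame from a bundled clip and letterbox it to the 416x416
# YOLOv3 input size; letterbox_image returns a float image in [0, 1].
cap = cv2.VideoCapture('data/move-in-zoomed/01.mp4')
ok, frame = cap.read()
image, (h, w) = letterbox_image(frame, shape=(416, 416))

# Annotate the raw frame with a detection dict of the same structure
# that pipeline_center.py passes around.
det = {'bbox': np.array(bb1), 'score': 0.9, 'class_idx': 0, 'class_name': 'object'}
annotated = draw_box_label(frame, det)
```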