├── .gitignore
├── README.md
├── assign_detection_to_trackers.py
├── attack
│   ├── __init__.py
│   └── attack_v2.py
├── data
│   ├── move-in-zoomed
│   │   ├── 01.mp4
│   │   ├── 02.mp4
│   │   ├── 03.mp4
│   │   ├── 04.mp4
│   │   ├── 05.mp4
│   │   ├── 06.mp4
│   │   ├── 07.mp4
│   │   ├── 08.mp4
│   │   ├── 09.mp4
│   │   └── 10.mp4
│   └── move-out-zoomed
│       ├── 01.mp4
│       ├── 02.mp4
│       ├── 03.mp4
│       ├── 04.mp4
│       ├── 05.mp4
│       ├── 06.mp4
│       ├── 07.mp4
│       ├── 08.mp4
│       ├── 09.mp4
│       └── 10.mp4
├── main.py
├── models
│   ├── yolov3
│   │   ├── image_utils.py
│   │   ├── keras_utils.py
│   │   ├── model_data
│   │   │   ├── FiraMono-Medium.otf
│   │   │   ├── coco_classes.txt
│   │   │   └── yolov3_anchors.txt
│   │   ├── yolov3_model.py
│   │   └── yolov3_wrapper.py
│   └── yolov3_wrapper.py
├── pipeline_center.py
├── tracker
│   └── kalman_filter.py
└── utils
    ├── file_utils.py
    ├── image_utils.py
    ├── keras_utils.py
    ├── load_DETRAC.py
    └── utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
out/
*.py[cod]
*$py.class

# large model data
*.npy
*.h5

# macOS
.DS_Store

# C extensions
*.so

# Distribution / packaging
.Python
output/*.png
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Tracker Hijacking Attack
First, download the YOLOv3 weight file:
```
wget https://perceptron-benchmark.s3-us-west-1.amazonaws.com/models/coco/yolov3.h5 -P ./models/yolov3/model_data/
```
Then run `main.py`:
```
python3 main.py
```
The output is the number of frames required to launch a successful tracker hijacking attack, together with the position of the fabricated adversarial bounding box in each attack frame.
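For reference, a run prints per-frame progress roughly like the following (illustrative values, not from a real run):
```
Attack starts at frame 3
Target bbox location in the original frame 3: [188, 284, 247, 309]
Fabricated bbox location [193 280 252 305] at frame 3
Fabricated bbox location [198 276 257 301] at frame 4
Attack finished with 2 attacks.
```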

--------------------------------------------------------------------------------
/assign_detection_to_trackers.py:
--------------------------------------------------------------------------------
from utils import utils

import cv2
import numpy as np
import math

try:
    from sklearn.utils.linear_assignment_ import linear_assignment
except ImportError:
    # sklearn.utils.linear_assignment_ was removed in scikit-learn >= 0.23;
    # fall back to the equivalent SciPy solver with the same return layout.
    from scipy.optimize import linear_sum_assignment

    def linear_assignment(cost_matrix):
        rows, cols = linear_sum_assignment(cost_matrix)
        return np.stack([rows, cols], axis=1)

weight_same_camera = {
    'appearance' : 0.45,
    'motion' : 0.4,
    'shape' : 0.15,
    'overlap' : 0.05,
}

def assign_detections_to_trackers(trackers_obj, detections_obj, iou_thrd=0.3):
    '''
    From the current list of trackers and new detections, output matched
    detections, unmatched trackers, and unmatched detections.
    '''

    trackers = [temp_obj['bbox'] for temp_obj in trackers_obj]
    detections = [temp_obj['bbox'] for temp_obj in detections_obj]

    IOU_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Motion_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Shape_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for t, trk in enumerate(trackers):
        #trk = convert_to_cv2bbox(trk)
        for d, det in enumerate(detections):
            # det = convert_to_cv2bbox(det)
            IOU_mat[t, d] = utils.box_iou(trk, det)
            Motion_mat[t, d] = get_motion_score(trk, det)
            Shape_mat[t, d] = get_shape_score(trk, det)

    # Produce matches by solving the maximize-the-sum-of-IOU assignment
    # problem with the Hungarian algorithm (also known as the Munkres
    # algorithm).

    matched_idx = linear_assignment(-IOU_mat)

    unmatched_trackers, unmatched_detections = [], []
    for t, trk in enumerate(trackers):
        if t not in matched_idx[:, 0]:
            unmatched_trackers.append(t)

    for d, det in enumerate(detections):
        if d not in matched_idx[:, 1]:
            unmatched_detections.append(d)

    matches = []

    # For creating trackers we consider any detection with an overlap less
    # than iou_thrd to signify the existence of an untracked object.

    for m in matched_idx:
        if IOU_mat[m[0], m[1]] < iou_thrd:
            unmatched_trackers.append(m[0])
            unmatched_detections.append(m[1])
        else:
            matches.append(m.reshape(1, 2))

    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(
        unmatched_detections), np.array(unmatched_trackers)


def _gaussian(x, mu, sigma):
    return math.exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))


def get_motion_score(trk, det):
    # Box centers; note the y component is (top + bottom) / 2. The original
    # code computed (bottom - top) / 2 here, i.e. half the box height.
    center_det = [(det[2] + det[0]) / 2, (det[3] + det[1]) / 2]
    center_trk = [(trk[2] + trk[0]) / 2, (trk[3] + trk[1]) / 2]
    width_trk = trk[2] - trk[0] + 1
    height_trk = trk[3] - trk[1] + 1
    s = _gaussian(center_trk[0], center_det[0], width_trk) * \
        _gaussian(center_trk[1], center_det[1], height_trk)
    return s

def get_shape_score(trk, det):
    width_trk = trk[2] - trk[0] + 1
    height_trk = trk[3] - trk[1] + 1
    width_det = det[2] - det[0] + 1
    height_det = det[3] - det[1] + 1

    s = (height_det - height_trk) * (width_det - width_trk) / \
        (width_det * height_det)
    return -1 * abs(s)
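
# Illustrative sketch (not in the original file): how the Hungarian solver
# turns the IoU cost matrix into (tracker, detection) pairs. Uses the
# linear_assignment defined above; the values are made up.
def _assignment_example():
    iou = np.array([[0.8, 0.1, 0.0],
                    [0.2, 0.6, 0.1]])  # rows: trackers, cols: detections
    matched_idx = linear_assignment(-iou)  # negate to maximize total IoU
    # matched_idx == [[0, 0], [1, 1]]; detection 2 is left unmatched and
    # would seed a new tracker.
    return matched_idx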
--------------------------------------------------------------------------------
/attack/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/attack/__init__.py

--------------------------------------------------------------------------------
/attack/attack_v2.py:
--------------------------------------------------------------------------------
'''apply only one fabrication attack'''
import sys
sys.path.append("../")
from models.yolov3_wrapper import YOLOv3
from pipeline_center import pipeline
from utils.utils import letterbox_image, box_iou
from PIL import Image

import cv2
import glob
import numpy as np
from keras import backend as K
import skvideo.io
import copy
import math
import os
from tqdm import tqdm

class KerasYOLOv3Model_plus(YOLOv3):
    def detect_image(self, image):
        """Determine the locations of the cars in the image.

        Args:
            image: numpy array

        Returns:
            detected objects with: bbox, confidence score, class index
            [
                dictionary {
                    bbox: np.array([left, up, right, down])
                    score: confidence_score
                    class_idx: class_idx
                    class_name: class name category
                }
            ]

        """
        pred_dic = self.predict(image)
        pred_list = self._dic2list(pred_dic)
        return pred_list

    def _dic2list(self, pred_dic):
        pred_list = []
        for temp_class, temp_score, temp_bbox in zip(pred_dic['classes'], pred_dic['scores'], pred_dic['boxes']):
            temp_dic = {}
            temp_dic['class_idx'] = temp_class
            # predict() returns boxes as [top, left, bottom, right]; reorder
            # to [left, top, right, bottom].
            temp_dic['score'] = temp_score
            temp_dic['bbox'] = [temp_bbox[1], temp_bbox[0], temp_bbox[3], temp_bbox[2]]
            try:
                # YOLOv3 exposes the label list as `class_names`; the original
                # code looked up a non-existent `_class_names` attribute and
                # therefore always fell through to 'None'.
                temp_dic['class_name'] = self.class_names[temp_class]
            except (AttributeError, IndexError):
                temp_dic['class_name'] = 'None'
            pred_list.append(temp_dic)
        return pred_list


def calculate_translation_center(bbox1, bbox2):
    '''
    Calculate the center translation vector from bbox1 to bbox2.
    bbox : np.ndarray or list
        [left, top, right, bottom]
    '''
    bbox1 = np.array(bbox1).astype(float)
    bbox2 = np.array(bbox2).astype(float)
    center_1 = np.array([(bbox1[2] + bbox1[0]) / 2, (bbox1[3] + bbox1[1]) / 2])
    center_2 = np.array([(bbox2[2] + bbox2[0]) / 2, (bbox2[3] + bbox2[1]) / 2])
    return center_2 - center_1

def is_match(target_trk_id, target_det_id, match_info):
    match_list = match_info[0]
    for match_trk, match_det in match_list:
        if match_trk == target_trk_id and match_det == target_det_id:
            return True
    return False

def find_det_id_by_match_info(target_trk_id, match_info):
    match_list = match_info[0]
    unmatched_dets = match_info[1]
    unmatched_trks = match_info[2]
    if target_trk_id in unmatched_trks:
        raise ValueError('Target tracker is not matched to any detection.')
    for match_trk, match_det in match_list:
        if match_trk == target_trk_id:
            return match_det
    raise ValueError('Target tracker is not in tracker list.')

def bgr2rgb(bgr_array):
    temp = []
    temp.append(bgr_array[:,:,2])
    temp.append(bgr_array[:,:,1])
    temp.append(bgr_array[:,:,0])
    return np.transpose(np.array(temp), (1, 2, 0))

def find_match_trk(match_info, det_id):
    # Match pairs are (tracker_idx, detection_idx), consistent with is_match
    # and find_det_id_by_match_info above; the original code compared det_id
    # against the tracker column and returned the detection column.
    match_info_pair = match_info[0]
    for temp_pair in match_info_pair:
        if temp_pair[1] == det_id:
            return temp_pair[0]
    return None

def sort_bbox_by_area(detected_objects_list):
    if not detected_objects_list:
        return detected_objects_list
    area_list = []
    for temp_det in detected_objects_list:
        temp_bbox = temp_det['bbox']
        temp_area = (temp_bbox[2] - temp_bbox[0]) * (temp_bbox[3] - temp_bbox[1])
        area_list.append(temp_area)
    sorted_idx = [i[0] for i in sorted(enumerate(area_list), key=lambda x: x[1], reverse=True)]
    ret_list = []
    for temp_idx in sorted_idx:
        ret_list.append(detected_objects_list[temp_idx])
    return ret_list
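
# Illustrative helper (not in the original file): the match_info tuple used
# throughout follows assign_detections_to_trackers' return layout of
# (matches, unmatched_detections, unmatched_trackers), where each row of
# matches is a (tracker_idx, detection_idx) pair.
def _match_info_example():
    matches = np.array([[0, 2], [1, 0]])
    match_info = (matches, np.array([1]), np.array([2]))
    assert is_match(0, 2, match_info)
    assert find_det_id_by_match_info(1, match_info) == 0
    return match_info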
def _box_area(bbox):
    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])

def nms_fine_tune(detected_objects_list, th=0.5):
    ret_list = []
    for i in range(len(detected_objects_list)):
        is_append = True
        for j in range(len(detected_objects_list)):
            if i == j:
                continue
            iou = box_iou(detected_objects_list[i]['bbox'], detected_objects_list[j]['bbox'])
            if iou > th and _box_area(detected_objects_list[i]['bbox']) <= _box_area(detected_objects_list[j]['bbox']):
                is_append = False
                break
        if is_append:
            ret_list.append(detected_objects_list[i])
    return ret_list

def is_missing_detection(detected_objects_list, target_bbox, target_id=0):
    temp_bbox = detected_objects_list[target_id]['bbox']
    temp_area = _box_area(temp_bbox)
    target_area = _box_area(target_bbox)
    if float(temp_area) / float(target_area) < 0.3:
        return True
    return False

def tracker_bbox_list(tracker_list):
    ret = []
    for tracker in tracker_list:
        ret.append(tracker.obj)
    return ret

def attack_video(params, video_path=None, attack_det_id_dict=None, patch_bbox=None, moving_direction=None, verbose=0, is_return=False):

    detector = KerasYOLOv3Model_plus(sess=K.get_session())

    n_attacks = None

    videogen = skvideo.io.FFmpegReader(video_path)
    virtual_attack = False
    detected_objects_list_prev = None
    match_info_prev = None

    cal_dx_dy_flag = True
    attack_frame_list = [*attack_det_id_dict]
    attack_frame_list.sort()

    attacking_flag = False
    attack_count_idx = 0

    is_init = True
    params_min_hits = params['min_hits']
    for frame_count, image in enumerate(videogen.nextFrame()):
        if frame_count > 1:
            is_init = False

        image_yolo, _ = letterbox_image(image, shape=(416, 416), data_format='channels_last')
        image = bgr2rgb((image_yolo * 255).astype(np.uint8))
        image_yolo_pil = Image.fromarray((image_yolo * 255).astype(np.uint8))
        detected_objects_list = detector.detect_image(image_yolo_pil)
        detected_objects_list = nms_fine_tune(detected_objects_list)

        detected_objects_list = sort_bbox_by_area(detected_objects_list)
        if len(detected_objects_list) != 0:
            nat_detected_objects_list = copy.deepcopy(detected_objects_list)

        if frame_count in attack_frame_list or attacking_flag:
            target_det_id = attack_det_id_dict[frame_count - attack_count_idx][attack_count_idx]

            if attack_count_idx == 0:
                attacking_flag = True
                target_trk_id = find_match_trk(match_info_prev, target_det_id)
                target_init_bbox = detected_objects_list[target_det_id]['bbox']
                target_init_trk_bbox = (params_prev['tracker_list'][target_trk_id].obj)['bbox']
                print("Attack starts at frame {}".format(frame_count))
                print("Target bbox location in the original frame {}: {}".format(frame_count, target_init_bbox))
            if attack_count_idx != 0:
                _, _, match_info_nat = pipeline(image, nat_detected_objects_list, frame_count, params_prev, detect_output=True, verbose=0, virtual_attack=virtual_attack, return_match_info=True)
                attacking_flag = is_match(target_trk_id, target_det_id, match_info_nat)
                if not attacking_flag:
                    detection_missing = is_missing_detection(nat_detected_objects_list, target_init_bbox, target_det_id)
                    tracking_missing = is_missing_detection(tracker_bbox_list(params_prev['tracker_list']), target_init_trk_bbox, target_trk_id)
                    if detection_missing and not tracking_missing:
                        attacking_flag = True
                    else:
                        print('Attack finished with {0} attacks.'.format(attack_count_idx))
                        n_attacks = attack_count_idx
                        cal_dx_dy_flag = True
                        attack_count_idx = 0
                        return n_attacks

            if attacking_flag:
                temp_attack_obj = detected_objects_list_prev[target_det_id]
                target_det_prev = temp_attack_obj
                target_trk_prev = params_prev['tracker_list'][target_trk_id].obj
                translation_vector_center = calculate_translation_center(target_trk_prev['bbox'], target_det_prev['bbox'])

                attack_bbox = temp_attack_obj['bbox']
                attack_param = params_prev
                L = 5  # bbox moving pixel length per attack step

                if cal_dx_dy_flag and moving_direction is None:
                    if translation_vector_center[0] == 0.:
                        ratio = 1000.0
                    else:
                        ratio = abs(translation_vector_center[1] / translation_vector_center[0])
                    dx = L / math.sqrt(1 + ratio * ratio)
                    dy = dx * ratio
                    if translation_vector_center[0] > 0:
                        dx *= -1
                    if translation_vector_center[1] > 0:
                        dy *= -1
                    cal_dx_dy_flag = False

                if attack_count_idx == 0:
                    for sub_attack_count in range(100):
                        if moving_direction is None:
                            fake_det_bbox = (target_trk_prev['bbox'] + np.array([dx, dy, dx, dy]) * (sub_attack_count + 1)).astype(int)
                        else:
                            fake_det_bbox = (target_trk_prev['bbox'] + np.array(moving_direction) * (sub_attack_count + 1)).astype(int)

                        detected_objects_list[target_det_id]['bbox'] = fake_det_bbox
                        _, param_attack, match_info = pipeline(image, detected_objects_list, frame_count, params, detect_output=True, verbose=0, virtual_attack=virtual_attack, return_match_info=True)
                        if is_match(target_trk_id, target_det_id, match_info):
                            attack_bbox = fake_det_bbox
                            attack_param = param_attack
                            if box_iou(patch_bbox, fake_det_bbox) <= 0.0:
                                break
                        else:
                            break
                    detected_objects_list[target_det_id]['bbox'] = attack_bbox
                else:
                    del detected_objects_list[target_det_id]

                print("Fabricated bbox location {} at frame {}".format(attack_bbox, frame_count))
                image_yolo_pil.save('./output/' + 'ori_' + str(frame_count) + '.png')
                attack_count_idx += 1

        image_track, params, match_info = pipeline(image, detected_objects_list, frame_count, params, detect_output=True, verbose=verbose, virtual_attack=virtual_attack, return_match_info=True, is_init=is_init)

        cv2.imwrite('./output/track/' + str(frame_count) + '.png', image_track)

        match_info_prev = copy.deepcopy(match_info)
        detected_objects_list_prev = copy.deepcopy(nat_detected_objects_list)
        params_prev = copy.deepcopy(params)

    return n_attacks

def cal_success_rate(input_list):
    results = []
    total_num = len(input_list)
    xs = [1, 2, 3, 4, 5, 6, 7, 8]
    for x in xs:
        count = 0
        for ret in input_list:
            if ret <= x:
                count += 1
        results.append(float(count) / float(total_num))
    return results
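
# Worked example (illustrative): with per-video attack lengths [1, 3, 9],
# cal_success_rate returns, for each budget x in 1..8, the fraction of
# videos hijacked within x attack frames: [1/3, 1/3, 2/3, 2/3, 2/3, 2/3,
# 2/3, 2/3]. Note attack_video returns None when the hijack never
# completes; filter those out before calling cal_success_rate.
if __name__ == "__main__":
    print(cal_success_rate([1, 3, 9]))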

--------------------------------------------------------------------------------
/data/move-in-zoomed/01.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/01.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/02.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/02.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/03.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/03.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/04.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/04.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/05.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/05.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/06.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/06.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/07.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/07.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/08.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/08.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/09.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/09.mp4

--------------------------------------------------------------------------------
/data/move-in-zoomed/10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-in-zoomed/10.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/01.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/01.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/02.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/02.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/03.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/03.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/04.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/04.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/05.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/05.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/06.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/06.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/07.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/07.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/08.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/08.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/09.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/09.mp4

--------------------------------------------------------------------------------
/data/move-out-zoomed/10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/data/move-out-zoomed/10.mp4

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import sys
import os
sys.path.append(os.path.abspath('./'))
from attack.attack_v2 import attack_video
from collections import deque
import numpy as np
from tqdm import tqdm
from attack.attack_v2 import cal_success_rate

if __name__ == "__main__":
    videos_info = [
        # video file name, attack start frame, patch bbox [left, top, right, bottom],
        # moving direction [dx, dy, dx, dy] (None lets attack_video derive it)
        ('move-in-zoomed/01.mp4', 3, [188, 284, 247, 309], None),
        ('move-in-zoomed/02.mp4', 3, [195, 233, 289, 264], None),
        ('move-in-zoomed/03.mp4', 3, [238, 194, 319, 256], None),
        ('move-in-zoomed/04.mp4', 3, [229, 231, 305, 267], None),
        ('move-in-zoomed/05.mp4', 3, [202, 172, 277, 222], None),
        ('move-in-zoomed/06.mp4', 3, [236, 230, 334, 298], None),
        ('move-in-zoomed/07.mp4', 3, [195, 203, 252, 260], None),
        ('move-in-zoomed/08.mp4', 3, [136, 193, 247, 280], None),
        ('move-in-zoomed/09.mp4', 3, [246, 210, 373, 340], None),
        ('move-in-zoomed/10.mp4', 5, [196, 205, 300, 287], None),
        ('move-out-zoomed/01.mp4', 3, [192, 213, 310, 273], [5, 0, 5, 0]),
        ('move-out-zoomed/02.mp4', 3, [143, 222, 303, 302], [5, 0, 5, 0]),
        ('move-out-zoomed/03.mp4', 3, [158, 192, 300, 283], [-5, 0, -5, 0]),
        ('move-out-zoomed/04.mp4', 3, [154, 230, 281, 289], [-5, 0, -5, 0]),
        ('move-out-zoomed/05.mp4', 3, [194, 167, 297, 249], [5, 0, 5, 0]),
        ('move-out-zoomed/06.mp4', 3, [174, 166, 326, 280], [-5, 0, -5, 0]),
        ('move-out-zoomed/07.mp4', 3, [182, 211, 304, 271], [5, 0, 5, 0]),
        ('move-out-zoomed/08.mp4', 3, [100, 131, 304, 307], [-5, 0, -5, 0]),
        ('move-out-zoomed/09.mp4', 3, [144, 188, 293, 310], [-5, 0, -5, 0]),
        ('move-out-zoomed/10.mp4', 3, [171, 159, 264, 238], [5, 0, 5, 0]),
    ]

    dir_path = './data/'
    results = []
    for idx, video_info in enumerate(tqdm(videos_info)):
        print(video_info[0])
        (video_path, temp_attack_frame, patch_bbox, moving_direction) = video_info
        video_path = os.path.join(dir_path, video_path)
        temp_attack_frame_id_list = []
        for _ in range(100):
            temp_attack_frame_id_list.append(0)
        attack_det_id_dict = {temp_attack_frame : temp_attack_frame_id_list}

        params = {
            'max_age' : 60,  # 4
            'min_hits' : 6,  # 1
            'tracker_list' : [],
        }
        id_list = []
        for i in range(100):  # renamed from idx to avoid shadowing the outer loop index
            id_list.append(str(i))
        params['track_id_list'] = deque(id_list)

        ret = attack_video(params, video_path=video_path, attack_det_id_dict=attack_det_id_dict, patch_bbox=patch_bbox, moving_direction=moving_direction, is_return=True)
        results.append(ret)

    # Report the success rate over attack budgets of 1..8 frames (assumed
    # completion of the unused cal_success_rate import); attack_video returns
    # None for videos where the hijack never completed, so drop those first.
    finished = [r for r in results if r is not None]
    if finished:
        print(cal_success_rate(finished))

--------------------------------------------------------------------------------
/models/yolov3/image_utils.py:
--------------------------------------------------------------------------------
from PIL import Image
import numpy as np

#Debug
import tensorflow as tf
from tensorflow.image import ResizeMethod

def letterbox_image(image, size):
    """ Resize image with unchanged aspect ratio using padding.
10 | 11 | Args: 12 | image: PIL.Image.Image (Jpeg or PNG) 13 | size: Tuple (416, 416) 14 | 15 | Returns: 16 | new_image: PIL.Image.Image 17 | """ 18 | iw, ih = image.size 19 | w, h = size 20 | scale = min(w / iw, h / ih) 21 | nw = int(iw * scale) 22 | nh = int(ih * scale) 23 | 24 | image = image.resize((nw, nh), Image.BICUBIC) 25 | new_image = Image.new('RGB', size, (128, 128, 128)) 26 | # new_image = Image.new('RGB', size, (0, 0, 0)) 27 | new_image.paste(image, ((w-nw)//2, (h-nh)//2)) 28 | return new_image 29 | 30 | def letterbox_image_tf_dynamic(image, size, resize_method=ResizeMethod.BILINEAR): 31 | """ Letterbox image that handles dynamic Tensor type """ 32 | if len(image.get_shape()) == 4: 33 | ih, iw = tf.shape(image)[1], tf.shape(image)[2] 34 | images = image 35 | else: 36 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 37 | images = [image] 38 | w, h = tf.constant(size[0]), tf.constant(size[1]) 39 | scale = tf.minimum(w / iw, h / ih) 40 | nw = tf.cast(tf.cast(iw, tf.float64) * scale, tf.int32) 41 | nh = tf.cast(tf.cast(ih, tf.float64) * scale, tf.int32) 42 | 43 | image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True) 44 | 45 | h_pad = tf.cast((h-nh)//2, tf.int32) 46 | w_pad = tf.cast((w-nw)//2, tf.int32) 47 | c_pad = 0 48 | if len(image_tensor.shape) == 4: 49 | paddings = [[0,0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 50 | else: 51 | paddings = [[h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 52 | 53 | image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.) 54 | return image_tensor 55 | 56 | 57 | 58 | def letterbox_image_tf_static(image, raw_size, tgt_size, resize_method=ResizeMethod.BILINEAR): 59 | """ Letterbox image that only handles static shape, but more efficiently.""" 60 | if len(image.shape) == 4: 61 | images = image 62 | else: 63 | images = [image] 64 | 65 | iw, ih = raw_size 66 | w, h = tgt_size 67 | scale = min(w / iw, h / ih) 68 | nw = int(iw * scale) 69 | nh = int(ih * scale) 70 | 71 | h_pad, w_pad, c_pad = (h - nh) // 2, (w - nw) // 2, 0 72 | 73 | image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True) 74 | paddings = [[0,0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]] 75 | 76 | image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.) 77 | return image_tensor 78 | 79 | 80 | def image_to_ndarray(image, expand_dims=True): 81 | """ Convert PIL Image to numpy.ndarray and add batch dimension 82 | 83 | Args: 84 | image: PIL.Image.Image 85 | 86 | Returns: 87 | image_data: numpy.ndarray (1, 416, 416, 3) or (416, 416, 3) 88 | 89 | """ 90 | image_data = np.array(image, dtype='float32') 91 | image_data /= 255. 92 | if expand_dims == True: 93 | image_data = np.expand_dims(image_data, 0) 94 | if image_data.shape[-1] == 4: 95 | image_data = image_data[...,0:-1] 96 | return image_data 97 | 98 | def ndarray_to_image(image_data): 99 | if len(image_data.shape) == 4: 100 | image_data = np.squeeze(image_data, axis=0) 101 | image_data = (image_data * 255).astype("uint8") 102 | return Image.fromarray(image_data) 103 | 104 | def load_yolov3_image(img_fpath): 105 | """ Load and resize an image for yolo3. """ 106 | model_image_size = (416, 416) 107 | image = Image.open(img_fpath) 108 | boxed_image = letterbox_image(image, tuple(reversed(model_image_size))) 109 | image_data = np.array(boxed_image, dtype='float32') 110 | image_data /= 255. 111 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
112 | return image_data 113 | 114 | def l1_diff(image1, image2): 115 | diff = np.abs(image1 - image2) 116 | return np.sum(diff) 117 | 118 | def l0_diff(image1, image2): 119 | diff = np.abs(image1 - image2) 120 | return np.count_nonzero(diff) 121 | 122 | def l_inf_diff(image1, image2): 123 | diff = np.abs(image1 - image2) 124 | return np.max(diff) 125 | 126 | def main(): 127 | image = Image.open('images/cat.jpg') 128 | 129 | boxed_image = letterbox_image(image, tuple(reversed((416,416)))) 130 | image_data_pil = image_to_ndarray(boxed_image, expand_dims=False) 131 | x_img_pil = tf.placeholder(tf.float32, shape=(416, 416, 3)) 132 | 133 | image_data_tf_dynamic = image_to_ndarray(image, expand_dims=False) 134 | x_img_tf_large = tf.placeholder(tf.float32, shape=(None,None, 3)) 135 | x_img_tf = letterbox_image_tf_dynamic(x_img_tf_large, (416,416)) 136 | 137 | image_data_tf_static = image_to_ndarray(image, expand_dims=False) 138 | x_img_tf_large_static = tf.placeholder(tf.float32, shape=(1080,1920, 3)) 139 | x_img_tf_static = letterbox_image_tf_static(x_img_tf_large_static, (1920, 1080), (416, 416)) 140 | 141 | with tf.Session() as sess: 142 | image_resized_pil = sess.run(x_img_pil, feed_dict={x_img_pil: image_data_pil}) 143 | image_resized_tf = sess.run(x_img_tf, feed_dict={x_img_tf_large: image_data_tf_dynamic}) 144 | image_resized_tf = np.squeeze(image_resized_tf, axis=0) 145 | image_resized_tf_static = sess.run(x_img_tf_static, feed_dict={x_img_tf_large_static: image_data_tf_static}) 146 | 147 | 148 | l1 = l1_diff(image_resized_tf, image_resized_tf_static) 149 | l0 = l0_diff(image_resized_tf, image_resized_tf_static) 150 | l_inf = l_inf_diff(image_resized_tf, image_resized_tf_static) 151 | 152 | print("l1 %f, l0 %d, l_inf %f" % (l1, l0, l_inf)) 153 | image_pil = ndarray_to_image(image_resized_pil) 154 | image_tf = ndarray_to_image(image_resized_tf) 155 | image_tf_static = ndarray_to_image(image_resized_tf_static) 156 | 157 | 158 | image_tf.save('tf.png') 159 | image_pil.save('pil.png') 160 | image_tf_static.save('tf_static.png') 161 | 162 | if __name__ == "__main__": 163 | main() -------------------------------------------------------------------------------- /models/yolov3/keras_utils.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | 4 | def compose(*funcs): 5 | """Compose arbitrarily many functions, evaluated left to right. 
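    Example: compose(f, g, h)(x) == h(g(f(x))).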
6 | 7 | Reference: https://mathieularose.com/function-composition-in-python/ 8 | """ 9 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 10 | if funcs: 11 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 12 | else: 13 | raise ValueError('Composition of empty sequence not supported.') 14 | -------------------------------------------------------------------------------- /models/yolov3/model_data/FiraMono-Medium.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anonymousjack/hijacking/1ba81d151c59569d20e2ec590821dd39b3370ffc/models/yolov3/model_data/FiraMono-Medium.otf -------------------------------------------------------------------------------- /models/yolov3/model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /models/yolov3/model_data/yolov3_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 2 | -------------------------------------------------------------------------------- /models/yolov3/yolov3_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Baidu Inc. All Rights Reserved 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ======================================================================== 15 | 16 | """YOLO_v3 Model Defined in Keras.""" 17 | 18 | from functools import wraps 19 | 20 | import pdb 21 | import numpy as np 22 | import tensorflow as tf 23 | from keras import backend as K 24 | from keras.layers import Conv2D, Add,\ 25 | ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D 26 | from keras.layers.advanced_activations import LeakyReLU 27 | from keras.layers.normalization import BatchNormalization 28 | from keras.models import Model 29 | from keras.regularizers import l2 30 | 31 | from models.yolov3.keras_utils import compose 32 | 33 | 34 | @wraps(Conv2D) 35 | def DarknetConv2D(*args, **kwargs): 36 | """ Wrapper to set Darknet parameters for Convolution2D. 37 | 38 | Args: 39 | args: Non-keyword variable length argument list from upper layer function. 40 | kwargs: Keyworded variable length of arguments from upper layer function. 41 | 42 | Returns: 43 | 4D tensor with shape: (batch, channels, rows, cols) if 44 | data_format is "channels_first" or 4D tensor with shape: 45 | (batch, rows, cols, channels) if data_format is "channels_last". 46 | """ 47 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} 48 | darknet_conv_kwargs['padding'] =\ 49 | 'valid' if kwargs.get('strides') == (2, 2) else 'same' 50 | darknet_conv_kwargs.update(kwargs) 51 | return Conv2D(*args, **darknet_conv_kwargs) 52 | 53 | 54 | def DarknetConv2D_BN_Leaky(*args, **kwargs): 55 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" 56 | no_bias_kwargs = {'use_bias': False} 57 | no_bias_kwargs.update(kwargs) 58 | return compose( 59 | DarknetConv2D(*args, **no_bias_kwargs), 60 | BatchNormalization(), 61 | LeakyReLU(alpha=0.1)) 62 | 63 | 64 | def resblock_body(x, num_filters, num_blocks): 65 | '''A series of resblocks starting with a downsampling Convolution2D''' 66 | # Darknet uses left and top padding instead of 'same' mode 67 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) 68 | x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x) 69 | for i in range(num_blocks): 70 | y = compose( 71 | DarknetConv2D_BN_Leaky(num_filters//2, (1, 1)), 72 | DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x) 73 | x = Add()([x, y]) 74 | return x 75 | 76 | 77 | def darknet_body(x): 78 | """ Darknet layers. 79 | 80 | Darknet have 52 Convolution2D layers 81 | 82 | Args: 83 | Tensor object passing through each layer 84 | """ 85 | x = DarknetConv2D_BN_Leaky(32, (3, 3))(x) 86 | x = resblock_body(x, 64, 1) 87 | x = resblock_body(x, 128, 2) 88 | x = resblock_body(x, 256, 8) 89 | x = resblock_body(x, 512, 8) 90 | x = resblock_body(x, 1024, 4) 91 | return x 92 | 93 | 94 | def make_last_layers(x, num_filters, out_filters): 95 | """ Last few layers for detecting objects with different sizes. 96 | 6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer. 97 | www.cyberailab.com/home/a-closer-look-at-yolov3 98 | """ 99 | x = compose( 100 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)), 101 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 102 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)), 103 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 104 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x) 105 | y = compose( 106 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)), 107 | DarknetConv2D(out_filters, (1, 1)))(x) 108 | return x, y 109 | 110 | 111 | def yolo_body(inputs, num_anchors, num_classes): 112 | """ Create YOLOv3 model CNN body in Keras. 113 | y1, y2, y3 for detecting small, medium, and large objects. 
114 | 115 | Args: 116 | inputs: Tensor model.inputs [1, 416, 416, 3]. 117 | num_anchors: anchors. 118 | num_classes: number of classes. 119 | 120 | Returns: 121 | model: Keras model, output shape is: 122 | [(1, 13, 13, 255), (1, 13, 13, 255), (1, 13, 13, 255)]. 123 | 255 = 85 (80 classes, 1 logits, 4 box parameters) * 3 (anchor boxes). 124 | 3 elements corresponding to 3 object size (small, medium, large). 125 | """ 126 | darknet = Model(inputs, darknet_body(inputs)) 127 | x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) 128 | 129 | x = compose( 130 | DarknetConv2D_BN_Leaky(256, (1, 1)), 131 | UpSampling2D(2))(x) 132 | x = Concatenate()([x, darknet.layers[152].output]) 133 | x, y2 = make_last_layers(x, 256, num_anchors * (num_classes + 5)) 134 | 135 | x = compose( 136 | DarknetConv2D_BN_Leaky(128, (1, 1)), 137 | UpSampling2D(2))(x) 138 | x = Concatenate()([x, darknet.layers[92].output]) 139 | x, y3 = make_last_layers(x, 128, num_anchors*(num_classes + 5)) 140 | 141 | return Model(inputs, [y1, y2, y3]) 142 | 143 | 144 | def tiny_yolo_body(inputs, num_anchors, num_classes): 145 | '''Create Tiny YOLO_v3 model CNN body in keras.''' 146 | x1 = compose( 147 | DarknetConv2D_BN_Leaky(16, (3, 3)), 148 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 149 | DarknetConv2D_BN_Leaky(32, (3, 3)), 150 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 151 | DarknetConv2D_BN_Leaky(64, (3, 3)), 152 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 153 | DarknetConv2D_BN_Leaky(128, (3, 3)), 154 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 155 | DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs) 156 | x2 = compose( 157 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), 158 | DarknetConv2D_BN_Leaky(512, (3, 3)), 159 | MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'), 160 | DarknetConv2D_BN_Leaky(1024, (3, 3)), 161 | DarknetConv2D_BN_Leaky(256, (1, 1)))(x1) 162 | y1 = compose( 163 | DarknetConv2D_BN_Leaky(512, (3, 3)), 164 | DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2) 165 | 166 | x2 = compose( 167 | DarknetConv2D_BN_Leaky(128, (1, 1)), 168 | UpSampling2D(2))(x2) 169 | y2 = compose( 170 | Concatenate(), 171 | DarknetConv2D_BN_Leaky(256, (3, 3)), 172 | DarknetConv2D(num_anchors * (num_classes+5), (1, 1)))([x2, x1]) 173 | 174 | return Model(inputs, [y1, y2]) 175 | 176 | 177 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): 178 | """ Convert final layer features to bounding box parameters. No threshold 179 | or nms applied yet. 180 | 181 | Args: 182 | feats: Elements in the output list from K.model.output: 183 | shape = (N, 13, 13, 255) 184 | anchors: anchors. 185 | num_classes: num of classes. 186 | input_shape: input shape obtained from model output grid information. 187 | 188 | Returns: 189 | Breaking the 85 output logits into box_xy, box_wh, box_confidence, and 190 | box_class_probs. 191 | """ 192 | 193 | num_anchors = len(anchors) 194 | # Reshape to batch, height, width, num_anchors, box_params. 
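    # Decoding below follows the YOLOv3 formulation: for raw outputs
    # (tx, ty, tw, th) at grid cell (cx, cy) with anchor (pw, ph),
    #   bx = (sigmoid(tx) + cx) / grid_w,   by = (sigmoid(ty) + cy) / grid_h
    #   bw = pw * exp(tw) / input_w,        bh = ph * exp(th) / input_h
    # so box_xy and box_wh come out normalized to the model input size.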
195 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) 196 | 197 | grid_shape = K.shape(feats)[1:3] # height, width 198 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), 199 | [1, grid_shape[1], 1, 1]) 200 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), 201 | [grid_shape[0], 1, 1, 1]) 202 | grid = K.concatenate([grid_x, grid_y]) 203 | grid = K.cast(grid, K.dtype(feats)) 204 | 205 | feats = K.reshape( 206 | feats, 207 | [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 208 | 209 | # The last dimension: x,y,w,h,objectness,80_class_conf 210 | # Adjust preditions to each spatial grid point and anchor size. 211 | box_xy = (K.sigmoid(feats[..., :2]) + grid) /\ 212 | K.cast(grid_shape[::-1], K.dtype(feats)) 213 | 214 | box_wh = K.exp(feats[..., 2:4]) *\ 215 | anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) 216 | box_confidence = K.sigmoid(feats[..., 4:5]) 217 | box_class_probs = K.sigmoid(feats[..., 5:]) 218 | 219 | box_coord_logits = feats[..., :4] 220 | box_confidence_logits = feats[..., 4:5] 221 | box_class_probs_logits = feats[..., 5:] 222 | 223 | if calc_loss is True: 224 | return grid, feats, box_xy, box_wh 225 | return box_xy, box_wh, box_confidence,\ 226 | box_class_probs, box_coord_logits,\ 227 | box_confidence_logits, box_class_probs_logits 228 | 229 | 230 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): 231 | """ Scale boxes to original image shape. 232 | 233 | Args: 234 | input_shape: shape of model input (416, 416) 235 | 236 | Taking (1920, 1080) pic as an example 237 | 238 | """ 239 | box_yx = box_xy[..., ::-1] # reverse last dimension list order 240 | box_hw = box_wh[..., ::-1] 241 | input_shape = K.cast(input_shape, K.dtype(box_yx)) # (416, 416) 242 | image_shape = K.cast(image_shape, K.dtype(box_yx)) # (416, 416) 243 | new_shape = K.round(image_shape * K.min(input_shape/image_shape)) 244 | offset = (input_shape-new_shape)/2./input_shape 245 | scale = input_shape/new_shape 246 | box_yx = (box_yx - offset) * scale # rescale to [1080, 1920] 247 | box_hw *= scale 248 | 249 | box_mins = box_yx - (box_hw / 2.) 250 | box_maxes = box_yx + (box_hw / 2.) 251 | boxes = K.concatenate([ 252 | box_mins[..., 0:1], # y_min 253 | box_mins[..., 1:2], # x_min 254 | box_maxes[..., 0:1], # y_max 255 | box_maxes[..., 1:2] # x_max 256 | ]) 257 | 258 | # Scale boxes back to original image shape. 259 | boxes *= K.concatenate([image_shape, image_shape]) 260 | return boxes 261 | 262 | 263 | def yolo_boxes_and_scores(feats, anchors, 264 | num_classes, input_shape, image_shape): 265 | """ Convert Conv layer output to boxes 266 | 267 | Multiply box_confidence with class_confidence to get real box_scores for each class 268 | 269 | Args: 270 | feats: Elements in the output list from K.model.output: 271 | shape = (N, 13, 13, 255) 272 | anchors: anchors. 273 | num_classes: num of classes. 274 | input_shape: input shape obtained from model output grid information. 275 | image_shape: placeholder for ORIGINAL image data shape. 
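        Returns:
            boxes: (num_boxes, 4) tensor, rescaled to the original image shape.
            box_scores: (num_boxes, num_classes) tensor,
                box_confidence * box_class_probs.
            box_coord_logits, box_confidence_logits, box_class_probs_logits:
                the corresponding raw logits, flattened the same way.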
276 | 277 | """ 278 | box_xy, box_wh, box_confidence, box_class_probs,\ 279 | box_coord_logits, box_confidence_logits,\ 280 | box_class_probs_logits = yolo_head( 281 | feats, anchors, num_classes, input_shape) 282 | 283 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 284 | 285 | boxes = K.reshape(boxes, [-1, 4]) 286 | 287 | box_scores = box_confidence * box_class_probs 288 | box_scores = K.reshape(box_scores, [-1, num_classes]) 289 | 290 | box_coord_logits = K.reshape(box_coord_logits, [-1, 4]) 291 | box_confidence_logits = K.reshape(box_confidence_logits, [-1]) 292 | box_class_probs_logits =\ 293 | K.reshape(box_class_probs_logits, [-1, num_classes]) 294 | return boxes, box_scores, box_coord_logits,\ 295 | box_confidence_logits, box_class_probs_logits 296 | 297 | 298 | def yolo_eval(yolo_outputs, 299 | anchors, 300 | num_classes, 301 | image_shape, 302 | max_boxes=20, 303 | score_threshold=.6, 304 | iou_threshold=.5): 305 | """Evaluate YOLO model on given input and return filtered boxes.""" 306 | num_layers = len(yolo_outputs) 307 | # import pdb 308 | # pdb.set_trace() 309 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 310 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 311 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 312 | boxes = [] 313 | box_scores = [] 314 | 315 | box_coord_logits = [] 316 | box_confidence_logits = [] 317 | box_class_probs_logits = [] 318 | 319 | for l in range(num_layers): 320 | _boxes, _box_scores, _box_coord_logits,\ 321 | _box_confidence_logits, _box_class_probs_logits =\ 322 | yolo_boxes_and_scores( 323 | yolo_outputs[l], 324 | anchors[anchor_mask[l]], 325 | num_classes, input_shape, image_shape) 326 | boxes.append(_boxes) 327 | box_scores.append(_box_scores) 328 | 329 | box_coord_logits.append(_box_coord_logits) 330 | box_confidence_logits.append(_box_confidence_logits) 331 | box_class_probs_logits.append(_box_class_probs_logits) 332 | 333 | boxes = K.concatenate(boxes, axis=0) 334 | box_scores = K.concatenate(box_scores, axis=0) 335 | 336 | box_coord_logits = K.concatenate(box_coord_logits, axis=0) 337 | box_confidence_logits = K.concatenate(box_confidence_logits, axis=0) 338 | box_class_probs_logits = K.concatenate(box_class_probs_logits, axis=0) 339 | 340 | mask = box_scores >= score_threshold 341 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 342 | boxes_ = [] 343 | scores_ = [] 344 | classes_ = [] 345 | for c in range(num_classes): 346 | # TODO: use keras backend instead of tf. 347 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 348 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 349 | nms_index = tf.image.non_max_suppression( 350 | class_boxes, 351 | class_box_scores, 352 | max_boxes_tensor, 353 | iou_threshold=iou_threshold) 354 | class_boxes = K.gather(class_boxes, nms_index) 355 | class_box_scores = K.gather(class_box_scores, nms_index) 356 | classes = K.ones_like(class_box_scores, 'int32') * c 357 | boxes_.append(class_boxes) 358 | scores_.append(class_box_scores) 359 | classes_.append(classes) 360 | boxes_ = K.concatenate(boxes_, axis=0) 361 | scores_ = K.concatenate(scores_, axis=0) 362 | classes_ = K.concatenate(classes_, axis=0) 363 | 364 | return boxes_, scores_, classes_ 365 | 366 | 367 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): 368 | '''Preprocess true boxes to training input format 369 | 370 | Parameters 371 | ---------- 372 | true_boxes: array, shape=(m, T, 5) 373 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. 
374 | input_shape: array-like, hw, multiples of 32 375 | anchors: array, shape=(N, 2), wh 376 | num_classes: integer 377 | 378 | Returns 379 | ------- 380 | y_true: list of array, shape like yolo_outputs, xywh are reletive value 381 | 382 | ''' 383 | assert (true_boxes[..., 4] < num_classes).all(),\ 384 | 'class id must be less than num_classes' 385 | num_layers = len(anchors)//3 # default setting 386 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 387 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 388 | 389 | true_boxes = np.array(true_boxes, dtype='float32') 390 | input_shape = np.array(input_shape, dtype='int32') 391 | boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2 392 | boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2] 393 | true_boxes[..., 0:2] = boxes_xy / input_shape[::-1] 394 | true_boxes[..., 2:4] = boxes_wh / input_shape[::-1] 395 | 396 | m = true_boxes.shape[0] 397 | grid_shapes = [input_shape//{0: 32, 1: 16, 2: 8}[l] 398 | for l in range(num_layers)] 399 | y_true = [np.zeros((m, grid_shapes[l][0], 400 | grid_shapes[l][1], 401 | len(anchor_mask[l]), 402 | 5 + num_classes), 403 | dtype='float32') 404 | for l in range(num_layers)] 405 | 406 | # Expand dim to apply broadcasting. 407 | anchors = np.expand_dims(anchors, 0) 408 | anchor_maxes = anchors / 2. 409 | anchor_mins = -anchor_maxes 410 | valid_mask = boxes_wh[..., 0] > 0 411 | 412 | for b in range(m): 413 | # Discard zero rows. 414 | wh = boxes_wh[b, valid_mask[b]] 415 | if len(wh) == 0: 416 | continue 417 | # Expand dim to apply broadcasting. 418 | wh = np.expand_dims(wh, -2) 419 | box_maxes = wh / 2. 420 | box_mins = -box_maxes 421 | 422 | intersect_mins = np.maximum(box_mins, anchor_mins) 423 | intersect_maxes = np.minimum(box_maxes, anchor_maxes) 424 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) 425 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 426 | box_area = wh[..., 0] * wh[..., 1] 427 | anchor_area = anchors[..., 0] * anchors[..., 1] 428 | iou = intersect_area / (box_area + anchor_area - intersect_area) 429 | 430 | # Find best anchor for each true box 431 | best_anchor = np.argmax(iou, axis=-1) 432 | 433 | for t, n in enumerate(best_anchor): 434 | for l in range(num_layers): 435 | if n in anchor_mask[l]: 436 | i = np.floor(true_boxes[b, t, 0] * 437 | grid_shapes[l][1]).astype('int32') 438 | j = np.floor(true_boxes[b, t, 1] * 439 | grid_shapes[l][0]).astype('int32') 440 | k = anchor_mask[l].index(n) 441 | c = true_boxes[b, t, 4].astype('int32') 442 | y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4] 443 | y_true[l][b, j, i, k, 4] = 1 444 | y_true[l][b, j, i, k, 5 + c] = 1 445 | 446 | return y_true 447 | 448 | 449 | def box_iou(b1, b2): 450 | '''Return iou tensor 451 | 452 | Parameters 453 | ---------- 454 | b1: tensor, shape=(i1,...,iN, 4), xywh 455 | b2: tensor, shape=(j, 4), xywh 456 | 457 | Returns 458 | ------- 459 | iou: tensor, shape=(i1,...,iN, j) 460 | 461 | ''' 462 | 463 | # Expand dim to apply broadcasting. 464 | b1 = K.expand_dims(b1, -2) 465 | b1_xy = b1[..., :2] 466 | b1_wh = b1[..., 2:4] 467 | b1_wh_half = b1_wh/2. 468 | b1_mins = b1_xy - b1_wh_half 469 | b1_maxes = b1_xy + b1_wh_half 470 | 471 | # Expand dim to apply broadcasting. 472 | b2 = K.expand_dims(b2, 0) 473 | b2_xy = b2[..., :2] 474 | b2_wh = b2[..., 2:4] 475 | b2_wh_half = b2_wh/2. 
476 | b2_mins = b2_xy - b2_wh_half 477 | b2_maxes = b2_xy + b2_wh_half 478 | 479 | intersect_mins = K.maximum(b1_mins, b2_mins) 480 | intersect_maxes = K.minimum(b1_maxes, b2_maxes) 481 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) 482 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 483 | b1_area = b1_wh[..., 0] * b1_wh[..., 1] 484 | b2_area = b2_wh[..., 0] * b2_wh[..., 1] 485 | iou = intersect_area / (b1_area + b2_area - intersect_area) 486 | 487 | return iou 488 | 489 | 490 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False): 491 | '''Return yolo_loss tensor 492 | 493 | Parameters 494 | ---------- 495 | yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body 496 | y_true: list of array, the output of preprocess_true_boxes 497 | anchors: array, shape=(N, 2), wh 498 | num_classes: integer 499 | ignore_thresh: float, the iou threshold whether 500 | to ignore object confidence loss 501 | 502 | Returns 503 | ------- 504 | loss: tensor, shape=(1,) 505 | 506 | ''' 507 | num_layers = len(anchors)//3 # default setting 508 | yolo_outputs = args[:num_layers] 509 | y_true = args[num_layers:] 510 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]\ 511 | if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] 512 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 513 | 32, K.dtype(y_true[0])) 514 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], 515 | K.dtype(y_true[0])) for l in range(num_layers)] 516 | loss = 0 517 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor 518 | mf = K.cast(m, K.dtype(yolo_outputs[0])) 519 | 520 | for l in range(num_layers): 521 | object_mask = y_true[l][..., 4:5] 522 | true_class_probs = y_true[l][..., 5:] 523 | 524 | grid, raw_pred, pred_xy, pred_wh = yolo_head( 525 | yolo_outputs[l], 526 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) 527 | pred_box = K.concatenate([pred_xy, pred_wh]) 528 | 529 | # Darknet raw box to calculate loss. 530 | raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid 531 | raw_true_wh = K.log(y_true[l][..., 2:4] / 532 | anchors[anchor_mask[l]] * input_shape[::-1]) 533 | raw_true_wh = K.switch(object_mask, 534 | raw_true_wh, K.zeros_like(raw_true_wh)) 535 | box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4] 536 | 537 | # Find ignore mask, iterate over each of batch. 538 | ignore_mask = tf.TensorArray( 539 | K.dtype(y_true[0]), 540 | size=1, 541 | dynamic_size=True) 542 | object_mask_bool = K.cast(object_mask, 'bool') 543 | 544 | def loop_body(b, ignore_mask): 545 | true_box = tf.boolean_mask( 546 | y_true[l][b, ..., 0:4], 547 | object_mask_bool[b, ..., 0]) 548 | iou = box_iou(pred_box[b], true_box) 549 | best_iou = K.max(iou, axis=-1) 550 | ignore_mask = ignore_mask.write( 551 | b, 552 | K.cast(best_iou < ignore_thresh, K.dtype(true_box))) 553 | return b+1, ignore_mask 554 | _, ignore_mask = K.control_flow_ops.while_loop( 555 | lambda b, 556 | *args: b < m, 557 | loop_body, 558 | [0, ignore_mask]) 559 | ignore_mask = ignore_mask.stack() 560 | ignore_mask = K.expand_dims(ignore_mask, -1) 561 | 562 | # K.binary_crossentropy is helpful to avoid exp overflow. 
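        # Four loss terms per scale, each masked and averaged over the batch:
        #   xy_loss         - BCE on sigmoid-space center offsets, weighted by
        #                     box_loss_scale = 2 - w*h so small boxes count more
        #   wh_loss         - squared error on log-space width/height
        #   confidence_loss - BCE on objectness; background cells contribute
        #                     only where best IoU < ignore_thresh (ignore_mask)
        #   class_loss      - BCE over the per-class probabilities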
563 |         xy_loss = object_mask * box_loss_scale *\
564 |             K.binary_crossentropy(
565 |                 raw_true_xy,
566 |                 raw_pred[..., 0:2],
567 |                 from_logits=True)
568 |         wh_loss = object_mask * box_loss_scale * 0.5\
569 |             * K.square(raw_true_wh-raw_pred[..., 2:4])
570 |         confidence_loss = object_mask *\
571 |             K.binary_crossentropy(
572 |                 object_mask,
573 |                 raw_pred[..., 4:5],
574 |                 from_logits=True) +\
575 |             (1-object_mask) *\
576 |             K.binary_crossentropy(
577 |                 object_mask,
578 |                 raw_pred[..., 4:5],
579 |                 from_logits=True) *\
580 |             ignore_mask
581 |         class_loss = object_mask * K.binary_crossentropy(
582 |             true_class_probs,
583 |             raw_pred[..., 5:],
584 |             from_logits=True)
585 | 
586 |         xy_loss = K.sum(xy_loss) / mf
587 |         wh_loss = K.sum(wh_loss) / mf
588 |         confidence_loss = K.sum(confidence_loss) / mf
589 |         class_loss = K.sum(class_loss) / mf
590 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
591 |         if print_loss:
592 |             loss = tf.Print(
593 |                 loss,
594 |                 [loss,
595 |                  xy_loss,
596 |                  wh_loss,
597 |                  confidence_loss,
598 |                  class_loss,
599 |                  K.sum(ignore_mask)],
600 |                 message='loss: ')
601 |     return loss
602 | 
--------------------------------------------------------------------------------
/models/yolov3/yolov3_wrapper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("/home/yantao/workspace/projects/baidu/bbox_std")
3 | 
4 | import tensorflow as tf
5 | import os
6 | import numpy as np
7 | import colorsys
8 | import logging
9 | import json
10 | import pickle
11 | 
12 | from PIL import Image, ImageFont, ImageDraw
13 | from collections import defaultdict
14 | from keras import backend as K
15 | from keras.models import Model
16 | from keras.layers import Input, Lambda
17 | from models.yolov3.yolov3_model import yolo_body, yolo_eval
18 | from models.yolov3.image_utils import letterbox_image, image_to_ndarray, letterbox_image_tf_dynamic
19 | 
20 | import pdb
21 | 
22 | class YOLOv3(object):
23 |     _defaults = {
24 |         "model_path": 'models/yolov3/model_data/yolov3.h5',
25 |         "anchors_path": 'models/yolov3/model_data/yolov3_anchors.txt',
26 |         "classes_path": 'models/yolov3/model_data/coco_classes.txt',
27 |         "box_score_threshold": 0.3,
28 |         "nms_iou_threshold": 0.45,
29 |         "mAP_iou_threshold": 0.5,
30 |         "model_image_size": (416, 416),
31 |         "gpu_num": 1,
32 |     }
33 |     @classmethod
34 |     def get_defaults(cls, n):
35 |         if n in cls._defaults:
36 |             return cls._defaults[n]
37 |         else:
38 |             return "Unrecognized attribute name '" + n + "'"
39 | 
40 |     def _get_class(self):
41 |         classes_path = os.path.expanduser(self.classes_path)
42 |         with open(classes_path) as f:
43 |             class_names = f.readlines()
44 |         class_names = [c.strip() for c in class_names]
45 |         return class_names
46 | 
47 |     def _get_anchors(self):
48 |         anchors_path = os.path.expanduser(self.anchors_path)
49 |         with open(anchors_path) as f:
50 |             anchors = f.readline()
51 |         anchors = [float(x) for x in anchors.split(',')]
52 |         return np.array(anchors).reshape(-1, 2)
53 | 
54 |     def __init__(self, **kwargs):
55 |         self.__dict__.update(self._defaults)
56 |         self.__dict__.update(kwargs)
57 |         K.set_session(self.sess)  # expects a tf session passed as sess=...
58 |         self.logger = logging.getLogger(self.__class__.__name__)
59 |         self.class_names = self._get_class()
60 |         self.num_classes = len(self.class_names)
61 |         self.anchors = self._get_anchors()
62 |         self.logger.info("Loading %s model ...", self.__class__.__name__)
63 |         self.model = self.create_model()
64 |         self.logger.info("Model loaded.")
65 |         self.input_image_shape = K.placeholder(shape=(2,))
66 |         self.boxes, self.scores, self.classes = yolo_eval(self.model.output,
67 |                                                           self.anchors, self.num_classes,
68 |                                                           self.input_image_shape,
69 |                                                           score_threshold=self.box_score_threshold,
70 |                                                           iou_threshold=self.nms_iou_threshold)
71 | 
72 |     def create_model(self):
73 | 
74 |         self.input_image = tf.placeholder(tf.float32, (None, None, None, 3))
75 |         boxed_image = letterbox_image_tf_dynamic(self.input_image, (416, 416))
76 |         input = Input(tensor=boxed_image)
77 |         model = yolo_body(input, len(self.anchors)//3, len(self.class_names))
78 | 
79 |         model.load_weights(self.model_path)
80 |         return model
81 | 
82 |     def _feed_forward(self, image):
83 |         image_data = image_to_ndarray(image)
84 |         image_shape = [image.size[1], image.size[0]]  # Original image dimensions (h, w)
85 |         out_boxes, out_scores, out_classes = self.sess.run(
86 |             [self.boxes, self.scores, self.classes],
87 |             feed_dict={
88 |                 self.input_image: image_data,
89 |                 self.input_image_shape: image_shape,
90 |                 K.learning_phase(): 0
91 |             })
92 |         return out_boxes, out_scores, out_classes
93 | 
94 |     def predict(self, image, show_image=False):
95 |         '''
96 |         Return a dictionary of lists.
97 | 
98 |         Output:
99 |         {
100 |             'boxes' : [[top, left, bottom, right], ...]
101 |             'scores' : [float, ...]
102 |             'classes' : [int, ...]
103 |         }
104 |         '''
105 | 
106 |         out_boxes, out_scores, out_classes = self._feed_forward(image)
107 |         prediction = {}
108 |         prediction['boxes'] = []
109 |         prediction['scores'] = []
110 |         prediction['classes'] = []
111 |         for temp_box, temp_score, temp_class in zip(out_boxes, out_scores, out_classes):
112 |             prediction['boxes'].append(temp_box.tolist())
113 |             prediction['scores'].append(temp_score)
114 |             prediction['classes'].append(temp_class)
115 | 
116 |         return prediction
117 | 
118 | def main():
119 |     image = Image.open('images/cat.jpg')
120 |     model = YOLOv3(sess=K.get_session())
121 | 
122 | if __name__ == "__main__":
123 |     main()
--------------------------------------------------------------------------------
/models/yolov3_wrapper.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import tensorflow as tf
4 | import os
5 | import numpy as np
6 | import colorsys
7 | import logging
8 | import json
9 | import pickle
10 | 
11 | from PIL import Image, ImageFont, ImageDraw
12 | from collections import defaultdict
13 | from keras import backend as K
14 | from keras.models import Model
15 | from keras.layers import Input, Lambda
16 | from models.yolov3.yolov3_model import yolo_body, yolo_eval
17 | from models.yolov3.image_utils import letterbox_image, image_to_ndarray, letterbox_image_tf_dynamic
18 | 
19 | import pdb
20 | 
21 | class YOLOv3(object):
22 |     _defaults = {
23 |         "model_path": 'models/yolov3/model_data/yolov3.h5',
24 |         "anchors_path": 'models/yolov3/model_data/yolov3_anchors.txt',
25 |         "classes_path": 'models/yolov3/model_data/coco_classes.txt',
26 |         "box_score_threshold": 0.3,
27 |         "nms_iou_threshold": 0.45,
28 |         "mAP_iou_threshold": 0.5,
29 |         "model_image_size": (416, 416),
30 |         "gpu_num": 1,
31 |     }
32 |     @classmethod
33 |     def get_defaults(cls, n):
34 |         if n in cls._defaults:
35 |             return cls._defaults[n]
36 |         else:
37 |             return "Unrecognized attribute name '" + n + "'"
38 | 
39 |     def _get_class(self):
40 |         classes_path = os.path.expanduser(self.classes_path)
41 |         with open(classes_path) as f:
42 |             class_names = f.readlines()
43 |         class_names = [c.strip() for c in class_names]
44 |         return class_names
45 | 
46 |     def _get_anchors(self):
47 |         anchors_path = os.path.expanduser(self.anchors_path)
48 |         with open(anchors_path) as f:
49 |             anchors = f.readline()
50 |         anchors = [float(x) for x in anchors.split(',')]
51 |         return np.array(anchors).reshape(-1, 2)
52 | 
53 |     def __init__(self, **kwargs):
54 |         self.__dict__.update(self._defaults)
55 |         self.__dict__.update(kwargs)
56 |         K.set_session(self.sess)  # expects a tf session passed as sess=...
57 |         self.logger = logging.getLogger(self.__class__.__name__)
58 |         self.class_names = self._get_class()
59 |         self.num_classes = len(self.class_names)
60 |         self.anchors = self._get_anchors()
61 |         self.logger.info("Loading %s model ...", self.__class__.__name__)
62 |         self.model = self.create_model()
63 |         self.logger.info("Model loaded.")
64 |         self.input_image_shape = K.placeholder(shape=(2,))
65 |         self.boxes, self.scores, self.classes = yolo_eval(self.model.output,
66 |                                                           self.anchors, self.num_classes,
67 |                                                           self.input_image_shape,
68 |                                                           score_threshold=self.box_score_threshold,
69 |                                                           iou_threshold=self.nms_iou_threshold)
70 | 
71 |     def create_model(self):
72 | 
73 |         self.input_image = tf.placeholder(tf.float32, (None, None, None, 3))
74 |         boxed_image = letterbox_image_tf_dynamic(self.input_image, (416, 416))
75 |         input = Input(tensor=boxed_image)
76 |         model = yolo_body(input, len(self.anchors)//3, len(self.class_names))
77 | 
78 |         model.load_weights(self.model_path)
79 |         return model
80 | 
81 |     def _feed_forward(self, image):
82 |         image_data = image_to_ndarray(image)
83 |         image_shape = [image.size[1], image.size[0]]  # Original image dimensions (h, w)
84 |         out_boxes, out_scores, out_classes = self.sess.run(
85 |             [self.boxes, self.scores, self.classes],
86 |             feed_dict={
87 |                 self.input_image: image_data,
88 |                 self.input_image_shape: image_shape,
89 |                 K.learning_phase(): 0
90 |             })
91 |         return out_boxes, out_scores, out_classes
92 | 
93 |     def predict(self, image, show_image=False):
94 |         '''
95 |         Return a dictionary of lists.
96 | 
97 |         Output:
98 |         {
99 |             'boxes' : [[top, left, bottom, right], ...]
100 |             'scores' : [float, ...]
101 |             'classes' : [int, ...]
102 |         }
103 |         '''
104 | 
105 |         out_boxes, out_scores, out_classes = self._feed_forward(image)
106 |         prediction = {}
107 |         prediction['boxes'] = []
108 |         prediction['scores'] = []
109 |         prediction['classes'] = []
110 |         for temp_box, temp_score, temp_class in zip(out_boxes, out_scores, out_classes):
111 |             prediction['boxes'].append(temp_box.tolist())
112 |             prediction['scores'].append(temp_score)
113 |             prediction['classes'].append(temp_class)
114 | 
115 |         return prediction
116 | 
117 | def main():
118 |     model = YOLOv3(sess=K.get_session())
119 | 
120 | if __name__ == "__main__":
121 |     main()
--------------------------------------------------------------------------------
/pipeline_center.py:
--------------------------------------------------------------------------------
1 | from tracker.kalman_filter import Tracker_center as Tracker
2 | from utils import utils
3 | from assign_detection_to_trackers import assign_detections_to_trackers
4 | 
5 | import cv2
6 | import numpy as np
7 | from PIL import Image
8 | import copy
9 | 
10 | import pdb
11 | 
12 | def pipeline(img, det, frame_count, params_ori, is_init=False, detect_output=False, verbose=1, virtual_attack=False, return_match_info=False):
13 |     '''
14 |     Pipeline function for detection and tracking
15 |     Args:
16 |         img : nparray
17 |             input image array
18 |         det : object or list
19 |             detector or detection results
20 |         frame_count : int
21 |             frame index
22 |         params_ori : dict
23 |             parameters used for tracking
24 |         detect_output : bool
25 |             If True, det is detection results
26 |         verbose : int
27 |             verbosity level (1 prints per-frame debug output)
28 |         virtual_attack : bool
29 |             If True, simulate the attack by dropping all detections.
30 |     '''
31 |     params = copy.deepcopy(params_ori)
32 |     if detect_output:
33 |         assert isinstance(det, list) or det is None
34 | 
35 |     tracker_list = params['tracker_list']
36 |     max_age = params['max_age']
37 |     min_hits = params['min_hits']
38 |     track_id_list = params['track_id_list']
39 | 
40 |     frame_count += 1
41 |     if detect_output:
42 |         detected_objects_list = det
43 |     else:
44 |         detected_objects_list = det.detect_image(img)
45 | 
46 |     if virtual_attack:
47 |         detected_objects_list = []
48 | 
49 |     if verbose == 1:
50 |         print('Frame:', frame_count)
51 |         print('Detected objects: ', detected_objects_list)
52 | 
53 |     x_obj = []
54 |     img_bbox = img.copy()
55 |     for idx, detected_object in enumerate(detected_objects_list):
56 |         img_bbox = utils.draw_box_label(img_bbox, detected_object, box_color=(255, 0, 0), thickness=10)
57 | 
58 |     if len(tracker_list) > 0:
59 |         for trk in tracker_list:
60 |             x_obj.append(trk.obj)
61 | 
62 |     z_obj = [obj for obj in detected_objects_list]
63 | 
64 |     matched, unmatched_dets, unmatched_trks = assign_detections_to_trackers(x_obj, z_obj, iou_thrd=0.5)  # 0.3
65 | 
66 |     if verbose == 1:
67 |         print('Detection: ', z_obj)
68 |         print('x_obj: ', x_obj)
69 |         print('matched:', matched)
70 |         print('unmatched_det:', unmatched_dets)
71 |         print('unmatched_trks:', unmatched_trks)
72 | 
73 |     # Deal with matched detections
74 |     if matched.size > 0:
75 |         for trk_idx, det_idx in matched:
76 |             z = z_obj[det_idx]['bbox']
77 | 
78 |             z_center = np.array([(z[0] + z[2]) / 2, (z[1] + z[3]) / 2])
79 |             z_center = np.expand_dims(z_center, axis=0).T
80 |             z_wh = np.array([z[2] - z[0] + 1, z[3] - z[1] + 1])
81 |             tmp_trk = tracker_list[trk_idx]
82 |             tmp_trk.kalman_filter(z_center, z_wh)
83 |             xx_state = tmp_trk.get_x_state().T[0].tolist()
84 |             xx_center = [xx_state[0], xx_state[2]]
85 |             xx_wh = tmp_trk.whRCF.get_state()
86 | 
87 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2]).astype('int')
88 |             x_obj[trk_idx]['bbox'] = temp_bbox
89 |             tmp_trk.obj['bbox'] = temp_bbox
90 |             x_obj[trk_idx]['score'] = z_obj[det_idx]['score']
91 |             tmp_trk.obj['score'] = z_obj[det_idx]['score']
92 |             x_obj[trk_idx]['class_idx'] = z_obj[det_idx]['class_idx']
93 |             tmp_trk.obj['class_idx'] = z_obj[det_idx]['class_idx']
94 |             x_obj[trk_idx]['class_name'] = z_obj[det_idx]['class_name']
95 |             tmp_trk.obj['class_name'] = z_obj[det_idx]['class_name']
96 | 
97 |             if not is_init:
98 |                 tmp_trk.hits += 1
99 |             else:
100 |                 tmp_trk.hits = params_ori['min_hits']
101 |             tmp_trk.no_losses = 0
102 | 
103 |     # Deal with unmatched detections
104 |     if len(unmatched_dets) > 0:
105 |         for idx in unmatched_dets:
106 |             z = z_obj[idx]['bbox']
107 | 
108 |             z_center = np.array([(z[0] + z[2]) / 2, (z[1] + z[3]) / 2])
109 |             z_center = np.expand_dims(z_center, axis=0).T
110 |             z_wh = np.array([z[2] - z[0] + 1, z[3] - z[1] + 1])
111 |             tmp_trk = Tracker()  # Create a new tracker
112 |             x = np.array([[z_center[0], 0, z_center[1], 0]]).T
113 |             tmp_trk.Init(x, z_wh)
114 |             tmp_trk.predict_only()
115 |             xx_state = tmp_trk.get_x_state()
116 |             xx_state = xx_state.T[0].tolist()
117 |             xx_center = [xx_state[0], xx_state[2]]
118 |             xx_wh = tmp_trk.whRCF.get_state()
119 | 
120 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2]).astype('int')
121 |             tmp_trk.obj['bbox'] = temp_bbox
122 |             tmp_trk.obj['score'] = z_obj[idx]['score']
123 |             tmp_trk.obj['class_idx'] = z_obj[idx]['class_idx']
124 |             tmp_trk.obj['class_name'] = z_obj[idx]['class_name']
125 | 
126 |             tmp_trk.id = track_id_list.popleft()  # assign an ID for the tracker
127 |             tracker_list.append(tmp_trk)
128 |             x_obj.append(tmp_trk.obj)
129 | 
130 |     # Deal with unmatched tracks
131 |     if len(unmatched_trks) > 0:
132 |         for trk_idx in unmatched_trks:
133 |             tmp_trk = tracker_list[trk_idx]
134 |             tmp_trk.no_losses += 1
135 |             tmp_trk.predict_only()
136 |             xx_state = tmp_trk.get_x_state()
137 |             xx_state = xx_state.T[0].tolist()
138 |             xx_center = [xx_state[0], xx_state[2]]
139 |             xx_wh = tmp_trk.whRCF.get_state()
140 | 
141 |             temp_bbox = np.array([xx_center[0] - xx_wh[0] / 2, xx_center[1] - xx_wh[1] / 2, xx_center[0] + xx_wh[0] / 2, xx_center[1] + xx_wh[1] / 2])
142 |             tmp_trk.obj['bbox'] = temp_bbox
143 |             x_obj[trk_idx]['bbox'] = temp_bbox
144 | 
145 |     img_bbox_track = img_bbox.copy()
146 |     # The list of tracks to be annotated
147 |     good_tracker_list = []
148 |     for trk in tracker_list:
149 |         if ((trk.hits >= min_hits) and (trk.no_losses <= max_age)):
150 |             good_tracker_list.append(trk)
151 |             x_cv2 = trk.obj['bbox']
152 |             if verbose == 1:
153 |                 print('updated box: ', x_cv2)
154 |             img_bbox_track = utils.draw_box_label(img_bbox, trk.obj)  # Draw the bounding boxes on the image
155 | 
156 |     # Book keeping
157 |     deleted_tracks = filter(lambda x: x.no_losses > max_age, tracker_list)
158 | 
159 |     for trk in deleted_tracks:
160 |         track_id_list.append(trk.id)
161 | 
162 |     tracker_list = [x for x in tracker_list if x.no_losses <= max_age]
163 |     if verbose == 1:
164 |         print('Ending tracker_list: ', len(tracker_list))
165 |         print('Ending good tracker_list: ', len(good_tracker_list))
166 | 
167 |     params_new = {}
168 |     params_new['tracker_list'] = tracker_list
169 |     params_new['max_age'] = max_age
170 |     params_new['min_hits'] = min_hits
171 |     params_new['track_id_list'] = track_id_list
172 | 
173 |     if return_match_info:
174 |         return img_bbox_track, params_new, (matched, unmatched_dets, unmatched_trks)
175 |     else:
176 |         return img_bbox_track, params_new
--------------------------------------------------------------------------------
/tracker/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import dot
3 | from scipy.linalg import inv, block_diag
4 | 
5 | import pdb
6 | 
7 | class FirstOrderRCLowPassFilter():
8 |     def __init__(self):
9 |         self.alpha_ = 0.0
10 |         self.inited_ = False
11 |         self.state_ = np.array([0.0, 0.0])
12 | 
13 |     def SetAlpha(self, alpha):
14 |         self.alpha_ = alpha
15 |         self.inited_ = False
16 | 
17 |     def AddMeasure(self, z):
18 |         if self.inited_:
19 |             self.state_ = z + self.alpha_ * (self.state_ - z)
20 |         else:
21 |             self.state_ = z
22 |             self.inited_ = True
23 | 
24 |     def AddMeasure_noinput(self):
25 |         z = self.state_
26 |         self.AddMeasure(z)
27 | 
28 | 
29 |     def get_state(self):
30 |         return self.state_
31 | 
32 |     def isInited(self):
33 |         return self.inited_
34 | 
35 | 
36 | class Tracker_center():  # Kalman filter that only tracks the center of the bbox
37 |     def __init__(self):
38 |         self.inited_ = False
39 |         self.id = 0  # tracker's id
40 |         self.obj = {}  # object information
41 | 
42 |         self.hits = 0  # number of detection matches
43 |         self.no_losses = 0  # number of unmatched tracks (track loss)
44 | 
45 |         # Initialize parameters for Kalman Filtering
46 |         # The state is the (x, y) coordinates of the center of detection box
47 |         # state: [center_c, center_c_dot, center_r, center_r_dot]
48 | 
49 |         self.x_state_ = []
50 |         self.whRCF = FirstOrderRCLowPassFilter()
51 |         self.whRCF.SetAlpha(0.5)
52 |         self.dt = 1
53 | 
54 |         # state transition matrix F
55 |         self.F = np.array([[1, 0, 0, 0],
56 |                            [0, 1, 0, 0],
57 |                            [0, 0, 1, 0],
58 |                            [0, 0, 0, 1]])
59 |         self.F[0, 1] = self.dt
60 |         self.F[2, 3] = self.dt
61 | 
62 |         # Measurement matrix H, assuming we can only measure the coordinates
63 |         self.H = np.array([[1, 0, 0, 0],
64 |                            [0, 0, 1, 0]])
65 | 
66 |         # Initialize the state covariance P
67 |         self.L = 10.0  # 10.0 # no change
68 |         self.P = np.diag(self.L * np.ones(4))
69 | 
70 |         # Initialize the process covariance
71 |         self.Q_comp_mat = np.array([[self.dt**4/4., self.dt**3/2.],
72 |                                     [self.dt**3/2., self.dt**2]])
73 |         self.Q = block_diag(self.Q_comp_mat, self.Q_comp_mat)
74 | 
75 |         # Initialize the measurement covariance
76 |         self.R_scaler = 1.0  # 1.0
77 |         self.R_diag_array = self.R_scaler * np.array([self.L, self.L])
78 |         self.R = np.diag(self.R_diag_array)
79 | 
80 |     def Init(self, x, wh):
81 |         self.x_state_ = x
82 |         self.inited_ = True
83 |         self.whRCF.AddMeasure(wh)
84 | 
85 |     def update_R(self):
86 |         R_diag_array = self.R_scaler * np.array([self.L, self.L])
87 |         self.R = np.diag(R_diag_array)
88 | 
89 |     def isInited(self):
90 |         if not self.inited_:
91 |             return False
92 |         if not self.whRCF.isInited():
93 |             return False
94 |         return True
95 | 
96 |     def get_x_state(self):
97 |         if not self.isInited():
98 |             raise ValueError('tracker not initialized.')
99 | 
100 |         return self.x_state_
101 | 
102 |     def kalman_filter(self, z, wh):
103 |         '''
104 |         Implement the Kalman Filter, including the predict and the update stages,
105 |         with the measurement z
106 |         '''
107 |         if not self.isInited():
108 |             raise ValueError('tracker not initialized.')
109 | 
110 |         x = self.x_state_.astype('float')
111 |         # Predict
112 |         x = dot(self.F, x)
113 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
114 | 
115 |         # Update
116 |         S = dot(self.H, self.P).dot(self.H.T) + self.R
117 |         K = dot(self.P, self.H.T).dot(inv(S))  # Kalman gain
118 |         y = z - dot(self.H, x)  # residual
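        # Standard Kalman update: S is the innovation covariance and
        # K = P H^T S^-1 the gain; the state moves toward the measurement
        # by K*y and the covariance contracts to (I - K H) P.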
119 | 
120 |         x += dot(K, y)
121 |         self.P = self.P - dot(K, self.H).dot(self.P)
122 |         self.x_state_ = x.astype(int)  # convert to integer coordinates
123 |         # (pixel values)
124 | 
125 |         self.whRCF.AddMeasure(wh)
126 | 
127 |     def predict_only(self):
128 |         '''
129 |         Implement only the predict stage. This is used for unmatched detections and
130 |         unmatched tracks
131 |         '''
132 |         if not self.isInited():
133 |             raise ValueError('tracker not initialized.')
134 |         x = self.x_state_
135 |         # Predict
136 |         x = dot(self.F, x)
137 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
138 |         self.x_state_ = x.astype(int)
139 | 
140 |         self.whRCF.AddMeasure_noinput()
141 | 
142 | class Tracker():  # class for Kalman Filter-based tracker
143 |     def __init__(self):
144 |         # Initialize parameters for tracker (history)
145 |         self.id = 0  # tracker's id
146 |         self.obj = {}
147 | 
148 |         self.hits = 0  # number of detection matches
149 |         self.no_losses = 0  # number of unmatched tracks (track loss)
150 | 
151 |         # Initialize parameters for Kalman Filtering
152 |         # The state is the (x, y) coordinates of the detection box
153 |         # state: [left, left_dot, up, up_dot, right, right_dot, down, down_dot]
154 |         # or [left, left_dot, up, up_dot, width, width_dot, height, height_dot]
155 |         self.x_state = []
156 |         self.dt = 1.  # time interval
157 | 
158 |         # Process matrix, assuming constant velocity model
159 |         self.F = np.array([[1, self.dt, 0, 0, 0, 0, 0, 0],
160 |                            [0, 1, 0, 0, 0, 0, 0, 0],
161 |                            [0, 0, 1, self.dt, 0, 0, 0, 0],
162 |                            [0, 0, 0, 1, 0, 0, 0, 0],
163 |                            [0, 0, 0, 0, 1, self.dt, 0, 0],
164 |                            [0, 0, 0, 0, 0, 1, 0, 0],
165 |                            [0, 0, 0, 0, 0, 0, 1, self.dt],
166 |                            [0, 0, 0, 0, 0, 0, 0, 1]])
167 | 
168 |         # Measurement matrix, assuming we can only measure the coordinates
169 | 
170 |         self.H = np.array([[1, 0, 0, 0, 0, 0, 0, 0],
171 |                            [0, 0, 1, 0, 0, 0, 0, 0],
172 |                            [0, 0, 0, 0, 1, 0, 0, 0],
173 |                            [0, 0, 0, 0, 0, 0, 1, 0]])
174 | 
175 | 
176 |         # Initialize the state covariance
177 |         self.L = 10.0
178 |         self.P = np.diag(self.L*np.ones(8))
179 | 
180 | 
181 |         # Initialize the process covariance
182 |         self.Q_comp_mat = np.array([[self.dt**4/4., self.dt**3/2.],
183 |                                     [self.dt**3/2., self.dt**2]])
184 |         self.Q = block_diag(self.Q_comp_mat, self.Q_comp_mat,
185 |                             self.Q_comp_mat, self.Q_comp_mat)
186 | 
187 |         # Initialize the measurement covariance
188 |         self.R_scaler = 1.0
189 |         self.R_diag_array = self.R_scaler * np.array([self.L, self.L, self.L, self.L])
190 |         self.R = np.diag(self.R_diag_array)
191 | 
192 | 
193 |     def update_R(self):
194 |         R_diag_array = self.R_scaler * np.array([self.L, self.L, self.L, self.L])
195 |         self.R = np.diag(R_diag_array)
196 | 
197 | 
198 | 
199 | 
200 |     def kalman_filter(self, z):
201 |         '''
202 |         Implement the Kalman Filter, including the predict and the update stages,
203 |         with the measurement z
204 |         '''
205 |         x = self.x_state
206 |         # Predict
207 |         x = dot(self.F, x)
208 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
209 | 
210 |         # Update
211 |         S = dot(self.H, self.P).dot(self.H.T) + self.R
212 |         K = dot(self.P, self.H.T).dot(inv(S))  # Kalman gain
213 |         y = z - dot(self.H, x)  # residual
214 |         x += dot(K, y)
215 |         self.P = self.P - dot(K, self.H).dot(self.P)
216 |         self.x_state = x.astype(int)  # convert to integer coordinates
217 |         # (pixel values)
218 | 
219 |     def predict_only(self):
220 |         '''
221 |         Implement only the predict stage. This is used for unmatched detections and
222 |         unmatched tracks
223 |         '''
224 |         x = self.x_state
225 |         # Predict
226 |         x = dot(self.F, x)
227 |         self.P = dot(self.F, self.P).dot(self.F.T) + self.Q
228 |         self.x_state = x.astype(int)
--------------------------------------------------------------------------------
/utils/file_utils.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import shutil
4 | import numpy as np
5 | 
6 | folder_name = '/home/yunhan/Documents/data/apollo/'
7 | 
8 | filenames = []
9 | count = 0
10 | for filename in glob.iglob('/home/yunhan/Documents/data/apollo/output_highway/images/**/*.jpg', recursive=True):
11 |     print(count)
12 |     shutil.move(filename, '/home/yunhan/Documents/data/detection/%05d.jpg' % count)
13 |     count = count + 1
14 | 
--------------------------------------------------------------------------------
/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | from PIL import Image
2 | import numpy as np
3 | 
4 | # Debug
5 | import tensorflow as tf
6 | from tensorflow.image import ResizeMethod
7 | 
8 | def letterbox_image(image, size):
9 |     """ Resize image with unchanged aspect ratio using padding.
10 | 
11 |     Args:
12 |         image: PIL.Image.Image (Jpeg or PNG)
13 |         size: Tuple (416, 416)
14 | 
15 |     Returns:
16 |         new_image: PIL.Image.Image
17 |     """
18 |     iw, ih = image.size
19 |     w, h = size
20 |     scale = min(w / iw, h / ih)
21 |     nw = int(iw * scale)
22 |     nh = int(ih * scale)
23 | 
24 |     image = image.resize((nw, nh), Image.BICUBIC)
25 |     new_image = Image.new('RGB', size, (128, 128, 128))
26 |     # new_image = Image.new('RGB', size, (0, 0, 0))
27 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
28 |     return new_image
29 | 
30 | def letterbox_image_tf_dynamic(image, size, resize_method=ResizeMethod.BILINEAR):
31 |     """ Letterbox image that handles dynamic Tensor shapes """
32 |     if len(image.get_shape()) == 4:
33 |         ih, iw = tf.shape(image)[1], tf.shape(image)[2]
34 |         images = image
35 |     else:
36 |         ih, iw = tf.shape(image)[0], tf.shape(image)[1]
37 |         images = [image]
38 |     w, h = tf.constant(size[0]), tf.constant(size[1])
39 |     scale = tf.minimum(w / iw, h / ih)
40 |     nw = tf.cast(tf.cast(iw, tf.float64) * scale, tf.int32)
41 |     nh = tf.cast(tf.cast(ih, tf.float64) * scale, tf.int32)
42 | 
43 |     image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True)
44 | 
45 |     h_pad = tf.cast((h-nh)//2, tf.int32)
46 |     w_pad = tf.cast((w-nw)//2, tf.int32)
47 |     c_pad = 0
48 |     if len(image_tensor.shape) == 4:
49 |         paddings = [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
50 |     else:
51 |         paddings = [[h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
52 | 
53 |     image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.)
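    # Pad with mid-gray (128/255 after the [0, 1] scaling) so the TF path
    # matches the PIL letterbox_image fill of (128, 128, 128). Note that the
    # symmetric [h_pad, h_pad] padding can fall one pixel short of the target
    # size whenever h - nh (or w - nw) is odd.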
54 |     return image_tensor
55 | 
56 | 
57 | 
58 | def letterbox_image_tf_static(image, raw_size, tgt_size, resize_method=ResizeMethod.BILINEAR):
59 |     """ Letterbox image that only handles static shapes, but more efficiently. """
60 |     if len(image.shape) == 4:
61 |         images = image
62 |     else:
63 |         images = [image]
64 | 
65 |     iw, ih = raw_size
66 |     w, h = tgt_size
67 |     scale = min(w / iw, h / ih)
68 |     nw = int(iw * scale)
69 |     nh = int(ih * scale)
70 | 
71 |     h_pad, w_pad, c_pad = (h - nh) // 2, (w - nw) // 2, 0
72 | 
73 |     image_tensor = tf.image.resize_images(images, (nh, nw), method=resize_method, align_corners=True)
74 |     paddings = [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [c_pad, c_pad]]
75 | 
76 |     image_tensor = tf.pad(image_tensor, paddings, constant_values=128. / 255.)
77 |     return image_tensor
78 | 
79 | 
80 | def image_to_ndarray(image, expand_dims=True):
81 |     """ Convert PIL Image to numpy.ndarray and add batch dimension
82 | 
83 |     Args:
84 |         image: PIL.Image.Image
85 | 
86 |     Returns:
87 |         image_data: numpy.ndarray (1, 416, 416, 3) or (416, 416, 3)
88 | 
89 |     """
90 |     image_data = np.array(image, dtype='float32')
91 |     image_data /= 255.
92 |     if expand_dims:
93 |         image_data = np.expand_dims(image_data, 0)
94 |     if image_data.shape[-1] == 4:
95 |         image_data = image_data[..., 0:-1]  # drop the alpha channel
96 |     return image_data
97 | 
98 | def ndarray_to_image(image_data):
99 |     if len(image_data.shape) == 4:
100 |         image_data = np.squeeze(image_data, axis=0)
101 |     image_data = (image_data * 255).astype("uint8")
102 |     return Image.fromarray(image_data)
103 | 
104 | def load_yolov3_image(img_fpath):
105 |     """ Load and resize an image for YOLOv3. """
106 |     model_image_size = (416, 416)
107 |     image = Image.open(img_fpath)
108 |     boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
109 |     image_data = np.array(boxed_image, dtype='float32')
110 |     image_data /= 255.
111 |     image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
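    # image_data is now float32 with shape (1, 416, 416, 3), scaled to
    # [0, 1] -- the layout the YOLOv3 input placeholder expects.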
112 |     return image_data
113 | 
114 | def l1_diff(image1, image2):
115 |     diff = np.abs(image1 - image2)
116 |     return np.sum(diff)
117 | 
118 | def l0_diff(image1, image2):
119 |     diff = np.abs(image1 - image2)
120 |     return np.count_nonzero(diff)
121 | 
122 | def l_inf_diff(image1, image2):
123 |     diff = np.abs(image1 - image2)
124 |     return np.max(diff)
125 | 
126 | def main():
127 |     image = Image.open('images/cat.jpg')
128 | 
129 |     boxed_image = letterbox_image(image, tuple(reversed((416, 416))))
130 |     image_data_pil = image_to_ndarray(boxed_image, expand_dims=False)
131 |     x_img_pil = tf.placeholder(tf.float32, shape=(416, 416, 3))
132 | 
133 |     image_data_tf_dynamic = image_to_ndarray(image, expand_dims=False)
134 |     x_img_tf_large = tf.placeholder(tf.float32, shape=(None, None, 3))
135 |     x_img_tf = letterbox_image_tf_dynamic(x_img_tf_large, (416, 416))
136 | 
137 |     image_data_tf_static = image_to_ndarray(image, expand_dims=False)
138 |     x_img_tf_large_static = tf.placeholder(tf.float32, shape=(1080, 1920, 3))
139 |     x_img_tf_static = letterbox_image_tf_static(x_img_tf_large_static, (1920, 1080), (416, 416))
140 | 
141 |     with tf.Session() as sess:
142 |         image_resized_pil = sess.run(x_img_pil, feed_dict={x_img_pil: image_data_pil})
143 |         image_resized_tf = sess.run(x_img_tf, feed_dict={x_img_tf_large: image_data_tf_dynamic})
144 |         image_resized_tf = np.squeeze(image_resized_tf, axis=0)
145 |         image_resized_tf_static = sess.run(x_img_tf_static, feed_dict={x_img_tf_large_static: image_data_tf_static})
146 | 
147 | 
148 |     l1 = l1_diff(image_resized_tf, image_resized_tf_static)
149 |     l0 = l0_diff(image_resized_tf, image_resized_tf_static)
150 |     l_inf = l_inf_diff(image_resized_tf, image_resized_tf_static)
151 | 
152 |     print("l1 %f, l0 %d, l_inf %f" % (l1, l0, l_inf))
153 |     image_pil = ndarray_to_image(image_resized_pil)
154 |     image_tf = ndarray_to_image(image_resized_tf)
155 |     image_tf_static = ndarray_to_image(image_resized_tf_static)
156 | 
157 | 
158 |     image_tf.save('tf.png')
159 |     image_pil.save('pil.png')
160 |     image_tf_static.save('tf_static.png')
161 | 
162 | if __name__ == "__main__":
163 |     main()
--------------------------------------------------------------------------------
/utils/keras_utils.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | 
3 | 
4 | def compose(*funcs):
5 |     """Compose arbitrarily many functions, evaluated left to right.
6 | 
7 |     Reference: https://mathieularose.com/function-composition-in-python/
8 |     """
9 |     # return lambda x: reduce(lambda v, f: f(v), funcs, x)
10 |     if funcs:
11 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
12 |     else:
13 |         raise ValueError('Composition of empty sequence not supported.')
14 | 
--------------------------------------------------------------------------------
/utils/load_DETRAC.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import glob
4 | import xml.etree.ElementTree as ET
5 | 
6 | import pdb
7 | 
8 | def get_ids(dir_path):
9 |     img_dir = os.path.join(dir_path, 'Insight-MVT_Annotation_Train')
10 |     id_list = os.listdir(img_dir)
11 |     id_list.sort()
12 |     return id_list
13 | 
14 | def load_from_id(dir_path, id_name):
15 |     img_dir = os.path.join(dir_path, 'Insight-MVT_Annotation_Train')
16 |     det_dir = os.path.join(dir_path, 'R-CNN')
17 |     gt_dir = os.path.join(dir_path, 'DETRAC-Train-Annotations-XML')
18 |     imgs_path = glob.glob(os.path.join(img_dir, id_name, '*.jpg'))
19 |     imgs_path.sort()
20 |     num_frames = len(imgs_path)
21 |     det_list = _load_det(os.path.join(det_dir, id_name + '_Det_R-CNN.txt'), num_frames)
22 |     gt_list = _load_gt(os.path.join(gt_dir, id_name + '.xml'), num_frames)
23 | 
24 |     return imgs_path, det_list, gt_list
25 | 
26 | def _load_det(path, num_frames):
27 |     """Loads the per-frame R-CNN detections for one DETRAC sequence
28 | 
29 |     Args:
30 |         path: detection txt file; num_frames: number of frames in the sequence
31 | 
32 |     Returns:
33 |         detected objects with: bbox, confidence score, class index
34 |         [
35 |             dictionary {
36 |                 bbox: np.array([left, up, right, down])
37 |                 score: confidence_score
38 |                 class_idx: class_idx
39 |                 class_name: class name category
40 |             }
41 |         ]
42 | 
43 |     """
44 | 
45 |     with open(path, 'r') as f:
46 |         lines = f.readlines()
47 | 
48 |     result_dic = {}
49 |     for line in lines:
50 |         line_list = line[:-1].split(',')
51 |         frame_id = int(line_list[0])
52 |         bbox_id = int(line_list[1])
53 |         bbox = np.array([float(line_list[2]), float(line_list[3]), float(line_list[2]) + float(line_list[4]), float(line_list[3]) + float(line_list[5])])
54 |         confidence_score = float(line_list[-1])
55 | 
56 |         temp_dic = {
57 |             'bbox' : bbox,
58 |             'score' : confidence_score,
59 |             'class_idx' : 0,
60 |             'class_name' : 'object',
61 |         }
62 | 
63 |         if frame_id not in result_dic:
64 |             result_dic[frame_id] = []
65 |         result_dic[frame_id].append(temp_dic)
66 |     result = []
67 |     start_idx = 1
68 |     while start_idx <= num_frames:
69 |         if start_idx in result_dic:
70 |             result.append(result_dic[start_idx])
71 |         else:
72 |             result.append([])
73 |         start_idx += 1
74 |     return result
75 | 
76 | def _load_gt(path, num_frames):
77 |     tree = ET.parse(path)
78 |     root = tree.getroot()
79 |     result_dic = {}
80 |     for frame in root.findall('frame'):
81 |         temp_list = []
82 |         frame_id = int(frame.attrib['num'])
83 | 
84 |         for target in frame[0]:
85 |             temp_dic = {}
86 |             target_id = int(target.attrib['id'])
87 |             bbox_dic = target.find('box').attrib
88 |             bbox = np.array([float(bbox_dic['left']), float(bbox_dic['top']), float(bbox_dic['left']) + float(bbox_dic['width']), float(bbox_dic['top']) + float(bbox_dic['height'])])
89 |             class_name = target.find('attribute').attrib['vehicle_type']
90 |             temp_dic['bbox'] = bbox
91 |             temp_dic['score'] = 1.0
92 |             temp_dic['class_idx'] = 0
93 |             temp_dic['class_name'] = class_name
94 | 
95 |             # used for single class detection
96 |             temp_dic['class_name'] = 'object'
97 | 
98 |             temp_list.append(temp_dic)
99 |         result_dic[frame_id] = temp_list
100 |     result = []
101 |     start_idx = 1
102 |     while start_idx <= num_frames:
103 |         if start_idx in result_dic:
104 |             result.append(result_dic[start_idx])
105 |         else:
106 |             result.append([])
107 |         start_idx += 1
108 |     return result
109 | 
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from PIL import Image
5 | 
6 | import pdb
7 | 
8 | def draw_box_label(img, detected_object, box_color=(0, 255, 255), thickness=4):
9 |     '''
10 |     Helper function for drawing the bounding boxes and the labels
11 |     bbox_cv2 = [left, top, right, bottom]
12 |     '''
13 |     bbox_cv2 = detected_object['bbox']
14 |     bbox_cv2 = np.array(bbox_cv2).astype('int')
15 |     # box_color = (0, 255, 255)
16 |     font = cv2.FONT_HERSHEY_SIMPLEX
17 |     font_size = 0.7
18 |     font_color = (0, 0, 0)
19 |     left, top, right, bottom = bbox_cv2[0], bbox_cv2[1], bbox_cv2[2], bbox_cv2[3]
20 | 
21 |     # Draw the bounding box
22 |     cv2.rectangle(img, (left, top), (right, bottom), box_color, thickness)
23 | 
24 |     # Draw a filled box on top of the bounding box (as the background for the labels)
25 |     cv2.rectangle(img, (left - 2, top - 45), (right + 2, top), box_color, -1, 1)
26 | 
27 |     # Output the labels that show the detection's score and class index.
28 |     text_score = str(detected_object['score'])
29 |     cv2.putText(img, text_score, (left, top - 25), font, font_size, font_color, 1, cv2.LINE_AA)
30 |     text_class_name = str(detected_object['class_idx'])
31 |     cv2.putText(img, text_class_name, (left, top - 5), font, font_size, font_color, 1, cv2.LINE_AA)
32 | 
33 |     return img
34 | 
35 | def box_iou(bb1, bb2):
36 |     '''
37 |     Calculate IoU of two bounding boxes: bb=[left, up, right, down]
38 |     input:
39 |         bb1, bb2: 1*4 array or list
40 |     output:
41 |         scalar value
42 |     '''
43 |     for idx in range(4):
44 |         bb1[idx] = float(bb1[idx])
45 |         bb2[idx] = float(bb2[idx])
46 |     bi = [max(bb1[0], bb2[0]), max(bb1[1], bb2[1]), min(bb1[2], bb2[2]), min(bb1[3], bb2[3])]
47 |     iw = bi[2] - bi[0] + 1
48 |     ih = bi[3] - bi[1] + 1
49 |     if iw > 0 and ih > 0:
50 |         ua = (bb1[2] - bb1[0] + 1) * (bb1[3] - bb1[1] + 1) + (bb2[2] - bb2[0] + 1) * (bb2[3] - bb2[1] + 1) - iw * ih
51 |         iou = iw * ih / ua
52 |     else:
53 |         iou = 0.0
54 | 
55 |     return iou
56 | 
57 | def det4eval(det, file_id, dir_path='./det', tofile=False):
58 |     result_list = []
59 |     file_path = os.path.join(dir_path, file_id + '.txt')
60 |     if tofile:
61 |         with open(file_path, 'w') as f:
62 |             for temp_dic in det:
63 |                 left, top, right, bottom = temp_dic['bbox'].astype(int)
64 |                 line = temp_dic['class_name'] + ' ' + str(temp_dic['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
65 |                 f.write(line)
66 |     for temp_dic in det:
67 |         left, top, right, bottom = temp_dic['bbox'].astype(int)
68 |         line = temp_dic['class_name'] + ' ' + str(temp_dic['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
69 |         result_list.append(line)
70 |     return result_list
71 | 
72 | 
73 | def gt4eval(gt, file_id, dir_path='./gt', tofile=False):
74 |     result_list = []
75 |     file_path = os.path.join(dir_path, file_id + '.txt')
76 |     if tofile:
77 |         with open(file_path, 'w') as f:
78 |             for temp_gt in gt:
79 |                 left, top, right, bottom = temp_gt['bbox'].astype('int')
80 |                 line = temp_gt['class_name'] + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
81 |                 f.write(line)
82 |     for temp_gt in gt:
83 |         left, top, right, bottom = temp_gt['bbox'].astype('int')
84 |         line = temp_gt['class_name'] + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
85 |         result_list.append(line)
86 |     return result_list
87 | 
88 | 
89 | def trk4eval(trk, min_hits, file_id, dir_path='./trk', tofile=False):
90 |     result_list = []
91 |     file_path = os.path.join(dir_path, file_id + '.txt')
92 |     if tofile:
93 |         with open(file_path, 'w') as f:
94 |             for temp_trk in trk:
95 |                 if temp_trk.hits < min_hits:
96 |                     continue
97 |                 temp_obj = temp_trk.obj
98 |                 left, top, right, bottom = temp_obj['bbox'].astype(int)
99 |                 line = temp_obj['class_name'] + ' ' + str(temp_obj['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom) + '\n'
100 |                 f.write(line)
101 |     for temp_trk in trk:
102 |         if temp_trk.hits < min_hits:
103 |             continue
104 |         temp_obj = temp_trk.obj
105 |         left, top, right, bottom = temp_obj['bbox'].astype(int)
106 |         line = temp_obj['class_name'] + ' ' + str(temp_obj['score']) + ' ' + str(left) + ' ' + str(top) + ' ' + str(right) + ' ' + str(bottom)
107 |         result_list.append(line)
108 |     return result_list
109 | 
110 | 
111 | def letterbox_image(
112 |         img_np, shape=(416, 416), data_format='channels_last'):
113 |     """Returns a letterboxed version of the input image array.
114 | 
115 |     Parameters
116 |     ----------
117 |     shape : list of integers
118 |         The shape of the returned image (h, w).
119 |     data_format : str
120 |         "channels_first" or "channels_last".
121 | 
122 |     Returns
123 |     -------
124 |     image : array_like
125 |         The letterboxed image, scaled to [0, 1].
126 | 
127 |     """
128 |     assert len(shape) == 2
129 |     assert data_format in ['channels_first', 'channels_last']
130 |     image = Image.fromarray(img_np)
131 |     iw, ih = image.size
132 |     h, w = shape
133 |     scale = min(w / iw, h / ih)
134 |     nw = int(iw * scale)
135 |     nh = int(ih * scale)
136 | 
137 |     image = image.resize((nw, nh), Image.BICUBIC)
138 |     new_image = Image.new('RGB', shape, (128, 128, 128))
139 |     new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
140 | 
141 |     image = np.asarray(new_image, dtype=np.float32)
142 |     image /= 255.
143 |     image = image[:, :, :3]
144 |     assert image.shape == shape + (3,)
145 |     if data_format == 'channels_first':
146 |         image = np.transpose(image, (2, 0, 1))
147 |     return image, (h, w)
--------------------------------------------------------------------------------
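A minimal usage sketch for the helpers in `utils/utils.py`. It assumes the repo root is on `PYTHONPATH`; the two boxes are made-up values, and `01.mp4` is one of the clips bundled under `data/move-in-zoomed/`:
```python
import cv2
import numpy as np

from utils.utils import box_iou, letterbox_image, draw_box_label

# Two [left, up, right, down] boxes; this is the same IoU score that
# assign_detections_to_trackers uses to match trackers to detections.
bb1 = [100, 100, 200, 200]
bb2 = [150, 150, 250, 250]
print('IoU: %.3f' % box_iou(bb1, bb2))  # 0.146 for these boxes

# Grab one frame from a bundled clip and letterbox it to the 416x416
# YOLOv3 input size; letterbox_image returns a float image in [0, 1].
cap = cv2.VideoCapture('data/move-in-zoomed/01.mp4')
ok, frame = cap.read()
image, (h, w) = letterbox_image(frame, shape=(416, 416))

# Annotate the raw frame with a detection dict of the same structure
# that pipeline_center.py passes around.
det = {'bbox': np.array(bb1), 'score': 0.9, 'class_idx': 0, 'class_name': 'object'}
annotated = draw_box_label(frame, det)
```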