├── LICENSE
├── README.md
├── index.html
├── picam.py
└── yad2k
    ├── about
    ├── models
    │   ├── keras_darknet19.py
    │   └── keras_yolo.py
    └── utils
        └── utils.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 PiSimo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PiCamNN

Surveillance system with deep-learning-based people detection (a YAD2K YOLO implementation) and Telegram notifications.

The program is made of two threads. One constantly looks for movement and, when it finds some, writes the frames to a video file. The other thread takes the frames in which movement was detected and searches them for people with a deep neural network (YOLO); if anyone is found, it sends you the images via Telegram.

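A minimal sketch of that producer/consumer pattern (illustrative names only, not the actual program; the real loops live in picam.py and share the `frames` and `times` lists the same way):

```python
import threading
import time

frames, times = [], []              # shared queues, as in picam.py

def detector():                     # stand-in for the YOLO thread
    while True:
        if frames:
            frame, stamp = frames[0], times[0]
            # ... run the neural network on `frame` here ...
            del frames[0], times[0]
        time.sleep(0.05)

threading.Thread(target=detector, daemon=True).start()

while True:                         # stand-in for the camera/movement loop
    frames.append("frame with movement")   # a real image in picam.py
    times.append(time.time())
    time.sleep(1)
```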
The code has been tested on a Raspberry Pi 3B, where it took about 2-3 seconds per frame (on a GPU you should reach about 200 frames per second); on earlier Raspberry Pi models you will probably not get good performance :'( .

## Requirements

- Linux (tested on a Raspberry Pi 3 with Raspbian)
- Python 3
- OpenCV for Python 3
- Apache2 HTTP server (remember to enable the Apache service, e.g. as root: `systemctl enable apache2; reboot`)
- TensorFlow (see the instructions for TensorFlow on the Raspberry Pi)
- telegram-cli (follow its installation instructions and log in with your account)
- NumPy
- Keras 2

## Instructions for Raspbian

Follow these instructions after installing all the requirements!

```bash
git clone https://github.com/PiSimo/PiCamNN.git
cd PiCamNN
mkdir imgs
```

Download the tiny YOLO weights (for Keras 2), converted with YAD2K:

```bash
wget https://www.dropbox.com/s/xastcd4c0dv2kty/tiny.h5?dl=0 -O tiny.h5
sudo mv index.html /var/www/html/
```

(NOTE: if you aren't on Raspbian, Apache's base folder might not be /var/www/html/, so check before copying!)
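Optionally, you can sanity-check the downloaded weights from a Python 3 shell (this assumes Keras and its backend are already installed; picam.py loads the file the same way):

```python
from keras.models import load_model

model = load_model("tiny.h5")
print(model.layers[0].input_shape)   # expected: (None, 416, 416, 3) for tiny YOLO
```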


Before starting the main script you should change some variables at the top of picam.py (an example of an edited block follows this list):

- `maxDays = 7`: if more than maxDays videos are stored on your device, the oldest one is removed
- `baseFolder = "/var/www/html/"`: change this if Apache's base folder is different on your system
- `scriptFolder = "/home/pi/PiCamNN/"`: the path which contains the scripts and the weights
- `num_cam = -1`: number of the camera to use (-1 means the first camera the system reads)
- `frame_check = 17`: number of empty frames to wait for before killing the main process
- `time_chunck = 15`: seconds to wait before considering a new action
- `telegram_user = ""`: your Telegram username; you will see all the images in the chat with yourself

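For example, the edited settings block could look like this (the values are illustrative; the camera index and username are placeholders):

```python
maxDays       = 7                     # keep at most a week of recordings
baseFolder    = "/var/www/html/"      # Apache's document root
scriptFolder  = "/home/pi/PiCamNN/"   # where picam.py and tiny.h5 live
num_cam       = 0                     # use /dev/video0 explicitly
frame_check   = 17
time_chunck   = 15
telegram_user = "my_username"         # placeholder: your Telegram username
```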

To run the code:

```bash
sudo python3 picam.py
```

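On startup you should see output along these lines (the messages come from picam.py's own prints; the height/width values depend on your camera, and the ordering of the thread messages can vary):

```
Starting PiCam....
[PiCam] Height:480 | Width:640
[PiCamNN] Updating file...
[PiCamNN] index.html UPDATED
[PiCam] Starting Yolo Thread....
[PiCam] Loading anchors file...
[PiCam] Loading yolo model (/home/pi/PiCamNN/tiny.h5)...
[PiCam] Starting main loop...
```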

After the main loop has started, every time a person is detected by the neural net you will receive the photo on Telegram (in the chat with yourself).

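For reference, the notification itself is a telegram-cli call made from the YOLO thread in picam.py, wrapped in subprocess so a missing connection can't hang the thread (the user and image path below are placeholders):

```python
import subprocess

telegram_user = "my_username"                # placeholder
img_name = "/home/pi/PiCamNN/imgs/0.png"     # placeholder detection image
subprocess.call("telegram-cli -W -e 'send_photo {} {}'".format(telegram_user, img_name),
                timeout=30, shell=True)
```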

To see the recorded videos from your local network, point your browser at the IP address of the device running PiCamNN (e.g. http://192.168.0.17); from that page you will be able to download all the videos.

--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
<!-- NOTE: most of this file's markup was lost when the repository was rendered
     to plain text. What survives of the original page: the title "PiCam", a
     comment warning that the marked lines must not be changed else picam will
     not work, and the visible page text "PiCam", "Detection Logs >" and
     "Video :". -->
--------------------------------------------------------------------------------
/picam.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3

import cv2
import time
import threading
import subprocess
import numpy as np
from sys import exit
from os import system
from keras import backend as K
from keras.models import load_model
from yad2k.models.keras_yolo import yolo_eval, yolo_head

'''
---# GLOBAL VARIABLES #---
'''
# USER SETTINGS:
maxDays = 7                         # The recorded videos will be destroyed after "maxDays" days
baseFolder = "/var/www/html/"       # Apache's base folder
scriptFolder = "/home/pi/PiCamNN/"  # The folder which contains the main script (picam.py)
num_cam = -1                        # Number of the camera (-1 means the first camera read by the system)
frame_check = 17                    # Empty frames to check before quitting
time_chunck = 15                    # Seconds to wait before considering a new action
telegram_user = ""                  # Your Telegram username; all the images are sent to your chat with yourself

# IMAGE VARIABLES:
w = 0  # width
h = 0  # height
# Image processing vars:
blur1 = 2
blur2 = 1
erodeval = 7
# Arrays shared between the two threads:
frames = []
times = []
# General log file
flog = open(baseFolder + "logs", "a")

'''
---# END #---
'''


# Called when an error occurs
def printExit(out):
    print(out, "\nClosing....!")
    exit(-1)


# Updating index.html:
#  1 Opening index.html
#  2 Adding a new link for the new video
#  3 If there are more than maxDays links, removing the oldest one
#  4 Removing also the oldest video
def handleFile(name):
    print("[PiCamNN] Updating file...")
    f = open(baseFolder + "index.html", "r")
    cont = f.read()
    f.close()
    if cont.find(name) != -1:
        print("[PiCamNN] File has already been updated!")
        return False
    f = open(baseFolder + "index.html", "w")
    lines = cont.split("\n")
    day = 0
    go = False
    for i in range(len(lines) - 1):
        # NOTE: the HTML markers searched for by the two find() calls below were
        # stripped from this copy of the repository; they are the tags that
        # delimit the list of video links inside index.html. The <a href> line
        # below is reconstructed from the parsing done in the removal branch.
        if lines[i].find("") != -1:
            i += 1
            f.write("<a href=\"{}.avi\">{}.avi</a><br>\n".format(name, name))
            day += 1
            go = True
        elif lines[i].find("") != -1:
            f.write("{}\n".format(lines[i]))
            go = False
            if day > maxDays:
                rm = lines[i - 1]
                rm = rm[rm.find("\"") + 1:len(rm)]
                rm = rm[0:rm.find("\"")]
                try:
                    system("rm {}".format(baseFolder + rm))
                    print("[PiCamNN] Old file removed.")
                except:
                    print("[PiCamNN] An error occurred while removing the old file!")
        elif go:
            day += 1
            if day <= maxDays:
                f.write("{}\n".format(lines[i]))
        else:
            f.write("{}\n".format(lines[i]))
    f.close()
    print("[PiCamNN] index.html UPDATED")
    return True


# Some morphological operations on two input frames to check for movement
def movement(mat_1, mat_2):
    mat_1_gray = cv2.cvtColor(mat_1.copy(), cv2.COLOR_BGR2GRAY)
    mat_1_gray = cv2.blur(mat_1_gray, (blur1, blur1))
    _, mat_1_gray = cv2.threshold(mat_1_gray, 100, 255, 0)
    mat_2_gray = cv2.cvtColor(mat_2.copy(), cv2.COLOR_BGR2GRAY)
    mat_2_gray = cv2.blur(mat_2_gray, (blur1, blur1))
    _, mat_2_gray = cv2.threshold(mat_2_gray, 100, 255, 0)
    mat_2_gray = cv2.bitwise_xor(mat_1_gray, mat_2_gray)  # pixels that differ between the two frames
    mat_2_gray = cv2.blur(mat_2_gray, (blur2, blur2))
    _, mat_2_gray = cv2.threshold(mat_2_gray, 70, 255, 0)
    mat_2_gray = cv2.erode(mat_2_gray, np.ones((erodeval, erodeval)))  # removing isolated noise pixels
    mat_2_gray = cv2.dilate(mat_2_gray, np.ones((4, 4)))
    _, contours, __ = cv2.findContours(mat_2_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3 API
    if len(contours) > 0:
        return True   # There were movements
    return False      # There were not


# Pedestrian recognition thread
def yoloThread():
    global frames, times
    model_path = scriptFolder + "tiny.h5"  # Model weights
    sess = K.get_session()
    print("[PiCam] Loading anchors file...")
    anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]  # Tiny YOLO anchors' values
    anchors = np.array(anchors).reshape(-1, 2)
    print("[PiCam] Loading yolo model ({})...".format(scriptFolder + "tiny.h5"))
    yolo_model = load_model(model_path)  # Loading Tiny YOLO
    num_anchors = len(anchors)
    print("[PiCam] YOLO model loaded ({})!".format(model_path))

    model_image_size = yolo_model.layers[0].input_shape[1:3]  # Get input shape
    yolo_outputs = yolo_head(yolo_model.output, anchors, 20)  # 20 VOC classes
    input_image_shape = K.placeholder(shape=(2,))
    boxes, scores, classes = yolo_eval(yolo_outputs, input_image_shape, score_threshold=0.3, iou_threshold=0.4)
    num = 0         # Name of the next photo to save
    old_time = 0.0  # Time of the latest detection

    print("[PiCam] YOLO Thread started!")
    ### Loop:
    while True:
        if len(frames) != 0:
            try:
                cv2.waitKey(17)
                mat = frames[0]  # Get the first frame with movement
                mat = cv2.resize(mat, (model_image_size[0], model_image_size[1]))
                in_mat = np.array(mat, dtype='float32')
                in_mat /= 255.   # Scaling pixel values to [0,1]
                in_mat = np.expand_dims(in_mat, 0)
                if (times[0] - old_time) > time_chunck:
                    # Searching for detections:
                    out_boxes, out_scores, out_classes = sess.run(
                        [boxes, scores, classes],
                        feed_dict={yolo_model.input: in_mat,
                                   input_image_shape: [mat.shape[1], mat.shape[0]],
                                   K.learning_phase(): 0})
                    if len(out_boxes) > 0:
                        writ = False
                        xs, ys = [], []  # x and y coordinates of the boxes
                        for i, c in reversed(list(enumerate(out_classes))):
                            if c == 14:  # 14 is the VOC label for "person"
                                writ = True
                                box = out_boxes[i]
                                top, left, bottom, right = box
                                top = max(0, np.floor(top + 0.5).astype('int32'))
                                left = max(0, np.floor(left + 0.5).astype('int32'))
                                bottom = min(mat.shape[0], np.floor(bottom + 0.5).astype('int32'))  # clamp to height
                                right = min(mat.shape[1], np.floor(right + 0.5).astype('int32'))    # clamp to width
                                xs.append(left + i)
                                xs.append(right - i)
                                ys.append(top + i)
                                ys.append(bottom - i)
                        if writ:
                            img_name = scriptFolder + "imgs/{}.png".format(num)
                            # Only saving the rectangle in which people were detected:
                            cv2.imwrite(img_name, mat[min(ys):max(ys), min(xs):max(xs)])
                            out_s = "[{}] Detected person (taken {}s)!\n".format(
                                time.strftime("%H:%M:%S"), round(time.time() - times[0]))  # Log output
                            print(out_s)
                            flog.write(out_s)
                            flog.flush()
                            try:
                                # subprocess (instead of os.system) allows a timeout, preventing
                                # hangs when there is no connection:
                                subprocess.call("telegram-cli -W -e 'send_photo {} {}'".format(telegram_user, img_name),
                                                timeout=30, shell=True)
                            except Exception as exc:
                                print("[PiCam] Some error occurred in YOLO Thread ({}):".format(time.strftime("%H:%M:%S")), exc)
                            num += 1
                    old_time = times[0]  # Updating the last detection time
            except Exception as ex:
                print("[PiCam] Some error occurred in YOLO Thread ({}):".format(time.strftime("%H:%M:%S")), ex)
            del times[0]   # Deleting the consumed detection time
            del frames[0]  # Deleting the consumed frame
        cv2.waitKey(50)


'''
Main code from here:
'''

if __name__ == "__main__":
    print("Starting PiCam....")
    name = time.strftime("%c").replace(" ", "_")[0:10]
    # Camera input
    cap = None
    try:
        cap = cv2.VideoCapture(num_cam)  # Trying to open the camera
        _, dim = cap.read()
        if not _ or dim.shape == (0, 0, 0):
            printExit("[PiCam] Error occurred when opening the camera stream!")
        h = dim.shape[0]
        w = dim.shape[1]
        print("[PiCam] Height:{} | Width:{}".format(h, w))
    except:
        printExit("[PiCam] Error occurred when opening the camera stream!")
    # Video output
    writer = None
    err = "[PiCam] Error occurred when opening the output video stream!"
    load = handleFile(name)  # Updating the web page
    if not load:
        system("mv {}.avi {}_.avi".format(baseFolder + name, baseFolder + name))
    writer = cv2.VideoWriter(baseFolder + name + ".avi", cv2.VideoWriter_fourcc(*"MJPG"), 21, (w, h), True)
    if not writer.isOpened():
        printExit(err)  # If the output stream is unavailable
    # Loading the video file of the same day:
    if not load:
        try:
            print("[PiCam] Loading old video file...", end="")
            read = cv2.VideoCapture(baseFolder + name + "_.avi")
            _, mat = read.read()
            while _:
                if mat.shape == (0, 0, 0) or mat.shape[0] != h or mat.shape[1] != w:
                    print("[PiCam] Couldn't load old file, skipping (shape {})...!".format(mat.shape))
                    break
                writer.write(mat)
                _, mat = read.read()
            del read, mat
            print("loaded!")
        except:
            print("\n[PiCam] Couldn't load old file, skipping...!")
        system("rm {}_.avi".format(baseFolder + name))  # Removing the old video file

    # Starting the YOLO thread
    yolo_thread = threading.Thread(target=yoloThread)
    print("[PiCam] Starting Yolo Thread....")
    yolo_thread.start()

    day = time.strftime("%d")  # Starting day
    frc = 0                    # Frame count
    # Main loop:
    print("[PiCam] Starting main loop...")
    while True:
        try:
            _, a = cap.read()
            if a.shape == (0, 0, 0):
                # If the frame was empty, retrying frame_check times before quitting
                for i in range(frame_check):
                    _, a = cap.read()
                    if a.shape != (0, 0, 0):
                        break
                if i == frame_check - 1:
                    printExit("[PiCam] Error with camera stream!")

            cv2.waitKey(33)
            _, b = cap.read()
            move = movement(a, b)

            # If we have got a movement:
            if move:
                print("[PiCam] Movement ({})".format(time.strftime("%H:%M:%S")))
                if frc % 2 == 0:  # Only every second frame with movement is passed to the YOLO thread
                    frames.append(b.copy())    # Frame with movement
                    times.append(time.time())  # Detection time
                frc += 1  # Frame count +1
                cv2.putText(b, name.replace("_", " ") + " " + time.strftime("%H:%M:%S"), (50, h - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255))
                writer.write(cv2.resize(b, (w, h)))  # Adding the frame to the video file

            if time.strftime("%d") != day:
                writer.release()  # Closing the old video output
                frc = 0
                print("[PiCam] Cleaning imgs dir...")
                system("rm {}".format(scriptFolder + "imgs/*"))
                # Resetting the log file:
                flog.close()
                system("echo '### PiCam Live Logs ###' > {}".format(baseFolder + "logs"))
                flog = open(baseFolder + "logs", "a")
                print("[PiCam] New day! Restarting video output....")
                name = time.strftime("%c").replace(" ", "_")[0:10]
                writer = cv2.VideoWriter(baseFolder + name + ".avi", cv2.VideoWriter_fourcc(*"MJPG"), 21, (w, h), True)
                print("[PiCam] Updating index.html...")
                handleFile(name)
                day = time.strftime("%d")
                print("[PiCam] Done! Resuming main loop...")
        except Exception as ex:
            print("Some error occurred: ", ex)
            exit(-1)
--------------------------------------------------------------------------------
/yad2k/about:
--------------------------------------------------------------------------------
This code was created by allanzelener; to visit the original project page go to https://github.com/allanzelener/YAD2K.
--------------------------------------------------------------------------------
/yad2k/models/keras_darknet19.py:
--------------------------------------------------------------------------------
"""Darknet19 Model Defined in Keras."""
import functools
from functools import partial

from keras.layers import Convolution2D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2

from ..utils import compose

# Partial wrapper for Convolution2D with static default argument.
_DarknetConv2D = partial(Convolution2D, border_mode='same')


@functools.wraps(Convolution2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet weight regularizer for Convolution2D."""
    darknet_conv_kwargs = {'W_regularizer': l2(5e-4)}
    darknet_conv_kwargs.update(kwargs)
    return _DarknetConv2D(*args, **darknet_conv_kwargs)


def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    return compose(
        DarknetConv2D(*args, **kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))


def bottleneck_block(nb_outer, nb_bottleneck):
    """Bottleneck block of 3x3, 1x1, 3x3 convolutions."""
    return compose(
        DarknetConv2D_BN_Leaky(nb_outer, 3, 3),
        DarknetConv2D_BN_Leaky(nb_bottleneck, 1, 1),
        DarknetConv2D_BN_Leaky(nb_outer, 3, 3))


def bottleneck_x2_block(nb_outer, nb_bottleneck):
    """Bottleneck block of 3x3, 1x1, 3x3, 1x1, 3x3 convolutions."""
    return compose(
        bottleneck_block(nb_outer, nb_bottleneck),
        DarknetConv2D_BN_Leaky(nb_bottleneck, 1, 1),
        DarknetConv2D_BN_Leaky(nb_outer, 3, 3))


def darknet_body():
    """Generate first 18 conv layers of Darknet-19."""
    return compose(
        DarknetConv2D_BN_Leaky(32, 3, 3),
        MaxPooling2D(),
        DarknetConv2D_BN_Leaky(64, 3, 3),
        MaxPooling2D(),
        bottleneck_block(128, 64),
        MaxPooling2D(),
        bottleneck_block(256, 128),
        MaxPooling2D(),
        bottleneck_x2_block(512, 256),
        MaxPooling2D(),
        bottleneck_x2_block(1024, 512))


def darknet19(inputs):
    """Generate Darknet-19 model for Imagenet classification."""
    body = darknet_body()(inputs)
    logits = DarknetConv2D(1000, 1, 1)(body)
    return Model(inputs, logits)
--------------------------------------------------------------------------------
/yad2k/models/keras_yolo.py:
--------------------------------------------------------------------------------
"""YOLO_v2 Model Defined in Keras."""
import sys

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Lambda, Reshape, merge
from keras.models import Model

from ..utils import compose
from .keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky,
                              darknet_body)

sys.path.append('..')

voc_anchors = np.array(
    [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]])

voc_classes = [
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]


def space_to_depth_x2(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
    # Import currently required to make Lambda work.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=2)


def space_to_depth_x2_output_shape(input_shape):
    """Determine space_to_depth output shape for block_size=2.

    Note: For Lambda with TensorFlow backend, output shape may not be needed.
    """
    return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 *
            input_shape[3]) if input_shape[1] else (input_shape[0], None, None,
                                                    4 * input_shape[3])


def yolo_body(inputs, num_anchors, num_classes):
    """Create YOLO_V2 model CNN body in Keras."""
    darknet = Model(inputs, darknet_body()(inputs))
    conv13 = darknet.get_layer('batchnormalization_13').output
    conv20 = compose(
        DarknetConv2D_BN_Leaky(1024, 3, 3),
        DarknetConv2D_BN_Leaky(1024, 3, 3))(darknet.output)

    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    conv13_reshaped = Lambda(
        space_to_depth_x2,
        output_shape=space_to_depth_x2_output_shape,
        name='space_to_depth')(conv13)

    # Concat conv13 with conv20.
    x = merge([conv13_reshaped, conv20], mode='concat')
    x = DarknetConv2D_BN_Leaky(1024, 3, 3)(x)
    x = DarknetConv2D(num_anchors * (num_classes + 5), 1, 1)(x)
    return Model(inputs, x)


def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[0]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[1], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [conv_dims[0], conv_dims[1], 2])
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs


def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert YOLO box predictions to bounding box corners."""
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)

    return K.concatenate([
        box_mins[..., 1:2],   # y_min
        box_mins[..., 0:1],   # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]   # x_max
    ])


def yolo(inputs, anchors, num_classes):
    """Generate a complete YOLO_v2 localization model."""
    num_anchors = len(anchors)
    body = yolo_body(inputs, num_anchors, num_classes)
    outputs = yolo_head(body.output, anchors, num_classes)
    return outputs


def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
    """Filter YOLO boxes based on object and class confidence."""
    box_scores = box_confidence * box_class_probs
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)
    prediction_mask = box_class_scores >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    boxes = tf.boolean_mask(boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    classes = tf.boolean_mask(box_classes, prediction_mask)
    return boxes, scores, classes


def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes."""
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        boxes, box_confidence, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    nms_index = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
--------------------------------------------------------------------------------
/yad2k/utils/utils.py:
--------------------------------------------------------------------------------
"""Miscellaneous utility functions."""

from functools import reduce


def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.

    Reference: https://mathieularose.com/function-composition-in-python/
    """
    # return lambda x: reduce(lambda v, f: f(v), funcs, x)
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    else:
        raise ValueError('Composition of empty sequence not supported.')
--------------------------------------------------------------------------------