├── .gitignore ├── Makefile ├── README.md ├── create_graph.py ├── data ├── __init__.py ├── datahandler.py └── dog.jpg ├── docs └── Screenshot from 2019-09-06 00-14-44.png ├── images ├── tensorboard.png ├── train_tensorboard.png ├── train_tensorboard2.png └── training.png ├── scripts ├── clean_data.py └── get_coco.sh ├── tensorboard ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | *.tgz 3 | *.jpg 4 | *.txt 5 | __pycache__ 6 | graph 7 | train_graph 8 | data/images 9 | data/labels 10 | 11 | # OS Generated # 12 | .DS_Store* 13 | ehthumbs.db 14 | Icon? 15 | Thumbs.db 16 | *.swp 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | create: 2 | python3 ./create_graph.py 3 | train: create 4 | python3 ./train.py 5 | test: 6 | python3 ./test.py -i data/dog.jpg 7 | clean: 8 | rm -r graph 9 | rm -r train_graph 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TINY YOLO v3 model in tensorflow 2 | 3 | ![alt text](https://raw.githubusercontent.com/khanhhhh/tiny-yolo-tensorflow/master/docs/Screenshot%20from%202019-09-06%2000-14-44.png) 4 | 5 | Some notes in extracting weights from darknet to tensorflow in Notes.txt 6 | 7 | # DEPENDENCIES 8 | 9 | `numpy` 10 | `tensorflow` 11 | `opencv` 12 | 13 | # USAGE 14 | 15 | `make create` 16 | 17 | `make train` 18 | 19 | `make test` 20 | 21 | # yolov3-tiny from darknet 22 | ``` 23 | layer filters size input output 24 | 0 conv 16 3 x 3 / 1 416 x 416 x 3 -> 416 x 416 x 16 0.150 BFLOPs 25 | 1 max 2 x 2 / 2 416 x 416 x 16 -> 208 x 208 x 16 26 | 2 conv 32 3 x 3 / 1 208 x 208 x 16 -> 208 x 208 x 32 0.399 BFLOPs 27 | 3 max 2 x 2 / 2 208 x 208 x 32 -> 104 x 104 x 32 28 | 4 conv 64 3 x 3 / 1 104 x 104 x 32 -> 104 x 104 x 64 0.399 BFLOPs 29 | 5 max 2 x 2 / 2 104 x 104 x 64 -> 52 x 52 x 64 30 | 6 conv 128 3 x 3 / 1 52 x 52 x 64 -> 52 x 52 x 128 0.399 BFLOPs 31 | 7 max 2 x 2 / 2 52 x 52 x 128 -> 26 x 26 x 128 32 | 8 conv 256 3 x 3 / 1 26 x 26 x 128 -> 26 x 26 x 256 0.399 BFLOPs 33 | 9 max 2 x 2 / 2 26 x 26 x 256 -> 13 x 13 x 256 34 | 10 conv 512 3 x 3 / 1 13 x 13 x 256 -> 13 x 13 x 512 0.399 BFLOPs 35 | 11 max 2 x 2 / 1 13 x 13 x 512 -> 13 x 13 x 512 36 | 12 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs 37 | 13 conv 256 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 256 0.089 BFLOPs 38 | 14 conv 512 3 x 3 / 1 13 x 13 x 256 -> 13 x 13 x 512 0.399 BFLOPs 39 | 15 conv 255 1 x 1 / 1 13 x 13 x 512 -> 13 x 13 x 255 0.044 BFLOPs 40 | 16 yolo 41 | 17 route 13 42 | 18 conv 128 1 x 1 / 1 13 x 13 x 256 -> 13 x 13 x 128 0.011 BFLOPs 43 | 19 upsample 2x 13 x 13 x 128 -> 26 x 26 x 128 44 | 20 route 19 8 45 | 21 conv 256 3 x 3 / 1 26 x 26 x 384 -> 26 x 26 x 256 1.196 BFLOPs 46 | 22 conv 255 1 x 1 / 1 26 x 26 x 256 -> 26 x 26 x 255 0.088 BFLOPs 47 | 23 yolo 48 | ``` 49 | 50 | tensorflow model 51 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/training.png) 52 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/tensorboard.png) 53 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/train_tensorboard.png) 54 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/train_tensorboard2.png) 55 | -------------------------------------------------------------------------------- /create_graph.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import tensorflow as tf 3 | import numpy as np 4 | import sys 5 | import os 6 | import shutil 7 | import pdb 8 | 9 | g1 = tf.Graph() 10 | 11 | with g1.as_default() as g: 12 | with g.name_scope("YOLO"): 13 | def drop(n, in_name, keep_prob = 1): 14 | in_tensor = g.get_tensor_by_name(in_name) 15 | with g.name_scope("drop_{}".format(n)): 16 | drop = tf.nn.dropout(in_tensor, keep_prob) 17 | drop = tf.identity(drop, name = "out") 18 | return drop 19 | 20 | def conv(n, in_name, out_channels, kernel_size, stride, nonlin="relu", batchnorm=1): 21 | in_tensor = g.get_tensor_by_name(in_name) 22 | batch_size, height, width, in_channels = in_tensor.get_shape().as_list() 23 | with g.name_scope("conv_{}".format(n)): 24 | kernel = tf.Variable(tf.random_uniform(shape = [kernel_size, kernel_size, in_channels, out_channels])/ (kernel_size*kernel_size*out_channels) , dtype = tf.float32, name = "kernel") 25 | scale = tf.Variable(tf.random_normal(shape = [1, 1, 1, out_channels]), dtype = tf.float32, name = "scales") 26 | bias = tf.Variable(tf.random_normal(shape = [1, 1, 1, out_channels]), dtype = tf.float32, name = "biases") 27 | ''' 28 | conv 29 | batchnorm + bias + scale 30 | nonlin 31 | ''' 32 | strides = (1, stride, stride, 1) 33 | conv = tf.nn.conv2d(in_tensor, kernel, strides, padding="SAME", name = "conv") 34 | if (batchnorm): 35 | mean_conv, var_conv = tf.nn.moments(conv, axes = [1,2,3], keep_dims = True) 36 | batchnorm = tf.nn.batch_normalization(conv, mean_conv, var_conv, bias, scale, 1e-100, name = "batchnorm") 37 | else: 38 | batchnorm = tf.add(conv, bias, name = "batchnorm") 39 | if nonlin == "relu": 40 | nonlin = tf.nn.leaky_relu(batchnorm) 41 | elif nonlin == "sigmoid": 42 | nonlin = tf.sigmoid(batchnorm) 43 | elif nonlin == "linear": 44 | nonlin = tf.identity(batchnorm) 45 | else: 46 | raise Exception(" \"{}\" is not a nonlinear function!".format(nonlin)) 47 | conv = tf.identity(nonlin, name = "out") 48 | return conv 49 | 50 | def maxpool(n, in_name, kernel_size, stride): 51 | in_tensor = g.get_tensor_by_name(in_name) 52 | batch_size, height, width, in_channels = in_tensor.get_shape().as_list() 53 | with g.name_scope("maxpool_{}".format(n)): 54 | ksize = [1, kernel_size, kernel_size, 1] 55 | strides = [1, stride, stride, 1] 56 | ''' 57 | maxpool 58 | ''' 59 | maxpool = tf.nn.max_pool(in_tensor, ksize, strides, padding="SAME") 60 | maxpool = tf.identity(maxpool, name = "out") 61 | return maxpool 62 | 63 | def route(n, n1_name, n2_name): 64 | 65 | if (n2_name==None): 66 | n1 = g.get_tensor_by_name(n1_name) 67 | route = tf.identity(n1) 68 | else: 69 | n1 = g.get_tensor_by_name(n1_name) 70 | n2 = g.get_tensor_by_name(n2_name) 71 | route = tf.concat([n1, n2], 3) 72 | with g.name_scope("route_{}".format(n)): 73 | route = tf.identity(route, name = "out") 74 | return route 75 | 76 | def upsample(n, in_name, stride): 77 | in_tensor = g.get_tensor_by_name(in_name) 78 | batch_size, height, width, in_channels = in_tensor.get_shape().as_list() 79 | out_channels = in_channels 80 | with g.name_scope("upsample_{}".format(n)): 81 | kernel = tf.ones([stride, stride, in_channels, out_channels], name = "kernel") 82 | output_shape = [batch_size, stride*height, stride*width, in_channels] 83 | strides = [1, stride, stride, 1] 84 | padding = "SAME" 85 | unsample = tf.nn.conv2d_transpose(in_tensor, kernel, output_shape, strides, name = "out") 86 | return unsample 87 | 88 | def yolo(n, in_name, anchor, thresh=0.5):#in tensor has shape (batch_size, height, width, 255) 89 | in_tensor = g.get_tensor_by_name(in_name) 90 | batch_size, height, width, in_channels = in_tensor.get_shape().as_list() 91 | split = tf.split(in_tensor, 3, axis = 3) 92 | new_split = [] 93 | offset_x_np = np.zeros((batch_size, height, width, 1)) 94 | for i in range(width): 95 | offset_x_np[:, :, i, :] = i/width 96 | offset_y_np = np.zeros((batch_size, height, width, 1)) 97 | for i in range(height): 98 | offset_y_np[:, i, :, :] = i/height 99 | offset_x = tf.constant(offset_x_np, dtype = tf.float32) 100 | offset_y = tf.constant(offset_y_np, dtype = tf.float32) 101 | 102 | for i in range(3): 103 | o = split[i][:, :, :, 0:1] 104 | o = tf.sigmoid(o) 105 | x = split[i][:, :, :, 1:2] 106 | x = tf.sigmoid(x)/width + offset_x 107 | y = split[i][:, :, :, 2:3] 108 | y = tf.sigmoid(y)/height + offset_y 109 | wh = split[i][:, :, :, 3:5] 110 | wh = tf.constant(anchor[i], dtype = tf.float32) * tf.exp(wh) 111 | c = split[i][:, :, :, 5: ] 112 | c = tf.sigmoid(c) 113 | new_split.append(o) 114 | new_split.append(x) 115 | new_split.append(y) 116 | new_split.append(wh) 117 | new_split.append(c) 118 | #obj,x,y,w,h,classes 119 | 120 | with g.name_scope("yolo_{}".format(n)): 121 | yolo = tf.concat(new_split, 3, name = "out") 122 | return yolo 123 | 124 | height = 416 125 | width = 416 126 | anchor1 = ((344,319), (135,169), (81,82)) 127 | anchor2 = ((37,58), (23,27), (10,14)) 128 | classes = 80 129 | batch_size = 32 130 | image_depth = 3 131 | 132 | out_height = height//32 133 | out_width = width//32 134 | out_depth = 3*(5 + classes) 135 | 136 | X = tf.placeholder(shape = (batch_size, height, width, image_depth), dtype = tf.float32, name = "input") 137 | dropout = tf.placeholder(shape = (), dtype = tf.float32, name = "dropout") 138 | #0 139 | conv_0 = conv(0, "YOLO/input:0", 16, 3, 1) 140 | #1 141 | maxpool(1, "YOLO/conv_0/out:0", 2, 2) 142 | #2 143 | conv(2, "YOLO/maxpool_1/out:0", 32, 3, 1) 144 | #3 145 | maxpool(3, "YOLO/conv_2/out:0", 2, 2) 146 | #4 147 | conv(4, "YOLO/maxpool_3/out:0", 64, 3, 1) 148 | #5 149 | maxpool(5, "YOLO/conv_4/out:0", 2, 2) 150 | #6 151 | conv(6, "YOLO/maxpool_5/out:0", 128, 3, 1) 152 | #7 153 | maxpool(7, "YOLO/conv_6/out:0", 2, 2) 154 | #8 155 | conv(8, "YOLO/maxpool_7/out:0", 256, 3, 1) 156 | #9 157 | maxpool(9, "YOLO/conv_8/out:0", 2, 2) 158 | #10 159 | conv(10, "YOLO/maxpool_9/out:0", 512, 3, 1) 160 | #11 161 | maxpool(11, "YOLO/conv_10/out:0", 2, 1) 162 | #12 163 | conv(12, "YOLO/maxpool_11/out:0", 1024, 3, 1) 164 | #13 165 | conv(13, "YOLO/conv_12/out:0", 256, 1, 1) 166 | #14 167 | conv(14, "YOLO/conv_13/out:0", 512, 3, 1) 168 | drop(14, "YOLO/conv_14/out:0", dropout) 169 | #15 170 | conv(15, "YOLO/drop_14/out:0", 255, 1, 1, nonlin = "linear", batchnorm=0) 171 | #16 172 | yolo(16, "YOLO/conv_15/out:0", anchor1) 173 | #17 174 | route(17, "YOLO/conv_13/out:0", None) 175 | #18 176 | conv(18, "YOLO/route_17/out:0", 128, 1, 1) 177 | drop(18, "YOLO/conv_18/out:0", dropout) 178 | #19 179 | upsample(19, "YOLO/drop_18/out:0", 2) 180 | #20 181 | route(20, "YOLO/upsample_19/out:0", "YOLO/conv_8/out:0") 182 | #21 183 | conv(21, "YOLO/route_20/out:0", 256, 3, 1) 184 | #22 185 | conv(22, "YOLO/conv_21/out:0", 255, 1, 1, nonlin = "linear", batchnorm=0) 186 | #23 187 | yolo(23, "YOLO/conv_22/out:0", anchor2) 188 | 189 | h1 = tf.identity(g.get_tensor_by_name("YOLO/yolo_16/out:0"), "output1") 190 | h2 = tf.identity(g.get_tensor_by_name("YOLO/yolo_23/out:0"), "output2") 191 | 192 | if os.path.exists("./graph"): 193 | shutil.rmtree("./graph") 194 | os.mkdir("./graph") 195 | 196 | tf.summary.FileWriter("./graph", g) 197 | 198 | with tf.Session(graph = g) as sess: 199 | sess.run(tf.global_variables_initializer()) 200 | saver = tf.train.Saver() 201 | saver.save(sess, "./graph/tiny-yolo.ckpt") 202 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/data/__init__.py -------------------------------------------------------------------------------- /data/datahandler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sh 3 | import os 4 | import random 5 | import numpy as np 6 | import cv2 7 | 8 | data_path = "./data" 9 | 10 | images_path = os.path.join(data_path, "images") 11 | labels_path = os.path.join(data_path, "labels") 12 | 13 | images_list = os.listdir(images_path) 14 | 15 | def create(input_size, flip=1, crop=0.9, angle=10, color = 0.05): 16 | image_name = random.choice(images_list) 17 | image_path = os.path.join(images_path, image_name) 18 | label_name = image_name.split(".")[0] + ".txt" 19 | label_path = os.path.join(labels_path, label_name) 20 | 21 | #image 22 | im = cv2.imread(image_path).astype(np.float) 23 | h, w, _ = im.shape 24 | 25 | #rotate 26 | rot = random.uniform(-angle, +angle) 27 | M = cv2.getRotationMatrix2D((w/2, h/2), rot, 1) 28 | im = cv2.warpAffine(im, M, (w, h)) 29 | 30 | #crop 31 | size = int(min(w, h) * random.uniform(crop, 1)) 32 | x_min = int(random.uniform(0, w - size)) 33 | y_min = int(random.uniform(0, h - size)) 34 | x_max = x_min + size 35 | y_max = y_min + size 36 | im = im[y_min:y_max, x_min:x_max, :] 37 | 38 | #flip 39 | fl = random.random() < 0.5 40 | if fl: 41 | im = cv2.flip(im, 1) 42 | 43 | #color 44 | red = random.uniform(1-color, 1+color) 45 | blu = random.uniform(1-color, 1+color) 46 | gre = random.uniform(1-color, 1+color) 47 | 48 | col = np.array([blu, gre, red]) 49 | im = im*col 50 | im[im<0] = 0 51 | im[im>255] = 255 52 | #resize to inputsize 53 | image = cv2.resize(im, (input_size, input_size), interpolation = cv2.INTER_CUBIC) 54 | image = image.reshape((1, input_size, input_size, 3)) 55 | 56 | #label 57 | 58 | label = [] 59 | if os.path.exists(label_path): 60 | with open(label_path, "r") as f: 61 | labeltxt = f.read() 62 | for objtxt in labeltxt.split("\n"): 63 | if objtxt == "": continue 64 | cls, x0, y0, w0, h0, _ = objtxt.split(" ") 65 | cls = int(cls) 66 | x0 = float(x0) 67 | y0 = float(y0) 68 | w0 = float(w0) 69 | h0 = float(h0) 70 | #convert back 71 | 72 | #rotate 73 | rot = np.deg2rad(rot) 74 | M = np.array([[np.cos(rot), np.sin(rot)], [-np.sin(rot), np.cos(rot)]]) 75 | x0, y0 = 0.5+np.matmul(M, np.array([x0-0.5, y0-0.5])) 76 | #w0 h0 remain 77 | 78 | #crop 79 | if x0 < x_min/w or x0 > x_max/w or y0 < y_min/h or y0 > y_max/h: continue 80 | x0 = (x0*w - x_min)/size 81 | y0 = (y0*h - y_min)/size 82 | w0 = w0*w/size 83 | h0 = h0*h/size 84 | 85 | #flip 86 | if fl: 87 | x0 = 1-x0 88 | 89 | label.append((cls, x0, y0, w0, h0)) 90 | return image, label 91 | 92 | def IoU(box1, box2): 93 | w1, h1 = box1 94 | w2, h2 = box2 95 | iou = min(w1, w2) * min(h1, h2) 96 | return iou 97 | 98 | def which_anchor(box): 99 | anchor = ((10,14), (23,27), (37,58), (81,82), (135,169), (344,319)) 100 | dist = [] 101 | for i in range(6): 102 | dist.append(IoU(anchor[i], box)) 103 | i = dist.index(max(dist)) 104 | return i 105 | 106 | def create_array(input_size): 107 | image, label = create(input_size) 108 | _, height, width, depth = image.shape 109 | classes = 80 110 | out_height = height//32 111 | out_width = width//32 112 | out_depth = 3*(5+classes) 113 | 114 | X = image 115 | Y1 = np.random.random((1, out_height, out_width, out_depth)) 116 | Y2 = np.random.random((1, 2*out_height, 2*out_width, out_depth)) 117 | for i in range(3): 118 | Y1[:, :, :, i*(out_depth//3)] = 1 119 | Y2[:, :, :, i*(out_depth//3)] = 1 120 | #convert label to array 121 | for obj in label: 122 | cls, x0, y0, w0, h0 = obj 123 | if x0<0 or x0>=1 or y0<0 or y0>=1: continue 124 | box = (w0, h0) 125 | i = which_anchor(box) 126 | if (i<3): #anchor1 127 | x = int(out_width*x0) 128 | y = int(out_height*y0) 129 | Y1[0, y, x, 0+i*(out_depth//3)] = 1 130 | Y1[0, y, x, 1+i*(out_depth//3)] = x0 131 | Y1[0, y, x, 2+i*(out_depth//3)] = y0 132 | Y1[0, y, x, 3+i*(out_depth//3)] = w0 133 | Y1[0, y, x, 4+i*(out_depth//3)] = h0 134 | Y1[0, y, x, 4:(i+1)*(out_depth//3)] = 0 135 | Y1[0, y, x, cls] = 1 136 | else: #anchor2 137 | i = i - 3 138 | x = int(2*out_width*x0) 139 | y = int(2*out_height*y0) 140 | Y2[0, y, x, 0+i*(2*out_depth//3)] = 1 141 | Y2[0, y, x, 1+i*(2*out_depth//3)] = x0 142 | Y2[0, y, x, 2+i*(2*out_depth//3)] = y0 143 | Y2[0, y, x, 3+i*(2*out_depth//3)] = w0 144 | Y2[0, y, x, 4+i*(2*out_depth//3)] = h0 145 | Y2[0, y, x, 4:(i+1)*(2*out_depth//3)] = 0 146 | Y2[0, y, x, cls] = 1 147 | X[X<1e-37] = 1e-37 148 | Y1[Y1<1e-37] = 1e-37 149 | Y2[Y2<1e-37] = 1e-37 150 | 151 | return X, Y1, Y2 152 | 153 | def create_many_arrays(batch_size, input_size): 154 | X = [] 155 | Y1 = [] 156 | Y2 = [] 157 | for i in range(batch_size): 158 | x, y1, y2 = create_array(input_size) 159 | X.append(x) 160 | Y1.append(y1) 161 | Y2.append(y2) 162 | X = np.vstack(X) 163 | Y1 = np.vstack(Y1) 164 | Y2 = np.vstack(Y2) 165 | return X, Y1, Y2 166 | 167 | def shuffle(batch_size, input_size): 168 | step = 0 169 | while (1): 170 | if (step == 0): 171 | yield step, None, None, None 172 | else: 173 | yield step, X, Y1, Y2 174 | del X 175 | del Y1 176 | del Y2 177 | step += 1 178 | X, Y1, Y2 = create_many_arrays(batch_size, input_size) 179 | 180 | if __name__ == "__main__": 181 | image, label = create(416) 182 | image = image.astype(np.int32).reshape(416,416,3) 183 | print(image.shape) 184 | for obj in label: 185 | cls, x0, y0, w0, h0 = obj 186 | x1 = int((x0 - w0/2)*416) 187 | x2 = int((x0 + w0/2)*416) 188 | y1 = int((y0 - h0/2)*416) 189 | y2 = int((y0 + h0/2)*416) 190 | cv2.rectangle(image, (x1, y1), (x2, y2), (0,0,0),2) 191 | 192 | cv2.imwrite("temp.jpg", image) 193 | sh.eog("temp.jpg") 194 | sh.rm("temp.jpg") 195 | -------------------------------------------------------------------------------- /data/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/data/dog.jpg -------------------------------------------------------------------------------- /docs/Screenshot from 2019-09-06 00-14-44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/docs/Screenshot from 2019-09-06 00-14-44.png -------------------------------------------------------------------------------- /images/tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/tensorboard.png -------------------------------------------------------------------------------- /images/train_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/train_tensorboard.png -------------------------------------------------------------------------------- /images/train_tensorboard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/train_tensorboard2.png -------------------------------------------------------------------------------- /images/training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/training.png -------------------------------------------------------------------------------- /scripts/clean_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | #import sh 4 | data_path = "../data" 5 | 6 | images_path = os.path.join(data_path, "images") 7 | labels_path = os.path.join(data_path, "labels") 8 | 9 | count = 0 10 | import pdb 11 | for image_name in os.listdir(images_path): 12 | 13 | image_path = os.path.join(images_path, image_name) 14 | label_name = image_name.split(".")[0] + ".txt" 15 | label_path = os.path.join(labels_path, label_name) 16 | 17 | if not os.path.exists(label_path): 18 | count += 1 19 | print(count, image_path) 20 | #sh.touch(label_path) 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget -c https://pjreddie.com/media/files/train2014.zip 3 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 4 | unzip train2014.zip -d ./ 5 | tar xzf labels.tgz -C ./ 6 | mkdir ./data 7 | mv ./labels/train2014 ./data/labels 8 | mv ./train2014 ./data/images 9 | rm -r ./labels 10 | -------------------------------------------------------------------------------- /tensorboard: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | tensorboard --logdir=train_graph 3 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import tensorflow as tf 5 | import numpy as np 6 | import cv2 7 | 8 | 9 | saver = tf.train.import_meta_graph("./train_graph/tiny-yolo-final.ckpt.meta") 10 | 11 | sess = tf.Session() 12 | saver.restore("./train_graph/tiny-yolo-final.ckpt") 13 | g = sess.graph 14 | X = g.get_tensor_by_name("YOLO/input:0") 15 | h = g.get_tensor_by_name("TRAINER/h:0") 16 | 17 | scores = h[:,0] 18 | y1 = h[:,2:3] - h[:,4:5] 19 | x1 = h[:,1:2] - h[:,3:4] 20 | y2 = h[:,2:3] + h[:,4:5] 21 | x2 = h[:,1:2] + h[:,3:4] 22 | boxes = tf.concat([y1,x1,y2,x2], axis=1) 23 | 24 | prediction = tf.image.non_max_suppression(boxes, scores, 10) 25 | 26 | 27 | def detect(im): 28 | Xp = letterbox(im) 29 | return sess.run(prediction, feed_dict = {X:Xp, "YOLO/dropout:0": 1}) 30 | 31 | def letterbox(im, size=416): 32 | h, w, _ = im.shape 33 | im_out = np.zeros(1, size, size, 3) 34 | if h>=w: 35 | new_h, new_w = size, int(size*w/h) 36 | else: 37 | new_h, new_w = int(size*h/w), size 38 | im = cv2.resize(im, (new_w, new_h)).reshape(1, new_h, new_w, 3) 39 | im_out[:, 0:new_h, 0:new_w, 0:3] = im 40 | return im_out 41 | 42 | 43 | def draw(): 44 | pass 45 | 46 | if __name__ == "__main__": 47 | ap = argparse.ArgumentParser() 48 | ap.add_argument("-i", "--input", required=True, help="path to input image") 49 | args = vars(ap.parse_args()) 50 | 51 | image_path = args["input"] 52 | 53 | im = cv2.imread(image_path) 54 | 55 | print(detect(im)) 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ''' 3 | This script will randomize weights and train the tiny yolo from scratch. 4 | ''' 5 | from data.datahandler import shuffle 6 | import tensorflow as tf 7 | import numpy as np 8 | import os 9 | import sys 10 | import shutil 11 | import time 12 | 13 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" 14 | 15 | saver = tf.train.import_meta_graph("./graph/tiny-yolo.ckpt.meta") 16 | with tf.Session() as sess: 17 | saver.restore(sess, "./graph/tiny-yolo.ckpt") 18 | g = sess.graph 19 | with g.name_scope("TRAINER"): 20 | X = g.get_tensor_by_name("YOLO/input:0") 21 | batch_size, height, width, in_channels = X.get_shape().as_list() 22 | classes = 80 23 | out_height = height//32 24 | out_width = width//32 25 | out_channels = 3*(5+classes) 26 | h1 = g.get_tensor_by_name("YOLO/output1:0") 27 | h2 = g.get_tensor_by_name("YOLO/output2:0") 28 | Y1 = tf.placeholder(shape = (batch_size, out_height, out_width, out_channels), dtype = tf.float32, name = "groundtruth1") 29 | Y2 = tf.placeholder(shape = (batch_size, 2*out_height, 2*out_width, out_channels), dtype = tf.float32, name = "groundtruth2") 30 | 31 | #loss 32 | h = [] 33 | Y = [] 34 | 35 | split_h1 = tf.split(h1, 3, axis = 3) 36 | for split in split_h1: 37 | h.append(tf.reshape(split, [batch_size * out_height * out_width, out_channels//3])) 38 | 39 | split_h2 = tf.split(h2, 3, axis = 3) 40 | for split in split_h2: 41 | h.append(tf.reshape(split, [batch_size * 2*out_height * 2*out_width, out_channels//3])) 42 | 43 | split_Y1 = tf.split(Y1, 3, axis = 3) 44 | for split in split_Y1: 45 | Y.append(tf.reshape(split, [batch_size * out_height * out_width, out_channels//3])) 46 | 47 | split_Y2 = tf.split(Y2, 3, axis = 3) 48 | for split in split_Y2: 49 | Y.append(tf.reshape(split, [batch_size * 2*out_height * 2*out_width, out_channels//3])) 50 | 51 | h = tf.concat(h, axis=0, name = "h") 52 | Y = tf.concat(Y, axis=0) 53 | 54 | Lcoord = 1 55 | Lnoobj = 1 56 | loss_xy = Lcoord*tf.reduce_mean(Y[:,0]*((h[:,1] - Y[:,1])**2 + (h[:,2] - Y[:,2])**2)) 57 | loss_wh = Lcoord*tf.reduce_mean(Y[:,0]*((h[:,3]**0.5 - Y[:,3]**0.5)**2+(h[:,4]**0.5 - Y[:,4]**0.5)**2)) 58 | loss_obj = (-1)*tf.reduce_mean(tf.tile(Y[:,0:1], (1, classes))*(Y[:,5:]*tf.log(h[:,5:]) + (1-Y[:,5:])*tf.log(1-h[:,5:]))) 59 | loss_noobj = (-1*Lnoobj)*tf.reduce_mean(tf.tile(1-Y[:,0:1], (1, classes))*(Y[:,5:]*tf.log(h[:,5:]) + (1-Y[:,5:])*tf.log(1-h[:,5:]))) 60 | loss_p = (-1)*tf.reduce_mean(tf.tile(Y[:,0:1], (1, classes))*tf.log((tf.tile(h[:,0:1], (1, classes)) * Y[:,5:])) + (1-tf.tile(Y[:,0:1], (1, classes)))*tf.log(1-(tf.tile(h[:,0:1], (1, classes)) * Y[:,5:]))) 61 | 62 | loss = loss_xy + loss_wh + loss_obj + loss_noobj + loss_p 63 | 64 | optimizer = tf.train.AdamOptimizer(learning_rate = 1e-3) 65 | trainer = optimizer.minimize(loss, name = "trainer") 66 | 67 | if os.path.exists("./train_graph"): 68 | shutil.rmtree("./train_graph") 69 | os.mkdir("./train_graph") 70 | 71 | train_writer = tf.summary.FileWriter("./train_graph", g) 72 | saver = tf.train.Saver() 73 | tf.summary.histogram("loss", loss) 74 | merge = tf.summary.merge_all() 75 | 76 | 77 | 78 | hm_steps = 25000 79 | sess.run(tf.global_variables_initializer()) 80 | 81 | input_size = height 82 | 83 | for batch in shuffle(batch_size, input_size): 84 | step, Xp, Y1p, Y2p = batch 85 | if step == 0: 86 | time.sleep(1) 87 | continue 88 | debugger = tf.logical_or(tf.is_nan(loss), tf.is_inf(loss)) 89 | 90 | while (1): 91 | d, l = sess.run([debugger, loss], feed_dict = {X:Xp, Y1:Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5}) 92 | if (not d): 93 | break 94 | else: 95 | print("Re-random variables!") 96 | sess.run(tf.global_variables_initializer()) 97 | summary, _ , lossp, lxy, lwh, lobj, lnoobj, lp = sess.run([merge, trainer, loss, loss_xy, loss_wh, loss_obj, loss_noobj, loss_p], feed_dict = {X: Xp, Y1: Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5}) 98 | 99 | print("""Step {} : loss {} 100 | loss_xy = {} 101 | loss_wh = {} 102 | loss_obj = {} 103 | loss_noobj = {} 104 | loss_p = {}\n""".format(step, lossp, lxy, lwh, lobj, lnoobj, lp), end="\n") 105 | 106 | train_writer.add_summary(summary, step) 107 | 108 | if (step % 2500 ==0): 109 | saver.save(sess, "./train_graph/tiny-yolo-{}.ckpt".format(step)) 110 | if (step>hm_steps): 111 | saver.save(sess, "./train_graph/tiny-yolo-final.ckpt".format(step)) 112 | 113 | 114 | 115 | --------------------------------------------------------------------------------