├── .gitignore
├── Makefile
├── README.md
├── create_graph.py
├── data
    ├── __init__.py
    ├── datahandler.py
    └── dog.jpg
├── docs
    └── Screenshot from 2019-09-06 00-14-44.png
├── images
    ├── tensorboard.png
    ├── train_tensorboard.png
    ├── train_tensorboard2.png
    └── training.png
├── scripts
    ├── clean_data.py
    └── get_coco.sh
├── tensorboard
├── test.py
└── train.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.zip
 2 | *.tgz
 3 | *.jpg
 4 | *.txt
 5 | __pycache__
 6 | graph
 7 | train_graph
 8 | data/images
 9 | data/labels
10 | 
11 | # OS Generated #
12 | .DS_Store*
13 | ehthumbs.db
14 | Icon?
15 | Thumbs.db
16 | *.swp
17 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | create:
 2 | 	python3 ./create_graph.py
 3 | train: create
 4 | 	python3 ./train.py
 5 | test:
 6 | 	python3 ./test.py -i data/dog.jpg
 7 | clean:
 8 | 	rm -r graph
 9 | 	rm -r train_graph
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TINY YOLO v3 model in tensorflow
 2 | 
 3 | ![alt text](https://raw.githubusercontent.com/khanhhhh/tiny-yolo-tensorflow/master/docs/Screenshot%20from%202019-09-06%2000-14-44.png)
 4 | 
 5 | Notes on extracting weights from darknet into TensorFlow are in Notes.txt.
 6 | 
 7 | # DEPENDENCIES
 8 | 
 9 | `numpy`
10 | `tensorflow`
11 | `opencv`
12 | 
13 | # USAGE
14 | 
15 | `make create`
16 | 
17 | `make train`
18 | 
19 | `make test`
20 | 
21 | # yolov3-tiny from darknet
22 | ```
23 | layer     filters    size              input                output
24 |     0 conv     16  3 x 3 / 1   416 x 416 x   3   ->   416 x 416 x  16  0.150 BFLOPs
25 |     1 max          2 x 2 / 2   416 x 416 x  16   ->   208 x 208 x  16
26 |     2 conv     32  3 x 3 / 1   208 x 208 x  16   ->   208 x 208 x  32  0.399 BFLOPs
27 |     3 max          2 x 2 / 2   208 x 208 x  32   ->   104 x 104 x  32
28 |     4 conv     64  3 x 3 / 1   104 x 104 x  32   ->   104 x 104 x  64  0.399 BFLOPs
29 |     5 max          2 x 2 / 2   104 x 104 x  64   ->    52 x  52 x  64
30 |     6 conv    128  3 x 3 / 1    52 x  52 x  64   ->    52 x  52 x 128  0.399 BFLOPs
31 |     7 max          2 x 2 / 2    52 x  52 x 128   ->    26 x  26 x 128
32 |     8 conv    256  3 x 3 / 1    26 x  26 x 128   ->    26 x  26 x 256  0.399 BFLOPs
33 |     9 max          2 x 2 / 2    26 x  26 x 256   ->    13 x  13 x 256
34 |    10 conv    512  3 x 3 / 1    13 x  13 x 256   ->    13 x  13 x 512  0.399 BFLOPs
35 |    11 max          2 x 2 / 1    13 x  13 x 512   ->    13 x  13 x 512
36 |    12 conv   1024  3 x 3 / 1    13 x  13 x 512   ->    13 x  13 x1024  1.595 BFLOPs
37 |    13 conv    256  1 x 1 / 1    13 x  13 x1024   ->    13 x  13 x 256  0.089 BFLOPs
38 |    14 conv    512  3 x 3 / 1    13 x  13 x 256   ->    13 x  13 x 512  0.399 BFLOPs
39 |    15 conv    255  1 x 1 / 1    13 x  13 x 512   ->    13 x  13 x 255  0.044 BFLOPs
40 |    16 yolo
41 |    17 route  13
42 |    18 conv    128  1 x 1 / 1    13 x  13 x 256   ->    13 x  13 x 128  0.011 BFLOPs
43 |    19 upsample            2x    13 x  13 x 128   ->    26 x  26 x 128
44 |    20 route  19 8
45 |    21 conv    256  3 x 3 / 1    26 x  26 x 384   ->    26 x  26 x 256  1.196 BFLOPs
46 |    22 conv    255  1 x 1 / 1    26 x  26 x 256   ->    26 x  26 x 255  0.088 BFLOPs
47 |    23 yolo
48 | ```
49 | 
50 | tensorflow model
51 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/training.png)
52 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/tensorboard.png)
53 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/train_tensorboard.png)
54 | ![alt text](https://raw.githubusercontent.com/khanh1412/tiny-yolo-tensorflow/master/images/train_tensorboard2.png)
55 | 


--------------------------------------------------------------------------------
/create_graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | import sys
  5 | import os
  6 | import shutil
  7 | import pdb
  8 | 
  9 | g1 = tf.Graph()
 10 | 
 11 | with g1.as_default() as g:
 12 |     with g.name_scope("YOLO"):
 13 |         def drop(n, in_name, keep_prob = 1):
 14 |             in_tensor = g.get_tensor_by_name(in_name)
 15 |             with g.name_scope("drop_{}".format(n)):
 16 |                 drop = tf.nn.dropout(in_tensor, keep_prob)
 17 |                 drop = tf.identity(drop, name = "out")
 18 |             return drop
 19 | 
 20 |         def conv(n, in_name, out_channels, kernel_size, stride, nonlin="relu", batchnorm=1):
 21 |             in_tensor = g.get_tensor_by_name(in_name)
 22 |             batch_size, height, width, in_channels = in_tensor.get_shape().as_list()
 23 |             with g.name_scope("conv_{}".format(n)):
 24 |                 kernel = tf.Variable(tf.random_uniform(shape = [kernel_size, kernel_size, in_channels, out_channels])/ (kernel_size*kernel_size*out_channels) , dtype = tf.float32, name = "kernel")
 25 |                 scale  = tf.Variable(tf.random_normal(shape = [1, 1, 1, out_channels]), dtype = tf.float32, name = "scales")
 26 |                 bias   = tf.Variable(tf.random_normal(shape = [1, 1, 1, out_channels]), dtype = tf.float32, name = "biases")
 27 |                 '''
 28 |                 conv
 29 |                 batchnorm + bias + scale
 30 |                 nonlin
 31 |                 '''
 32 |                 strides = (1, stride, stride, 1)
 33 |                 conv = tf.nn.conv2d(in_tensor, kernel, strides, padding="SAME", name = "conv")
 34 |                 if (batchnorm):
 35 |                     mean_conv, var_conv = tf.nn.moments(conv, axes = [1,2,3], keep_dims = True)
 36 |                     batchnorm = tf.nn.batch_normalization(conv, mean_conv, var_conv, bias, scale, 1e-100, name = "batchnorm")
 37 |                 else:
 38 |                     batchnorm = tf.add(conv, bias, name = "batchnorm")
 39 |                 if nonlin == "relu":
 40 |                     nonlin = tf.nn.leaky_relu(batchnorm)
 41 |                 elif nonlin == "sigmoid":
 42 |                     nonlin = tf.sigmoid(batchnorm)
 43 |                 elif nonlin == "linear":
 44 |                     nonlin = tf.identity(batchnorm)
 45 |                 else:
 46 |                     raise Exception(" \"{}\" is not a nonlinear function!".format(nonlin))
 47 |                 conv = tf.identity(nonlin, name = "out")
 48 |             return conv
 49 |  
 50 |         def maxpool(n, in_name, kernel_size, stride):
 51 |             in_tensor = g.get_tensor_by_name(in_name)
 52 |             batch_size, height, width, in_channels = in_tensor.get_shape().as_list()
 53 |             with g.name_scope("maxpool_{}".format(n)):
 54 |                 ksize = [1, kernel_size, kernel_size, 1]
 55 |                 strides = [1, stride, stride, 1]
 56 |                 '''
 57 |                 maxpool
 58 |                 '''
 59 |                 maxpool = tf.nn.max_pool(in_tensor, ksize, strides, padding="SAME")
 60 |                 maxpool = tf.identity(maxpool, name = "out")
 61 |             return maxpool
 62 |  
 63 |         def route(n, n1_name, n2_name):
 64 |  
 65 |             if (n2_name==None):
 66 |                 n1 = g.get_tensor_by_name(n1_name)
 67 |                 route = tf.identity(n1)
 68 |             else:
 69 |                 n1 = g.get_tensor_by_name(n1_name)
 70 |                 n2 = g.get_tensor_by_name(n2_name)
 71 |                 route = tf.concat([n1, n2], 3)
 72 |             with g.name_scope("route_{}".format(n)):
 73 |                 route = tf.identity(route, name = "out")
 74 |             return route
 75 |  
 76 |         def upsample(n, in_name, stride):
 77 |             in_tensor = g.get_tensor_by_name(in_name)
 78 |             batch_size, height, width, in_channels = in_tensor.get_shape().as_list()
 79 |             out_channels = in_channels
 80 |             with g.name_scope("upsample_{}".format(n)):
 81 |                 kernel = tf.ones([stride, stride, in_channels, out_channels], name = "kernel")
 82 |                 output_shape = [batch_size, stride*height, stride*width, in_channels]
 83 |                 strides = [1, stride, stride, 1]
 84 |                 padding = "SAME"
 85 |                 unsample = tf.nn.conv2d_transpose(in_tensor, kernel, output_shape, strides, name = "out")
 86 |             return unsample
 87 |  
 88 |         def yolo(n, in_name, anchor, thresh=0.5):#in tensor has shape (batch_size, height, width, 255)
 89 |             in_tensor = g.get_tensor_by_name(in_name)
 90 |             batch_size, height, width, in_channels = in_tensor.get_shape().as_list()
 91 |             split = tf.split(in_tensor, 3, axis = 3)
 92 |             new_split = []
 93 |             offset_x_np = np.zeros((batch_size, height, width, 1))
 94 |             for i in range(width):
 95 |                 offset_x_np[:, :, i, :] = i/width
 96 |             offset_y_np = np.zeros((batch_size, height, width, 1))
 97 |             for i in range(height):
 98 |                 offset_y_np[:, i, :, :] = i/height
 99 |             offset_x = tf.constant(offset_x_np, dtype = tf.float32)
100 |             offset_y = tf.constant(offset_y_np, dtype = tf.float32)
101 |             
102 |             for i in range(3):
103 |                 o = split[i][:, :, :, 0:1]
104 |                 o = tf.sigmoid(o)
105 |                 x = split[i][:, :, :, 1:2]
106 |                 x = tf.sigmoid(x)/width + offset_x
107 |                 y = split[i][:, :, :, 2:3]
108 |                 y = tf.sigmoid(y)/height + offset_y
109 |                 wh = split[i][:, :, :, 3:5]
110 |                 wh = tf.constant(anchor[i], dtype = tf.float32) * tf.exp(wh)
111 |                 c = split[i][:, :, :, 5: ]
112 |                 c = tf.sigmoid(c)
113 |                 new_split.append(o)
114 |                 new_split.append(x)
115 |                 new_split.append(y)
116 |                 new_split.append(wh)
117 |                 new_split.append(c)
118 |                 #obj,x,y,w,h,classes
119 |             
120 |             with g.name_scope("yolo_{}".format(n)):
121 |                 yolo = tf.concat(new_split, 3, name = "out")
122 |             return yolo
123 | 
        # Network hyper-parameters. The input placeholder has a fixed batch size,
        # so every feed must supply exactly `batch_size` images.
        height = 416
        width = 416
        # Anchor sizes for the first (yolo_16) and second (yolo_23) detection heads.
        anchor1 = ((344,319), (135,169), (81,82))
        anchor2 = ((37,58), (23,27), (10,14))
        classes = 80
        batch_size = 32
        image_depth = 3

        # NOTE(review): out_height, out_width and out_depth are not read again in this file.
        out_height = height//32
        out_width = width//32
        out_depth = 3*(5 + classes)

        X = tf.placeholder(shape = (batch_size, height, width, image_depth), dtype = tf.float32, name = "input")
        # Scalar keep probability fed to the drop() layers below.
        dropout = tf.placeholder(shape = (), dtype = tf.float32, name = "dropout")
        # Layers are wired together by tensor name; the "#k" markers are the layer indices.
        #0
        conv_0 = conv(0, "YOLO/input:0", 16, 3, 1)
        #1
        maxpool(1, "YOLO/conv_0/out:0", 2, 2)
        #2
        conv(2, "YOLO/maxpool_1/out:0", 32, 3, 1)
        #3
        maxpool(3, "YOLO/conv_2/out:0", 2, 2)
        #4
        conv(4, "YOLO/maxpool_3/out:0", 64, 3, 1)
        #5
        maxpool(5, "YOLO/conv_4/out:0", 2, 2)
        #6
        conv(6, "YOLO/maxpool_5/out:0", 128, 3, 1)
        #7
        maxpool(7, "YOLO/conv_6/out:0", 2, 2)
        #8
        conv(8, "YOLO/maxpool_7/out:0", 256, 3, 1)
        #9
        maxpool(9, "YOLO/conv_8/out:0", 2, 2)
        #10
        conv(10, "YOLO/maxpool_9/out:0", 512, 3, 1)
        #11 (stride 1: keeps the spatial size)
        maxpool(11, "YOLO/conv_10/out:0", 2, 1)
        #12
        conv(12, "YOLO/maxpool_11/out:0", 1024, 3, 1)
        #13
        conv(13, "YOLO/conv_12/out:0", 256, 1, 1)
        #14
        conv(14, "YOLO/conv_13/out:0", 512, 3, 1)
        drop(14, "YOLO/conv_14/out:0", dropout) 
        #15 (raw head output: no activation, bias only)
        conv(15, "YOLO/drop_14/out:0", 255, 1, 1, nonlin = "linear", batchnorm=0)
        #16
        yolo(16, "YOLO/conv_15/out:0", anchor1)
        #17 (second branch starts again from conv_13)
        route(17, "YOLO/conv_13/out:0", None)
        #18
        conv(18, "YOLO/route_17/out:0", 128, 1, 1)
        drop(18, "YOLO/conv_18/out:0", dropout)
        #19
        upsample(19, "YOLO/drop_18/out:0", 2)
        #20 (join the upsampled features with conv_8)
        route(20, "YOLO/upsample_19/out:0", "YOLO/conv_8/out:0")
        #21
        conv(21, "YOLO/route_20/out:0", 256, 3, 1)
        #22 (raw head output: no activation, bias only)
        conv(22, "YOLO/conv_21/out:0", 255, 1, 1, nonlin = "linear", batchnorm=0)
        #23
        yolo(23, "YOLO/conv_22/out:0", anchor2)

        # Stable names for the two decoded heads; train.py looks them up as
        # "YOLO/output1:0" and "YOLO/output2:0".
        h1 = tf.identity(g.get_tensor_by_name("YOLO/yolo_16/out:0"), "output1")
        h2 = tf.identity(g.get_tensor_by_name("YOLO/yolo_23/out:0"), "output2")
191 | 
# Start from an empty ./graph directory on every run.
if os.path.exists("./graph"):
    shutil.rmtree("./graph")
os.mkdir("./graph")

# Dump the graph definition for TensorBoard. Closing the writer flushes the
# event file to disk instead of relying on interpreter shutdown.
graph_writer = tf.summary.FileWriter("./graph", g)
graph_writer.close()

# Initialise the variables once and store them as the starting checkpoint.
with tf.Session(graph = g) as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.save(sess, "./graph/tiny-yolo.ckpt")
202 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/data/__init__.py


--------------------------------------------------------------------------------
/data/datahandler.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import sh
  3 | import os
  4 | import random
  5 | import numpy as np
  6 | import cv2
  7 | 
# Dataset root; relative, so it is resolved against the current working directory.
data_path = "./data"

images_path = os.path.join(data_path, "images")
labels_path = os.path.join(data_path, "labels")

# Listed once at import time: importing this module raises if the directory is missing.
images_list = os.listdir(images_path)
 14 | 
 15 | def create(input_size, flip=1, crop=0.9, angle=10, color = 0.05):
 16 |     image_name = random.choice(images_list)
 17 |     image_path = os.path.join(images_path, image_name)
 18 |     label_name = image_name.split(".")[0] + ".txt"
 19 |     label_path = os.path.join(labels_path, label_name)
 20 | 
 21 |     #image
 22 |     im = cv2.imread(image_path).astype(np.float)
 23 |     h, w, _ = im.shape
 24 |         
 25 |         #rotate
 26 |     rot = random.uniform(-angle, +angle)
 27 |     M = cv2.getRotationMatrix2D((w/2, h/2), rot, 1)
 28 |     im = cv2.warpAffine(im, M, (w, h))
 29 |     
 30 |         #crop
 31 |     size = int(min(w, h) * random.uniform(crop, 1))
 32 |     x_min = int(random.uniform(0, w - size))
 33 |     y_min = int(random.uniform(0, h - size))
 34 |     x_max = x_min + size
 35 |     y_max = y_min + size
 36 |     im = im[y_min:y_max, x_min:x_max, :]
 37 | 
 38 |         #flip
 39 |     fl = random.random() < 0.5
 40 |     if fl:
 41 |         im = cv2.flip(im, 1)
 42 |     
 43 |        #color
 44 |     red = random.uniform(1-color, 1+color)
 45 |     blu = random.uniform(1-color, 1+color)
 46 |     gre = random.uniform(1-color, 1+color)
 47 | 
 48 |     col = np.array([blu, gre, red])
 49 |     im = im*col
 50 |     im[im<0] = 0
 51 |     im[im>255] = 255
 52 |         #resize to inputsize
 53 |     image = cv2.resize(im, (input_size, input_size), interpolation = cv2.INTER_CUBIC)
 54 |     image = image.reshape((1, input_size, input_size, 3))
 55 | 
 56 |     #label
 57 | 
 58 |     label = []
 59 |     if os.path.exists(label_path):
 60 |         with open(label_path, "r") as f:
 61 |             labeltxt = f.read()
 62 |         for objtxt in labeltxt.split("\n"):
 63 |             if objtxt == "": continue
 64 |             cls, x0, y0, w0, h0, _ = objtxt.split(" ")
 65 |             cls = int(cls)
 66 |             x0   = float(x0)
 67 |             y0   = float(y0)
 68 |             w0   = float(w0)
 69 |             h0   = float(h0)
 70 |             #convert back
 71 |             
 72 |                 #rotate
 73 |             rot = np.deg2rad(rot)
 74 |             M = np.array([[np.cos(rot), np.sin(rot)], [-np.sin(rot), np.cos(rot)]])
 75 |             x0, y0 = 0.5+np.matmul(M, np.array([x0-0.5, y0-0.5]))
 76 |                 #w0 h0 remain
 77 |             
 78 |                 #crop
 79 |             if x0 < x_min/w or x0 > x_max/w or y0 < y_min/h or y0 > y_max/h: continue
 80 |             x0 = (x0*w - x_min)/size
 81 |             y0 = (y0*h - y_min)/size
 82 |             w0 = w0*w/size
 83 |             h0 = h0*h/size
 84 | 
 85 |                 #flip
 86 |             if fl:
 87 |                 x0 = 1-x0
 88 |             
 89 |             label.append((cls, x0, y0, w0, h0))
 90 |     return image, label
 91 | 
 92 | def IoU(box1, box2):
 93 |     w1, h1 = box1
 94 |     w2, h2 = box2
 95 |     iou = min(w1, w2) * min(h1, h2)
 96 |     return iou
 97 | 
 98 | def which_anchor(box):
 99 |     anchor = ((10,14),  (23,27),  (37,58),  (81,82),  (135,169),  (344,319))
100 |     dist = []
101 |     for i in range(6):
102 |         dist.append(IoU(anchor[i], box))
103 |     i = dist.index(max(dist))
104 |     return i
105 | 
def create_array(input_size):
    """Create one augmented image and its two ground-truth grids.

    Returns (X, Y1, Y2). X is the image batch of one; Y1 is the target for
    the coarse grid (input/32 cells per side) and Y2 for the fine grid
    (twice as many cells per side). Each grid cell holds three slots of
    5 + classes values laid out as (obj, x, y, w, h, class scores...).

    Fixes over the previous version:
      * the class one-hot is written inside the chosen slot, not at the
        absolute channel `cls`, where it overwrote box fields;
      * only the slot's class scores are cleared, not everything from
        channel 4 onwards, which erased the height and earlier slots;
      * the fine grid uses the same slot stride as the coarse grid, so the
        third slot no longer indexes past the last channel;
      * the box is converted to pixels before anchor matching, because the
        anchors are given in pixels.
    """
    image, label = create(input_size)
    _, height, width, depth = image.shape
    classes = 80
    out_height = height//32
    out_width = width//32
    out_depth = 3*(5+classes)
    slot = out_depth//3

    X = image
    # NOTE(review): targets start as random values with objectness 1 in every
    # cell and slot; confirm this is intended rather than an all-background
    # default.
    Y1 = np.random.random((1, out_height, out_width, out_depth))
    Y2 = np.random.random((1, 2*out_height, 2*out_width, out_depth))
    for i in range(3):
        Y1[:, :, :, i*slot] = 1
        Y2[:, :, :, i*slot] = 1
    #convert label to array
    for obj in label:
        cls, x0, y0, w0, h0 = obj
        if x0<0 or x0>=1 or y0<0 or y0>=1: continue
        box = (w0*width, h0*height)
        i = which_anchor(box)
        if (i<3): #anchor1
            target, grid_h, grid_w = Y1, out_height, out_width
        else: #anchor2
            i = i - 3
            target, grid_h, grid_w = Y2, 2*out_height, 2*out_width
        x = int(grid_w*x0)
        y = int(grid_h*y0)
        base = i*slot
        target[0, y, x, base+0] = 1
        target[0, y, x, base+1] = x0
        target[0, y, x, base+2] = y0
        target[0, y, x, base+3] = w0
        target[0, y, x, base+4] = h0
        target[0, y, x, base+5:base+slot] = 0
        target[0, y, x, base+5+cls] = 1
    # Keep every value strictly positive.
    X[X<1e-37] = 1e-37
    Y1[Y1<1e-37] = 1e-37
    Y2[Y2<1e-37] = 1e-37

    return X, Y1, Y2
152 | 
def create_many_arrays(batch_size, input_size):
    """Build a batch by stacking `batch_size` independent samples.

    Returns (X, Y1, Y2), each stacked along the first axis.
    """
    samples = [create_array(input_size) for _ in range(batch_size)]
    images = np.vstack([sample[0] for sample in samples])
    coarse = np.vstack([sample[1] for sample in samples])
    fine = np.vstack([sample[2] for sample in samples])
    return images, coarse, fine
166 | 
def shuffle(batch_size, input_size):
    """Yield (step, X, Y1, Y2) forever, one freshly generated batch per step.

    The first item is (0, None, None, None); real batches start at step 1.
    Each batch is built only after the consumer asks for the next item.
    """
    yield 0, None, None, None
    step = 1
    while True:
        X, Y1, Y2 = create_many_arrays(batch_size, input_size)
        yield step, X, Y1, Y2
        # Drop the references before the next batch is allocated.
        del X, Y1, Y2
        step += 1
179 | 
# Manual check: draw the transformed boxes on one augmented sample and show it.
if __name__ == "__main__":
    image, label = create(416)
    image = image.astype(np.int32).reshape(416,416,3)
    print(image.shape)
    for obj in label:
        cls, x0, y0, w0, h0 = obj
        # Labels are (centre, size) fractions; convert to pixel corners.
        x1 = int((x0 - w0/2)*416)
        x2 = int((x0 + w0/2)*416)
        y1 = int((y0 - h0/2)*416)
        y2 = int((y0 + h0/2)*416)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0,0,0),2)

    # Write a temporary file, open it with the `eog` command, then delete it.
    cv2.imwrite("temp.jpg", image)
    sh.eog("temp.jpg")
    sh.rm("temp.jpg")                                            
195 | 


--------------------------------------------------------------------------------
/data/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/data/dog.jpg


--------------------------------------------------------------------------------
/docs/Screenshot from 2019-09-06 00-14-44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/docs/Screenshot from 2019-09-06 00-14-44.png


--------------------------------------------------------------------------------
/images/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/tensorboard.png


--------------------------------------------------------------------------------
/images/train_tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/train_tensorboard.png


--------------------------------------------------------------------------------
/images/train_tensorboard2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/train_tensorboard2.png


--------------------------------------------------------------------------------
/images/training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/khanh101/tiny-yolo-tensorflow/717969afdda4ec64c6341da7938a57afee739c77/images/training.png


--------------------------------------------------------------------------------
/scripts/clean_data.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import os
 3 | #import sh
 4 | data_path = "../data"
 5 | 
 6 | images_path = os.path.join(data_path, "images")
 7 | labels_path = os.path.join(data_path, "labels")
 8 | 
 9 | count = 0
10 | import pdb
11 | for image_name in os.listdir(images_path):
12 | 
13 |     image_path = os.path.join(images_path, image_name)
14 |     label_name = image_name.split(".")[0] + ".txt"
15 |     label_path = os.path.join(labels_path, label_name)
16 | 
17 |     if not os.path.exists(label_path):
18 |         count += 1
19 |         print(count, image_path)
20 |         #sh.touch(label_path)
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/scripts/get_coco.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | wget -c https://pjreddie.com/media/files/train2014.zip
 3 | wget -c https://pjreddie.com/media/files/coco/labels.tgz
 4 | unzip train2014.zip -d ./
 5 | tar xzf labels.tgz -C ./
 6 | mkdir ./data
 7 | mv ./labels/train2014 ./data/labels
 8 | mv ./train2014 ./data/images
 9 | rm -r ./labels
10 | 


--------------------------------------------------------------------------------
/tensorboard:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | tensorboard --logdir=train_graph
3 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import argparse
 4 | import tensorflow as tf
 5 | import numpy as np
 6 | import cv2
 7 | 
 8 | 
 9 | saver = tf.train.import_meta_graph("./train_graph/tiny-yolo-final.ckpt.meta")
10 | 
11 | sess = tf.Session()
12 | saver.restore("./train_graph/tiny-yolo-final.ckpt")
13 | g = sess.graph
14 | X = g.get_tensor_by_name("YOLO/input:0")
15 | h = g.get_tensor_by_name("TRAINER/h:0")
16 | 
17 | scores = h[:,0]
18 | y1 = h[:,2:3] - h[:,4:5]
19 | x1 = h[:,1:2] - h[:,3:4]
20 | y2 = h[:,2:3] + h[:,4:5]
21 | x2 = h[:,1:2] + h[:,3:4]
22 | boxes = tf.concat([y1,x1,y2,x2], axis=1)
23 | 
24 | prediction = tf.image.non_max_suppression(boxes, scores, 10)
25 | 
26 | 
def detect(im):
    """Run the network on one image and return the indices kept by NMS.

    The image is letterboxed first and dropout is disabled by feeding a
    keep probability of 1.
    NOTE(review): the input placeholder was built with a fixed batch size in
    create_graph.py; confirm a single-image feed is accepted.
    """
    feed = {X: letterbox(im), "YOLO/dropout:0": 1}
    return sess.run(prediction, feed_dict = feed)
30 | 
def letterbox(im, size=416):
    """Fit `im` into a `size` x `size` canvas without changing its aspect ratio.

    The longer side is scaled to `size`, the image is placed in the top-left
    corner and the rest of the canvas stays zero.

    im   -- image array of shape (height, width, 3)
    size -- side of the square output

    Returns an array of shape (1, size, size, 3).
    """
    h, w, _ = im.shape
    # np.zeros takes the shape as one tuple; separate arguments raise TypeError.
    im_out = np.zeros((1, size, size, 3))
    # max(1, ...) keeps very elongated images from collapsing to a zero side.
    if h>=w:
        new_h, new_w = size, max(1, int(size*w/h))
    else:
        new_h, new_w = max(1, int(size*h/w)), size
    resized = cv2.resize(im, (new_w, new_h))
    im_out[0, 0:new_h, 0:new_w, 0:3] = resized
    return im_out
41 | 
42 | 
def draw():
    """Placeholder: not implemented, does nothing."""
    pass
45 | 
if __name__ == "__main__":
    # Usage: test.py -i <image>; prints the detections for that image.
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--input", required=True, help="path to input image")
    args = vars(ap.parse_args())

    image_path = args["input"]

    im = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    if im is None:
        raise SystemExit("cannot read image: {}".format(image_path))

    print(detect(im))
56 | 
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | '''
  3 | This script will randomize weights and train the tiny yolo from scratch.
  4 | '''
  5 | from data.datahandler import shuffle
  6 | import tensorflow as tf
  7 | import numpy as np
  8 | import os
  9 | import sys
 10 | import shutil
 11 | import time
 12 | 
 13 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 14 | 
 15 | saver = tf.train.import_meta_graph("./graph/tiny-yolo.ckpt.meta")
 16 | with tf.Session() as sess:
 17 |     saver.restore(sess, "./graph/tiny-yolo.ckpt")
 18 |     g = sess.graph
 19 |     with g.name_scope("TRAINER"):
 20 |         X = g.get_tensor_by_name("YOLO/input:0")
 21 |         batch_size, height, width, in_channels = X.get_shape().as_list()
 22 |         classes = 80
 23 |         out_height = height//32
 24 |         out_width = width//32
 25 |         out_channels = 3*(5+classes)
 26 |         h1 = g.get_tensor_by_name("YOLO/output1:0")
 27 |         h2 = g.get_tensor_by_name("YOLO/output2:0")
 28 |         Y1 = tf.placeholder(shape = (batch_size, out_height, out_width, out_channels), dtype = tf.float32, name = "groundtruth1")
 29 |         Y2 = tf.placeholder(shape = (batch_size, 2*out_height, 2*out_width, out_channels), dtype = tf.float32, name = "groundtruth2")
 30 |     
 31 |         #loss
 32 |         h = []
 33 |         Y = []
 34 | 
 35 |         split_h1 = tf.split(h1, 3, axis = 3)
 36 |         for split in split_h1:
 37 |             h.append(tf.reshape(split, [batch_size * out_height * out_width, out_channels//3]))
 38 | 
 39 |         split_h2 = tf.split(h2, 3, axis = 3)
 40 |         for split in split_h2:
 41 |             h.append(tf.reshape(split, [batch_size * 2*out_height * 2*out_width, out_channels//3]))
 42 | 
 43 |         split_Y1 = tf.split(Y1, 3, axis = 3)
 44 |         for split in split_Y1:
 45 |             Y.append(tf.reshape(split, [batch_size * out_height * out_width, out_channels//3]))
 46 | 
 47 |         split_Y2 = tf.split(Y2, 3, axis = 3)
 48 |         for split in split_Y2:                                                                         
 49 |             Y.append(tf.reshape(split, [batch_size * 2*out_height * 2*out_width, out_channels//3]))       
 50 |     
 51 |         h = tf.concat(h, axis=0, name = "h")
 52 |         Y = tf.concat(Y, axis=0)
 53 |    
 54 |         Lcoord = 1
 55 |         Lnoobj = 1
 56 |         loss_xy = Lcoord*tf.reduce_mean(Y[:,0]*((h[:,1] - Y[:,1])**2 + (h[:,2] - Y[:,2])**2))
 57 |         loss_wh = Lcoord*tf.reduce_mean(Y[:,0]*((h[:,3]**0.5 - Y[:,3]**0.5)**2+(h[:,4]**0.5 - Y[:,4]**0.5)**2))
 58 |         loss_obj = (-1)*tf.reduce_mean(tf.tile(Y[:,0:1], (1, classes))*(Y[:,5:]*tf.log(h[:,5:]) + (1-Y[:,5:])*tf.log(1-h[:,5:])))
 59 |         loss_noobj = (-1*Lnoobj)*tf.reduce_mean(tf.tile(1-Y[:,0:1], (1, classes))*(Y[:,5:]*tf.log(h[:,5:]) + (1-Y[:,5:])*tf.log(1-h[:,5:])))
 60 |         loss_p = (-1)*tf.reduce_mean(tf.tile(Y[:,0:1], (1, classes))*tf.log((tf.tile(h[:,0:1], (1, classes)) * Y[:,5:])) + (1-tf.tile(Y[:,0:1], (1, classes)))*tf.log(1-(tf.tile(h[:,0:1], (1, classes)) * Y[:,5:])))
 61 | 
 62 |         loss = loss_xy + loss_wh + loss_obj + loss_noobj + loss_p
 63 | 
 64 |         optimizer = tf.train.AdamOptimizer(learning_rate = 1e-3)
 65 |         trainer = optimizer.minimize(loss, name = "trainer")
 66 | 
 67 |     if os.path.exists("./train_graph"):
 68 |             shutil.rmtree("./train_graph")
 69 |     os.mkdir("./train_graph")
 70 | 
 71 |     train_writer = tf.summary.FileWriter("./train_graph", g)
 72 |     saver = tf.train.Saver()
 73 |     tf.summary.histogram("loss", loss)
 74 |     merge = tf.summary.merge_all()
 75 | 
 76 | 
 77 | 
 78 |     hm_steps = 25000
 79 |     sess.run(tf.global_variables_initializer())
 80 | 
 81 |     input_size = height
 82 | 
 83 |     for batch in shuffle(batch_size, input_size):
 84 |         step, Xp, Y1p, Y2p = batch
 85 |         if step == 0:
 86 |             time.sleep(1)
 87 |             continue
 88 |         debugger = tf.logical_or(tf.is_nan(loss), tf.is_inf(loss))
 89 | 
 90 |         while (1):
 91 |             d, l = sess.run([debugger, loss], feed_dict = {X:Xp, Y1:Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5})
 92 |             if (not d):
 93 |                 break
 94 |             else:
 95 |                 print("Re-random variables!")
 96 |                 sess.run(tf.global_variables_initializer())
 97 |         summary, _ , lossp, lxy, lwh, lobj, lnoobj, lp = sess.run([merge, trainer, loss, loss_xy, loss_wh, loss_obj, loss_noobj, loss_p], feed_dict = {X: Xp, Y1: Y1p, Y2:Y2p, "YOLO/dropout:0" = 0.5})
 98 | 
 99 |         print("""Step {} : loss {}
100 |     loss_xy     = {}
101 |     loss_wh     = {}
102 |     loss_obj    = {}
103 |     loss_noobj  = {}
104 |     loss_p      = {}\n""".format(step, lossp, lxy, lwh, lobj, lnoobj, lp), end="\n")
105 | 
106 |         train_writer.add_summary(summary, step)
107 | 
108 |         if (step % 2500 ==0):
109 |             saver.save(sess, "./train_graph/tiny-yolo-{}.ckpt".format(step))
110 |         if (step>hm_steps):
111 |              saver.save(sess, "./train_graph/tiny-yolo-final.ckpt".format(step))       
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------