├── README.md
├── convert_annotations_for_yolov2.py
├── obj.names
└── yolov2_logo_detection.cfg

/README.md:
--------------------------------------------------------------------------------
# Logo Detection using YOLOv2

![alt text](https://cdn-images-1.medium.com/max/1100/1*uLoIU1s-lcvfOMBgRFZ1eg.png "YOLOv2 detecting the Google logo")

This repository provides the [code](https://github.com/akarshzingade/Logo-Detection-YOLOv2/blob/master/convert_annotations_for_yolov2.py) that converts FlickrLogo-47 Dataset annotations to the format required by YOLOv2. It also contains the YOLOv2 [configuration file](https://github.com/akarshzingade/Logo-Detection-YOLOv2/blob/master/yolov2_logo_detection.cfg) used for logo detection. You can read about how YOLOv2 works and how it was used to detect logos in the FlickrLogo-47 Dataset in [this](https://medium.com/@akarshzingade/logo-detection-using-yolov2-8cda5a68740e) blog post.

The best weights for logo detection using YOLOv2 can be found [here](https://drive.google.com/open?id=1_Wg2hOKRiqWK6rpbCI6XbNLOC5YT1zyS).

# Instructions to use convert_annotations_for_yolov2.py

convert_annotations_for_yolov2.py takes 4 arguments:
1) Path to the train/test folder containing the images and annotations of the FlickrLogo-47 Dataset.
2) Path to the destination folder where the images and the converted annotations are to be stored.
3) Path where the train.txt/test.txt file and the obj.names file will be stored.
4) Name of the text file that will list the paths to the train/test images.

The train.txt/test.txt file produced by the 3rd and 4th arguments is what points YOLOv2 at the train/test images.

## How to pass the arguments?

```
python convert_annotations_for_yolov2.py --input_directory train --output_directory train_yolo --obj_names_path . --text_filename train
```

This will take './train' as the input folder (this should point to the train folder of the FlickrLogo-47 dataset), './train_yolo' as the output folder where all the images and the converted annotations will be stored, '.' as the path to store train.txt and obj.names, and 'train' as the name of the text file that will list the paths to all the training images.

Make sure the "className2ClassID.txt" file is present in the directory passed as --obj_names_path; the script reads it to generate obj.names.

Run convert_annotations_for_yolov2.py for both the train and test directories of the FlickrLogo-47 dataset.
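To make the conversion concrete, here is a minimal sketch of what the script does to a single annotation line. It assumes the `xmin ymin xmax ymax class_id` ground-truth layout that the script expects; the annotation values and the 640x480 image size below are hypothetical:

```python
gt_line = "120 60 320 180 26"  # hypothetical FlickrLogo-47 line: xmin ymin xmax ymax class_id
xmin, ymin, xmax, ymax, class_id = gt_line.split(' ')
img_w, img_h = 640, 480        # hypothetical image size (the script reads it via PIL)

# YOLOv2 wants "class_id x_center y_center width height", all normalized to [0, 1].
x = (float(xmin) + float(xmax)) / 2.0 / img_w
y = (float(ymin) + float(ymax)) / 2.0 / img_h
w = (float(xmax) - float(xmin)) / img_w
h = (float(ymax) - float(ymin)) / img_h

print(class_id, x, y, w, h)    # -> 26 0.34375 0.25 0.3125 0.25
```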
--------------------------------------------------------------------------------
/convert_annotations_for_yolov2.py:
--------------------------------------------------------------------------------
import argparse
import os
from PIL import Image
from shutil import copyfile

def convert(size, box):
    # Convert a box given as (xmin, xmax, ymin, ymax) in pixels into YOLO's
    # normalized (x_center, y_center, width, height) format.
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    dw = 1./size[0]
    dh = 1./size[1]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x, y, w, h)

def convert_annotation(input_path, output_path, obj_names_path, text_filename):
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    train_files = []  # Keeps track of the paths of all copied images.

    for folder_name in os.listdir(input_path):
        try:
            if "no-logo" in folder_name or "filelist" in folder_name:
                continue
            for file_name in os.listdir(os.path.join(input_path, folder_name)):
                if '.png' in file_name and 'mask' not in file_name:
                    copyfile(os.path.join(input_path, folder_name, file_name),
                             os.path.join(output_path, file_name))
                    # Record the absolute path of the copied image, since
                    # train.txt/test.txt must point at the output folder.
                    train_files.append(os.path.abspath(os.path.join(output_path, file_name)) + '\n')
                elif '.txt' in file_name and 'mask' not in file_name:
                    new_text = ""
                    with open(os.path.join(input_path, folder_name, file_name), 'r') as f:
                        lines = f.read().split('\n')[:-1]
                    for line in lines:
                        # FlickrLogo-47 ground truth: "xmin ymin xmax ymax class_id ..."
                        chunks = line.split(' ')
                        class_id = chunks[4]
                        xmin = chunks[0]
                        ymin = chunks[1]
                        xmax = chunks[2]
                        ymax = chunks[3]
                        img_path = str(os.path.join(input_path, folder_name, '%s.png' % (file_name.split('.')[0])))
                        img = Image.open(img_path)
                        w = int(img.size[0])
                        h = int(img.size[1])
                        b = (float(xmin), float(xmax), float(ymin), float(ymax))
                        bb = convert((w, h), b)
                        new_text += (str(class_id) + " " + " ".join([str(a) for a in bb]) + '\n')
                    with open(os.path.join(output_path, file_name.replace('.gt_data', '')), 'w') as f:
                        f.write(new_text)
        except Exception:
            # Skip folders/files that don't match the expected dataset layout.
            continue

    # Write train.txt/test.txt listing the paths of all copied images.
    with open(os.path.join(obj_names_path, text_filename + '.txt'), 'w') as f:
        f.write("".join(train_files))

    # Derive obj.names from the class names in className2ClassID.txt.
    with open(os.path.join(obj_names_path, 'className2ClassID.txt'), 'r') as f:
        classes_all = f.read().split('\n')[:-1]

    classes = []
    for line in classes_all:
        classes.append(line.split('\t')[0] + '\n')

    with open(os.path.join(obj_names_path, 'obj.names'), 'w') as f:
        f.write("".join(classes))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert FlickrLogo-47 annotations to the YOLOv2 format.')

    parser.add_argument('--input_directory',
                        help='Path to the FlickrLogo-47 train/test folder (default: ./train)')

    parser.add_argument('--output_directory',
                        help='Path to store the images and converted annotations (default: ./train_yolo)')

    parser.add_argument('--obj_names_path',
                        help='Path to store train.txt/test.txt and obj.names (default: ./)')

    parser.add_argument('--text_filename',
                        help='Name of the text file listing the train/test image paths (default: train)')

    args = parser.parse_args()
    if args.input_directory is None:
        args.input_directory = "./train"

    if not os.path.exists(args.input_directory):
        print(args.input_directory + " path does not exist!")
        quit()

    if args.output_directory is None:
        args.output_directory = './train_yolo'

    if args.obj_names_path is None:
        args.obj_names_path = './'

    if args.text_filename is None:
        args.text_filename = 'train'
    if '.txt' in args.text_filename:
        args.text_filename = args.text_filename.replace('.txt', '')

    print("Input Directory: " + args.input_directory)
    print("Output Directory: " + args.output_directory)
    print("Name of the file that contains path to train/test images: " + args.text_filename + '.txt')
    convert_annotation(input_path=args.input_directory, output_path=args.output_directory,
                       obj_names_path=args.obj_names_path, text_filename=args.text_filename)
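# Example invocations (a sketch; the paths are placeholders for your local
# copies of the FlickrLogo-47 train and test folders):
#
#   python convert_annotations_for_yolov2.py --input_directory ./train \
#       --output_directory ./train_yolo --obj_names_path . --text_filename train
#
#   python convert_annotations_for_yolov2.py --input_directory ./test \
#       --output_directory ./test_yolo --obj_names_path . --text_filename test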
--------------------------------------------------------------------------------
/obj.names:
--------------------------------------------------------------------------------
HP
adidas_symbol
adidas_text
aldi
apple
becks_symbol
becks_text
bmw
carlsberg_symbol
carlsberg_text
chimay_symbol
chimay_text
cocacola
corona_symbol
corona_text
dhl
erdinger_symbol
erdinger_text
esso_symbol
esso_text
fedex
ferrari
ford
fosters_symbol
fosters_text
google
guinness_symbol
guinness_text
heineken
milka
nvidia_symbol
nvidia_text
paulaner_symbol
paulaner_text
pepsi_symbol
pepsi_text
rittersport
shell
singha_symbol
singha_text
starbucks
stellaartois_symbol
stellaartois_text
texaco
tsingtao_symbol
tsingtao_text
ups
--------------------------------------------------------------------------------
/yolov2_logo_detection.cfg:
--------------------------------------------------------------------------------
[net]
batch=64
subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.0001
max_batches = 45000
policy=steps
steps=100,25000,35000
scales=10,.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky


#######

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[route]
layers=-9

[reorg]
stride=2

[route]
layers=-1,-3

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
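# Note: the 1x1 output layer below must have filters = num * (classes + coords + 1),
# where num, classes and coords are taken from the [region] section at the end of
# this file. With the 5 anchor boxes and 47 logo classes used here:
# 5 * (47 + 4 + 1) = 260. If you retrain on a different number of classes,
# update both this filters value and classes= accordingly.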
[convolutional]
size=1
stride=1
pad=1
filters=260
activation=linear

[region]
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
bias_match=1
classes=47
coords=4
num=5
softmax=1
jitter=.2
rescore=1

object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1

absolute=1
thresh = .6
random=0
--------------------------------------------------------------------------------