├── README.md
├── argumentation.jpg
├── conf.py
├── data.py
├── data_process.py
├── detect.jpg
├── detect.py
├── kite.jpg
├── loss.py
├── model.py
├── model_infer.py
├── mosaic_argumentation.jpg
├── nms.py
├── prepost_process.py
├── read_txt.py
└── torchx.yaml
/README.md:
--------------------------------------------------------------------------------
1 | # Yolov4-tensorflow
2 | TensorFlow implementation of YOLOv4
3 |
4 | Dependencies:
5 |
6 | TensorFlow 2.x
7 | OpenCV
8 |
9 |
10 | # Mosaic data augmentation
11 | data.py adds mosaic data augmentation, for ImageNet classification and object detection
12 |
13 | This mosaic data augmentation is not exactly the same as the original YOLOv4 implementation, but it is quite close, and I will keep working to match it.
14 |
15 |
16 |
17 | 
18 |
19 |
20 | # Inference
21 | Some post-processing code is borrowed from [here](https://github.com/hunglc007/tensorflow-yolov4-tflite); I will replace it with my own version.
22 |
23 | run:
24 | ```bash
25 | python detect.py --image ./kite.jpg
26 | ```
27 | demo:
28 | 
29 |
30 |
--------------------------------------------------------------------------------
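Note: the mosaic stitch in data.py boils down to four crops joined at a random cut point. A minimal standalone sketch of that idea (the image contents and the cut point below are made-up example values):

```python
import tensorflow as tf

# four same-sized images, e.g. decoded by load_img() in data.py
imgs = [tf.random.uniform((608, 608, 3)) for _ in range(4)]
cut_x, cut_y = 243, 365   # a random point within [0.2, 0.8] of each edge

d1 = imgs[0][:cut_y, :cut_x, :]   # top-left quadrant
d2 = imgs[1][cut_y:, :cut_x, :]   # bottom-left
d3 = imgs[2][cut_y:, cut_x:, :]   # bottom-right
d4 = imgs[3][:cut_y, cut_x:, :]   # top-right

left = tf.concat([d1, d2], axis=0)         # stack the left halves vertically
right = tf.concat([d4, d3], axis=0)        # and the right halves
mosaic = tf.concat([left, right], axis=1)  # -> one (608, 608, 3) training image
```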
/argumentation.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/argumentation.jpg
--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
1 | # inference
2 | XYSCALE = [1.2, 1.1, 1.05]
3 |
4 | # training
5 | EPOCHS = 1000
6 | BATCH_SIZE = 4
7 | load_weights_before_training = False
8 | load_weights_from_epoch = 10
9 | STRIDES = [8, 16, 32]
10 |
11 | ANCHORS = [12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401]
12 |
13 | # input image
14 | IMAGE_HEIGHT = 608
15 | IMAGE_WIDTH = 608
16 | CHANNELS = 3
17 |
18 | # Dataset
19 | CATEGORY_NUM = 80
20 | ANCHOR_NUM_EACH_SCALE = 3
21 | COCO_ANCHORS = [[116, 90], [156, 198], [373, 326], [30, 61], [62, 45], [59, 119], [10, 13], [16, 30], [33, 23]]
22 | COCO_ANCHOR_INDEX = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
23 | SCALE_SIZE = [13, 26, 52]  # feature-map sizes for a 416 input; a 608 input gives 76/38/19
24 |
25 | use_dataset = "pascal_voc" # "custom", "pascal_voc", "coco"
26 |
27 | PASCAL_VOC_DIR = "./dataset/VOCdevkit/VOC2012/"
28 | PASCAL_VOC_ANNOTATION = PASCAL_VOC_DIR + "Annotations"
29 | PASCAL_VOC_IMAGE = PASCAL_VOC_DIR + "JPEGImages"
30 | # The 20 object classes of PASCAL VOC
31 | PASCAL_VOC_CLASSES = {"person": 1, "bird": 2, "cat": 3, "cow": 4, "dog": 5,
32 | "horse": 6, "sheep": 7, "aeroplane": 8, "bicycle": 9,
33 | "boat": 10, "bus": 11, "car": 12, "motorbike": 13,
34 | "train": 14, "bottle": 15, "chair": 16, "diningtable": 17,
35 | "pottedplant": 18, "sofa": 19, "tvmonitor": 20}
36 |
37 | COCO_DIR = "/mnt/d/coco2017/"
38 | COCO_CLASSES = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorbike', 4: 'aeroplane', 5: 'bus', 6: 'train', 7: 'truck',
39 | 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
40 | 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
41 | 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase',
42 | 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
43 | 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
44 | 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
45 | 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut',
46 | 55: 'cake', 56: 'chair', 57: 'sofa', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet',
47 | 62: 'tvmonitor', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
48 | 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock',
49 | 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
50 |
51 | TRAIN_DIR = "train2017"
52 |
53 | TXT_DIR = "./data.txt"
54 |
55 | custom_dataset_dir = ""
56 | custom_dataset_classes = {}
57 |
58 | # loss
59 | IGNORE_THRESHOLD = 0.5
60 |
61 |
62 | # NMS
63 | CONFIDENCE_THRESHOLD = 0.6
64 | IOU_THRESHOLD = 0.5
65 | MAX_BOX_NUM = 50
66 |
67 | MAX_TRUE_BOX_NUM_PER_IMG = 20
68 |
69 |
70 | # save model
71 | save_model_dir = "saved_model/"
72 | save_frequency = 5
73 |
74 | test_images_during_training = True
75 | training_results_save_dir = "./test_results_during_training/"
76 | test_images = ["", ""]
77 |
78 | test_picture_dir = "./test_data/1.jpg"
79 | test_video_dir = "./test_data/test_video.mp4"
80 | temp_frame_dir = "./test_data/temp.jpg"
81 |
82 | class DATA_ARG_FACTOR:  # darknet-style augmentation jitter ranges
83 |     saturation = 1.5
84 |     exposure = 1.5
85 |     hue = .1
86 |
--------------------------------------------------------------------------------
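For reference, detect.py consumes the flat ANCHORS list above by reshaping it into three (anchor, width/height) groups, one per stride; a quick sketch of the mapping:

```python
import numpy as np
from conf import ANCHORS, STRIDES

anchors = np.reshape(np.array(ANCHORS), [3, 3, 2])  # (scale, anchor, w/h)
for i, stride in enumerate(STRIDES):
    print("stride", stride, ":", anchors[i].tolist())
# stride 8  : [[12, 16], [19, 36], [40, 28]]
# stride 16 : [[36, 75], [76, 55], [72, 146]]
# stride 32 : [[142, 110], [192, 243], [459, 401]]
```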
/data.py:
--------------------------------------------------------------------------------
1 | """
2 | mosaic data augmentation TensorFlow implementation
3 | reference: https://github.com/clovaai/CutMix-PyTorch https://github.com/AlexeyAB/darknet
4 | """
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 | from tensorflow.keras.preprocessing.image import ImageDataGenerator
9 | import argparse
10 | import cv2
11 | from read_txt import ReadTxt
12 | import os
13 | import random
14 | from conf import COCO_DIR, TRAIN_DIR, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS, DATA_ARG_FACTOR
15 | TXT_DIR = "./data.txt"
16 | BATCH_SIZE = 4
17 | data_factors = DATA_ARG_FACTOR()
18 |
19 |
20 | parser = argparse.ArgumentParser(description="mosaic data augmentation tensorflow implementation")
21 | parser.add_argument("--path", default="./imagenet_test", type=str)
22 | args = parser.parse_args()
23 |
24 |
25 | def load_classification_data():
26 | """
27 |     uses a two-class imagenet_test data folder as a test
28 | """
29 | train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data
30 | train_data_gen = train_image_generator.flow_from_directory(batch_size=4,
31 | directory=args.path,
32 | shuffle=True,
33 | target_size=(224, 224),
34 | class_mode='binary')
35 | steps = 4
36 | while (steps > 0):
37 | for inputs, target in train_data_gen:
38 | min_offset = 0.2
39 | w = inputs.shape[1]
40 | h = inputs.shape[2]
41 | cut_x = np.random.randint(int(w*min_offset), int(w*(1 - min_offset)))
42 | cut_y = np.random.randint(int(h*min_offset), int(h*(1 - min_offset)))
43 |
44 |             s1 = (cut_x * cut_y) / (w*h)            # area fraction of each quadrant
45 |             s2 = ((w - cut_x) * cut_y) / (w*h)      # (integer `//` would always give 0 here)
46 |             s3 = (cut_x * (h - cut_y)) / (w*h)
47 |             s4 = ((w - cut_x) * (h - cut_y)) / (w*h)
48 |
49 | d1 = inputs[0, :(h-cut_y), 0:cut_x, :]
50 | d2 = inputs[1, (h-cut_y):, 0:cut_x, :]
51 | d3 = inputs[2, (h-cut_y):, cut_x:, :]
52 | d4 = inputs[3, :(h-cut_y), cut_x:, :]
53 |
54 | tmp1 = np.vstack((d1, d2))
55 | tmp2 = np.vstack((d4, d3))
56 |
57 | tmpx = np.hstack((tmp1, tmp2))
58 | tmpx = tmpx*255
59 |             tmpy = target[0]*s1 + target[1]*s2 + target[2]*s3 + target[3]*s4  # area-weighted mixed label (unused in this demo)
60 |
61 | cv2.imwrite("argumentation.jpg", tmpx)
62 | break
63 |
64 | steps -= 1
65 |
66 | #load_classification_data()
67 |
68 | def random_gen():
69 | return np.random.randint(10000)
70 |
71 | def rand_int(low, high):
72 |     if high < low:
73 |         low, high = high, low
74 |
75 |     r = (random_gen() % (high - low + 1)) + low
76 |     return r
77 |
78 | def random_float():
79 | return np.random.rand()
80 |
81 | def rand_uniform_strong(low, high):
82 |     if high < low:
83 |         low, high = high, low
84 |     return (random_float() * (high - low)) + low
85 |
86 | def rand_scale(s):
87 | scale = rand_uniform_strong(1, s)
88 | if(random_gen()%2):
89 | return scale
90 | return 1./scale
91 |
92 | def draw_boxes(images, boxes):
93 | for i in range(BATCH_SIZE):
94 | img = images[i].numpy()
95 | cv2.imwrite("hello.jpg", img)
96 | img = cv2.imread("hello.jpg")
97 | for j in range(len(boxes[i])):
98 | x = boxes[i][j][1]
99 | y = boxes[i][j][2]
100 | w = boxes[i][j][3]
101 | h = boxes[i][j][4]
102 |
103 | left = int((x - w / 2) * IMAGE_WIDTH)
104 | top = int((y - h / 2) * IMAGE_HEIGHT)
105 | right = int((x + w / 2) * IMAGE_WIDTH)
106 | bot = int((y + h / 2) * IMAGE_HEIGHT)
107 |
108 | cv2.rectangle(img, (left, top), (right, bot), (0,0,255), 2)
109 |         img = cv2.resize(img, (224, 224))  # keep the result; cv2.resize is not in-place
110 | cv2.imwrite(str(i)+".jpg", img)
111 |
112 |
113 | def load_img(file_path):
114 |
115 | img_raw = tf.io.read_file(file_path)
116 | image = tf.io.decode_jpeg(img_raw, channels=CHANNELS)
117 | image = tf.image.adjust_saturation(image, rand_scale(data_factors.saturation))
118 | image = tf.image.adjust_hue(image, rand_uniform_strong(-1*data_factors.hue, data_factors.hue))
119 | image = tf.image.adjust_contrast(image, rand_scale(data_factors.exposure))
120 | #image = tf.image.resize_with_pad(image=image, target_height=IMAGE_HEIGHT, target_width=IMAGE_WIDTH)
121 | image = tf.image.resize(images=image, size=(IMAGE_HEIGHT,IMAGE_WIDTH))
122 |
123 | return image
124 |
125 | def merge_bboxes(bboxes, cutx, cuty):
126 | cutx = cutx / IMAGE_WIDTH
127 | cuty = cuty / IMAGE_HEIGHT
128 |
129 | merge_bbox = []
130 | for i in range(bboxes.shape[0]):
131 | for box in bboxes[i]:
132 | tmp_box = []
133 | x,y,w,h = box[1], box[2], box[3], box[4]
134 |
135 | if i == 0:
136 | if box[2]-box[4]/2 > cuty or box[1]-box[3]/2 > cutx:
137 | continue
138 |
139 | if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty:
140 | h -= (box[2]+box[4]/2-cuty)
141 | y -= (box[2]+box[4]/2-cuty)/2
142 |
143 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx:
144 | w -= (box[1]+box[3]/2-cutx)
145 | x -= (box[1]+box[3]/2-cutx)/2
146 |
147 | if i == 1:
148 | if box[2]+box[4]/2 < cuty or box[1]-box[3]/2 > cutx:
149 | continue
150 |
151 |                 if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty:  # clamp y against cuty (was mistakenly cutx)
152 | h -= (cuty-(box[2]-box[4]/2))
153 | y += (cuty-(box[2]-box[4]/2))/2
154 |
155 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx:
156 | w -= (box[1]+box[3]/2-cutx)
157 | x -= (box[1]+box[3]/2-cutx)/2
158 |
159 | if i == 2:
160 | if box[2]+box[4]/2 < cuty or box[1]+box[3]/2 < cutx:
161 | continue
162 |
163 | if box[2]+box[4]/2 < 1 and box[2]-box[4]/2 < cuty:
164 | h -= (cuty-(box[2]-box[4]/2))
165 | y += (cuty-(box[2]-box[4]/2))/2
166 |
167 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx:
168 | w -= (cutx-(box[1]-box[3]/2))
169 | x += (cutx-(box[1]-box[3]/2))/2
170 |
171 | if i == 3:
172 | if box[2]-box[4]/2 > cuty or box[1]+box[3]/2 < cutx:
173 | continue
174 |
175 | if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty:
176 | h -= (box[2]+box[4]/2-cuty)
177 | y -= (box[2]+box[4]/2-cuty)/2
178 |
179 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx:
180 | w -= (cutx-(box[1]-box[3]/2))
181 | x += (cutx-(box[1]-box[3]/2))/2
182 |
183 | tmp_box.append(box[0])
184 | tmp_box.append(x)
185 | tmp_box.append(y)
186 | tmp_box.append(w)
187 | tmp_box.append(h)
188 | merge_bbox.append(tmp_box)
189 |
190 |     # TODO: eliminate boxes that became too small after clipping
191 |     # (the merged list may end up empty)
192 |
193 | if len(merge_bbox) == 0:
194 | return None
195 | else:
196 | return merge_bbox
197 |
198 | def mosaic_process(image_batch, label_batch):
199 |     """mosaic data augmentation (default dataset: coco)
200 |
201 |     Args:
202 |         image_batch: list of image file names
203 |         label_batch: per-image box arrays, each row (class, cx, cy, w, h)
204 |     """
205 |     # use_mix = 0: no mosaic; use_mix = 3: four-image mosaic
206 |
207 | use_mix = 3
208 |     # number of images in the batch
209 | n = len(image_batch)
210 |
211 | cut_x, cut_y = [0]*n, [0]*n
212 | random_index = random_gen()
213 | #if (random_index % 2 == 0): use_mix = 1
214 | if (use_mix == 3):
215 | min_offset = 0.2
216 | for i in range(n):
217 | h = IMAGE_HEIGHT
218 | w = IMAGE_WIDTH
219 | cut_x[i] = np.random.randint(int(w*min_offset), int(w*(1 - min_offset)))
220 | cut_y[i] = np.random.randint(int(h*min_offset), int(h*(1 - min_offset)))
221 | #cut_x[i] = random.uniform(min_offset, (1-min_offset))
222 | #cut_y[i] = random.uniform(min_offset, (1-min_offset))
223 |
224 | augmentation_calculated, gaussian_noise = 0, 0
225 |
226 | def get_random_paths():
227 | random_index = random.sample(list(range(n)), use_mix+1)
228 |
229 | random_paths = []
230 | random_bboxes = []
231 | for idx in random_index:
232 | random_paths.append(os.path.join(COCO_DIR, TRAIN_DIR, image_batch[idx]))
233 | random_bboxes.append(label_batch[idx])
234 | return random_paths, np.array(random_bboxes)
235 |
236 |     # n images per batch; mosaic also generates n output images
237 |
238 | if (use_mix == 3):
239 |
240 | dest = []
241 | new_boxes = []
242 | for i in range(n):
243 | paths, bboxes = get_random_paths()
244 | img0 = load_img(paths[0])
245 | img1 = load_img(paths[1])
246 | img2 = load_img(paths[2])
247 | img3 = load_img(paths[3])
248 |
249 | #cut and adjust
250 | d1 = img0[:cut_y[i], :cut_x[i], :]
251 | d2 = img1[cut_y[i]:, :cut_x[i], :]
252 | d3 = img2[cut_y[i]:, cut_x[i]:, :]
253 | d4 = img3[:cut_y[i], cut_x[i]:, :]
254 |
255 | tmp1 = tf.concat([d1, d2], axis=0)
256 | tmp2 = tf.concat([d4, d3], axis=0)
257 |
258 | dest.append(tf.concat([tmp1, tmp2], axis=1))
259 | #print(bboxes)
260 |
261 |             tmp_boxes = merge_bboxes(bboxes, cut_x[i], cut_y[i])
262 |             if not tmp_boxes:  # every box was cut away by the mosaic
263 |                 dest.pop()     # drop the stitched image too (`i = i - 1` would not retry a Python for loop)
264 |                 continue
265 |             new_boxes.append(tmp_boxes)
266 |
267 | dest = tf.stack(dest)
268 |
269 | draw_boxes(dest, new_boxes)
270 | return dest, new_boxes
271 |
272 |
273 |     if (use_mix == 0):
274 |         dest = []   # tf tensors do not support item assignment; collect, then stack
275 |         for i in range(n):
276 |             paths, bboxes = get_random_paths()
277 |             dest.append(load_img(paths[0]))
278 |         new_boxes = label_batch
279 |
280 |         return tf.stack(dest), new_boxes
281 |
282 |
283 | def get_length_of_dataset(dataset):
284 | count = 0
285 | for _ in dataset:
286 | count += 1
287 | return count
288 |
289 | def generate_dataset():
290 | txt_dataset = tf.data.TextLineDataset(filenames=TXT_DIR)
291 | train_count = get_length_of_dataset(txt_dataset)
292 | train_dataset = txt_dataset.batch(batch_size=BATCH_SIZE)
293 |
294 | return train_dataset, train_count
295 |
296 | def parse_dataset_batch(dataset):
297 | """
298 | Return :
299 | image_name_list : list, length is N (N is the batch size.)
300 | boxes_array : numpy.ndarrray, shape is (N, MAX_TRUE_BOX_NUM_PER_IMG, 5)
301 | """
302 | image_name_list = []
303 | boxes_list = []
304 | len_of_batch = dataset.shape[0]
305 | for i in range(len_of_batch):
306 | image_name, boxes = ReadTxt(line_bytes=dataset[i].numpy()).parse_line()
307 | image_name_list.append(image_name)
308 | boxes_list.append(boxes)
309 | boxes_array = np.array(boxes_list)
310 | return image_name_list, boxes_array
311 |
312 | if __name__ == "__main__":
313 |     # get the txt dataset, which contains filename, boxes and labels in text format
314 | train_dataset, train_count = generate_dataset()
315 |
316 | step = 0
317 | for dataset_batch in train_dataset:
318 | step += 1
319 | images, boxes = parse_dataset_batch(dataset=dataset_batch)
320 |
321 | images, boxes = mosaic_process(images, boxes)
322 | print(images.shape)
323 |
324 | #draw_boxes(images, boxes)
325 |
--------------------------------------------------------------------------------
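To see what the clipping in merge_bboxes does, here is a worked example for quadrant 0 (top-left) with the cut at (0.5, 0.5) and a box that sticks out past the vertical cut (the numbers are made up):

```python
# quadrant 0; the box is (cx, cy, w, h) in normalized coordinates
cutx, cuty = 0.5, 0.5
cx, cy, w, h = 0.45, 0.30, 0.20, 0.20      # right edge 0.55 crosses cutx

overshoot = (cx + w / 2) - cutx            # 0.05 sticks out of the quadrant
w -= overshoot                             # 0.15: trim the width...
cx -= overshoot / 2                        # 0.425: ...and recenter the box

assert abs((cx + w / 2) - cutx) < 1e-9     # the clipped box ends exactly at the cut
```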
/data_process.py:
--------------------------------------------------------------------------------
1 | from conf import COCO_DIR, COCO_CLASSES, IMAGE_HEIGHT, IMAGE_WIDTH
2 | import json
3 | from pathlib import Path
4 | import time
5 |
6 | class ResizeWithPad():
7 | def __init__(self, h, w):
8 | super(ResizeWithPad, self).__init__()
9 | self.H = IMAGE_HEIGHT
10 | self.W = IMAGE_WIDTH
11 | self.w = w
12 | self.h = h
13 |
14 | def get_transform_coefficient(self):
15 | if self.h <= self.w:
16 | longer_edge = "w"
17 | scale = self.W / self.w
18 | padding_length = (self.H - self.h * scale) / 2
19 | else:
20 | longer_edge = "h"
21 | scale = self.H / self.h
22 | padding_length = (self.W - self.w * scale) / 2
23 | return longer_edge, scale, padding_length
24 |
25 |     def raw_to_resized(self, x, y, w, h):  # pixel (x_min, y_min, w, h) -> normalized (cx, cy, w, h)
26 | x = x + w / 2
27 | y = y + h / 2
28 | x = x / self.w
29 | y = y / self.h
30 | w = w / self.w
31 | h = h / self.h
32 | return x, y, w, h
33 |
34 | def resized_to_raw(self, center_x, center_y, width, height):
35 | longer_edge, scale, padding_length = self.get_transform_coefficient()
36 | center_x *= self.W
37 | width *= self.W
38 | center_y *= self.H
39 | height *= self.H
40 | if longer_edge == "h":
41 | center_x -= padding_length
42 | else:
43 | center_y -= padding_length
44 | center_x = center_x / scale
45 | center_y = center_y / scale
46 | width = width / scale
47 | height = height / scale
48 | return center_x, center_y, width, height
49 |
50 |
51 | class ParseCOCO(object):
52 | def __init__(self):
53 | self.annotation_dir = COCO_DIR + "annotations/"
54 | self.images_dir = COCO_DIR + "train2017/"
55 | self.train_annotation = Path(self.annotation_dir + "instances_train2017.json")
56 | start_time = time.time()
57 | self.train_dict = self.__load_json(self.train_annotation)
58 | print("It took {:.2f} seconds to load the json files.".format(time.time() - start_time))
59 | print(self.__get_category_id_information(self.train_dict))
60 |
61 | def __load_json(self, json_file):
62 | print("Start loading {}...".format(json_file.name))
63 | with json_file.open(mode='r') as f:
64 | load_dict = json.load(f)
65 | print("Loading is complete!")
66 | return load_dict
67 |
68 | def __find_all(self, x, value):
69 | list_data = []
70 | for i in range(len(x)):
71 | if x[i] == value:
72 | list_data.append(i)
73 | return list_data
74 |
75 | def __get_image_information(self, data_dict):
76 | images = data_dict["images"]
77 | image_file_list = []
78 | image_id_list = []
79 | image_height_list = []
80 | image_width_list = []
81 | for image in images:
82 | image_file_list.append(image["file_name"])
83 | image_id_list.append(image["id"])
84 | image_height_list.append(image["height"])
85 | image_width_list.append(image["width"])
86 | return image_file_list, image_id_list, image_height_list, image_width_list
87 |
88 | def __get_bounding_box_information(self, data_dict):
89 | annotations = data_dict["annotations"]
90 | image_id_list = []
91 | bbox_list = []
92 | category_id_list = []
93 | for annotation in annotations:
94 | category_id_list.append(annotation["category_id"])
95 | image_id_list.append(annotation["image_id"])
96 | bbox_list.append(annotation["bbox"])
97 | return image_id_list, bbox_list, category_id_list
98 |
99 | def __get_category_id_information(self, data_dict):
100 | categories = data_dict["categories"]
101 | category_dict = {}
102 | for category in categories:
103 | category_dict[category["name"]] = category["id"]
104 | return category_dict
105 |
106 | def __process_coord(self, x, y, w, h, image_width, image_height):
107 | x_center, y_center, w_norm, h_norm = ResizeWithPad(h=image_height, w=image_width).raw_to_resized(x, y, w, h)
108 | return x_center, y_center, w_norm, h_norm
109 |
110 | def __bbox_information(self, image_id, image_ids_from_annotation, bboxes, image_height, image_width, category_ids):
111 | processed_bboxes = []
112 | index_list = self.__find_all(x=image_ids_from_annotation, value=image_id)
113 | for index in index_list:
114 | x, y, w, h = bboxes[index]
115 |
116 | x_center, y_center, w_norm, h_norm = self.__process_coord(x, y, w, h, image_width, image_height)
117 | processed_bboxes.append([self.__category_id_transform(category_ids[index]), x_center, y_center, w_norm, h_norm])
118 | return processed_bboxes
119 |
120 | def __category_id_transform(self, original_id):
121 | category_id_dict = self.__get_category_id_information(self.train_dict)
122 | original_name = "none"
123 | for category_name, category_id in category_id_dict.items():
124 | if category_id == original_id:
125 | original_name = category_name
126 | if original_name == "none":
127 | raise ValueError("An error occurred while transforming the category id.")
128 |         return {name: idx for idx, name in COCO_CLASSES.items()}[original_name]  # COCO_CLASSES maps id -> name, so invert it
129 |
130 | def __bbox_str(self, bboxes):
131 | bbox_info = ""
132 | for bbox in bboxes:
133 | for item in bbox:
134 | bbox_info += str(item)
135 | bbox_info += " "
136 | return bbox_info.strip()
137 |
138 | def write_data_to_txt(self, txt_dir):
139 | image_files, image_ids, image_heights, image_widths = self.__get_image_information(self.train_dict)
140 | image_ids_from_annotation, bboxes, category_ids = self.__get_bounding_box_information(self.train_dict)
141 | with open(file=txt_dir, mode="a+") as f:
142 | picture_index = 0
143 | for i in range(len(image_files)):
144 | write_line_start_time = time.time()
145 | line_info = ""
146 | line_info += image_files[i] + " "
147 | processed_bboxes = self.__bbox_information(image_ids[i],
148 | image_ids_from_annotation,
149 | bboxes,
150 | image_heights[i],
151 | image_widths[i],
152 | category_ids)
153 | if processed_bboxes:
154 | picture_index += 1
155 | line_info += self.__bbox_str(bboxes=processed_bboxes)
156 | line_info += "\n"
157 |             print("Writing picture {} ({}) to {}, which took {:.2f}s".format(picture_index, image_files[i], txt_dir, time.time() - write_line_start_time))
158 | f.write(line_info)
159 |
160 | coco = ParseCOCO()
161 | TXT_DIR = "./data.txt"
162 | coco.write_data_to_txt(TXT_DIR)
163 |
164 |
--------------------------------------------------------------------------------
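The coordinate conversion behind raw_to_resized is easy to check by hand: a COCO annotation stores a pixel (x_top_left, y_top_left, w, h) box, and data.txt stores (cx, cy, w, h) normalized by the original image size. A worked example with made-up numbers:

```python
# a 640x480 image with a COCO box at (100, 120) of size 200x240
img_w, img_h = 640, 480
x, y, w, h = 100, 120, 200, 240

cx = (x + w / 2) / img_w   # 0.3125  (same math as ResizeWithPad.raw_to_resized)
cy = (y + h / 2) / img_h   # 0.5
wn = w / img_w             # 0.3125
hn = h / img_h             # 0.5
```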
/detect.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/detect.jpg
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from conf import ANCHORS, COCO_CLASSES, XYSCALE, IMAGE_HEIGHT, IMAGE_WIDTH, CATEGORY_NUM, STRIDES
4 | from model_infer import Yolo_Model, load_weights
5 | from prepost_process import postprocess_boxes, postprocess_bbbox, nms, draw_bbox, image_preporcess
6 | import argparse
7 | import cv2
8 | import time
9 |
10 | parser = argparse.ArgumentParser(description='yolov4 detect args')
11 | parser.add_argument('--image', type=str)
12 | parser.add_argument('--weight', type=str, default='yolov4.weights')
13 | args = parser.parse_args()
14 |
15 | if __name__ == "__main__":
16 | anchors = np.array(ANCHORS)
17 | anchors = np.reshape(anchors, [3, 3, 2])
18 | num_classes = len(COCO_CLASSES)
19 | xy_scale = XYSCALE
20 | input_size = IMAGE_WIDTH
21 |
22 | #input image path
23 | image_path = args.image
24 | img = cv2.imread(image_path)
25 | original_image = img
26 | original_image_size = img.shape[:2]
27 | image_data = image_preporcess(np.copy(original_image), [input_size, input_size])
28 |
29 | img_tensor = tf.convert_to_tensor(image_data, dtype=tf.float32)
30 | img_tensor = tf.expand_dims(img_tensor, axis=0)
31 |
32 | time_p1 = time.time()
33 | model = Yolo_Model()
34 |
35 | load_weights(model, args.weight)
36 | time_p2 = time.time()
37 |
38 | pred_bbox = model.predict(img_tensor)
39 | time_p3 = time.time()
40 |
41 | pred_bbox = postprocess_bbbox(pred_bbox, anchors, STRIDES, XYSCALE)
42 | bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
43 | bboxes = nms(bboxes, 0.213, method='nms')
44 | time_p4 = time.time()
45 |
46 | image = draw_bbox(original_image, bboxes)
47 | cv2.imwrite("detect.jpg", image)
48 |
49 | print("load model: ", time_p2-time_p1)
50 | print("forward: ", time_p3-time_p2)
51 | print("post process: ", time_p4-time_p3)
52 |
53 |
--------------------------------------------------------------------------------
/kite.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/kite.jpg
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | #ciou and diou loss python implementation
2 | import math
3 | pi = math.pi
4 | atan = math.atan
5 |
6 | class boxabs:
7 | left, right, top, bot = 0, 0, 0, 0
8 |
9 | class box:
10 | x, y, w, h = 0, 0, 0, 0
11 |
12 | def overlap(x1, w1, x2, w2):
13 | l1 = x1 - w1/2
14 | l2 = x2 - w2/2
15 | left = l1 if l1 - l2 > 0 else l2
16 | r1 = x1 + w1/2
17 | r2 = x2 + w2/2
18 | right = r1 if r1 - r2 < 0 else r2
19 | return right - left
20 |
21 | def box_intersection(a, b):
22 | """
23 | args:
24 | a type:box
25 | b type:box
26 | """
27 | w = overlap(a.x, a.w, b.x, b.w)
28 | h = overlap(a.y, a.h, b.y, b.h)
29 | if(w < 0 or h < 0):
30 | return 0
31 |     area = w*h
32 | return area
33 |
34 | def box_union(a, b):
35 | """
36 | args:
37 | a type:box
38 | b type:box
39 | """
40 | i = box_intersection(a, b)
41 | u = a.w*a.h + b.w*b.h - i
42 | return u
43 |
44 | def box_c(a, b):
45 | """
46 | arg: two boxes a b type: box
47 |     return: smallest box that fully encompasses a and b
48 | """
49 | ba = boxabs()
50 | ba.top = min(a.y - a.h / 2, b.y - b.h / 2)
51 | ba.bot = max(a.y + a.h / 2, b.y + b.h / 2)
52 | ba.left = min(a.x - a.w / 2, b.x - b.w / 2)
53 | ba.right = max(a.x + a.w / 2, b.x + b.w / 2)
54 | return ba
55 |
56 | def box_iou(a, b):
57 | """
58 | args:
59 | a type:box
60 | b type:box
61 | """
62 | I = box_intersection(a, b)
63 | U = box_union(a, b)
64 | if (I == 0 or U == 0):
65 | return 0
66 | return I / U
67 |
68 | def box_ciou(pred_box, gtbox):
69 | ba = box_c(pred_box, gtbox)
70 | w = ba.right - ba.left
71 | h = ba.bot - ba.top
72 |     # squared diagonal length of the enclosing box
73 | c = w * w + h * h
74 | iou = box_iou(pred_box, gtbox)
75 |     # c == 0 means the enclosing box is degenerate (w = h = 0)
76 | if c == 0:
77 | return iou
78 |     # squared center-point distance
79 | u = (pred_box.x - gtbox.x) * (pred_box.x - gtbox.x) + (pred_box.y - gtbox.y) * (pred_box.y - gtbox.y)
80 | d = u / c
81 | ar_gt = gtbox.w / gtbox.h
82 | ar_pred = pred_box.w / pred_box.h
83 | ar_loss = 4 / (pi * pi) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred))
84 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001)
85 | ciou_term = d + alpha * ar_loss
86 | return iou - ciou_term
87 |
88 |
89 | def box_diou(pred_box, gtbox):
90 | ba = box_c(pred_box, gtbox)
91 | w = ba.right - ba.left
92 | h = ba.bot - ba.top
93 |     c = w * w + h * h
94 | iou = box_iou(pred_box, gtbox)
95 | if (c == 0):
96 | return iou
97 | d = (pred_box.x - gtbox.x) * (pred_box.x - gtbox.x) + (pred_box.y - gtbox.y) * (pred_box.y - gtbox.y)
98 | u = math.pow(d / c, 0.6)
99 | diou_term = u
100 |
101 | return iou - diou_term
102 |
103 | if __name__ == "__main__":
104 | pred_box = box()
105 | pred_box.x, pred_box.y, pred_box.w, pred_box.h = 0.4, 0.6, 0.3, 0.2
106 | gtbox = box()
107 | gtbox.x, gtbox.y, gtbox.w, gtbox.h = 0.5, 0.5, 0.4, 0.3
108 | print("diou loss:", box_diou(pred_box, gtbox))
109 | print("ciou loss:", box_ciou(pred_box, gtbox))
110 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_addons as tfa
3 |
4 | #hyper parameters
5 | batch = 64
6 | subdivisions=8
7 | width=608
8 | height=608
9 | channels=3
10 | momentum=0.949
11 | decay=0.0005
12 | angle=0
13 | saturation = 1.5
14 | exposure = 1.5
15 | hue=.1
16 |
17 | learning_rate=0.00261
18 | burn_in=1000
19 | max_batches = 500500
20 | #policy=steps
21 | steps=400000,450000
22 | scales=.1,.1
23 |
24 | #cutmix=1
25 | mosaic=1
26 |
27 | weightfile = "../yolov4.weights"
28 | import numpy as np
29 |
30 | def load_weight():
31 |
32 | print('Loading weights.')
33 |     weights_file = open(weightfile, 'rb')
34 | major, minor, revision = np.ndarray(
35 | shape=(3, ), dtype='int32', buffer=weights_file.read(12))
36 | if (major*10+minor)>=2 and major<1000 and minor<1000:
37 | seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
38 | else:
39 | seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
40 | print('Weights Header: ', major, minor, revision, seen)
41 |     weights_file.close()
42 |
43 | def mish(x):
44 | return tfa.activations.mish(x)
45 |
46 | def leaky(x):
47 | return tf.nn.leaky_relu(x)
48 |
49 | def res_conn_block(filters, is_half):
50 | res = tf.keras.Sequential()
51 |
52 | res.add(Conv2D_BN_Mish(filters//2, kernel=1, strides=1))
53 | res.add(Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=3, strides=1))
54 |
55 | return res
56 |
57 | class Conv2D_BN_Mish(tf.keras.Model):
58 | def __init__(self, filters, kernel, strides):
59 | super(Conv2D_BN_Mish, self).__init__()
60 | padding = 'valid' if strides == 2 else 'same'
61 | if strides == 2:
62 | self.conv2d_bn = tf.keras.Sequential([
63 | tf.keras.layers.ZeroPadding2D(((1,0),(1,0))),
64 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel,
65 | strides = strides, padding = padding),
66 | tf.keras.layers.BatchNormalization(),
67 | ])
68 | else:
69 | self.conv2d_bn = tf.keras.Sequential([
70 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel,
71 | strides = strides, padding = padding),
72 | tf.keras.layers.BatchNormalization(),
73 | ])
74 |
75 | def call(self, x):
76 | x = self.conv2d_bn(x)
77 | x = mish(x)
78 | return x
79 |
80 | class Conv2D_BN_Leaky(tf.keras.Model):
81 | def __init__(self, filters, kernel, strides):
82 | super(Conv2D_BN_Leaky, self).__init__()
83 |
84 | if strides == 2:
85 | self.conv2d_bn = tf.keras.Sequential([
86 | tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0))),
87 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel,
88 | strides = strides, padding = 'valid'),
89 | tf.keras.layers.BatchNormalization(),
90 | tf.keras.layers.LeakyReLU(),
91 | ])
92 | else:
93 | self.conv2d_bn = tf.keras.Sequential([
94 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel,
95 | strides = strides, padding = 'same'),
96 | tf.keras.layers.BatchNormalization(),
97 | tf.keras.layers.LeakyReLU(),
98 | ])
99 |
100 | def call(self, x):
101 | x = self.conv2d_bn(x)
102 | return x
103 |
104 | class ResBlock(tf.keras.Model):
105 | def __init__(self, filters, res_num, is_half):
106 | super(ResBlock, self).__init__()
107 | self.res_num = res_num
108 |
109 | self.pad_conv = Conv2D_BN_Mish(filters, kernel=3, strides=2)
110 | self.pred_block_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1)
111 |         self.res_conn_blocks = [res_conn_block(filters, is_half) for _ in range(res_num)]  # one block per repeat, no weight sharing
112 | self.succ_block_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1)
113 | self.right_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1)
114 | self.after_concat_conv = Conv2D_BN_Mish(filters, kernel=1, strides=1)
115 |
116 | def call(self,x):
117 | pred_res = self.pad_conv(x)
118 | right_conv = self.right_conv(pred_res)
119 | left_conv = self.pred_block_conv(pred_res)
120 |         for i in range(self.res_num):
121 |             res_block_out = self.res_conn_blocks[i](left_conv)
122 | left_conv = left_conv + res_block_out
123 | left_conv = self.succ_block_conv(left_conv)
124 |
125 | concat_x = tf.concat([left_conv, right_conv], axis=-1)
126 | out = self.after_concat_conv(concat_x)
127 | return out
128 |
129 | def make_leaky_convs(layer_num, filters, strides=1):
130 |
131 | layers = tf.keras.Sequential()
132 | if layer_num == 1:
133 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
134 |
135 | if layer_num == 3:
136 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
137 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides))
138 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
139 |
140 | if layer_num == 5:
141 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
142 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides))
143 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
144 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides))
145 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides))
146 |
147 | return layers
148 |
149 | class spp(tf.keras.Model):
150 | def __init__(self):
151 | super(spp, self).__init__()
152 | self.pool1 = tf.keras.layers.MaxPooling2D((5,5), strides=1, padding='same')
153 | self.pool2 = tf.keras.layers.MaxPooling2D((9,9), strides=1, padding='same')
154 | self.pool3 = tf.keras.layers.MaxPooling2D((13,13), strides=1, padding='same')
155 |
156 | def call(self, x):
157 | return tf.concat([self.pool1(x), self.pool2(x), self.pool3(x), x], -1)
158 |
159 |
160 | class Yolo_Model(tf.keras.Model):
161 | def __init__(self,):
162 | super(Yolo_Model, self).__init__()
163 | self.conv_last1 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same')
164 | self.conv_last2 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same')
165 | self.conv_last3 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same')
166 | self.pad = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))
167 | self.first_conv = Conv2D_BN_Mish(filters=32, kernel=3, strides=1)
168 | self.res_block1 = ResBlock(64, 1, False)
169 | self.res_block2 = ResBlock(128, 2, True)
170 | self.res_block3 = ResBlock(256, 8, True)
171 |
172 |         self.res_block4 = ResBlock(512, 8, True)    # CSP stages after the first halve the split channels
173 |         self.res_block5 = ResBlock(1024, 4, True)   # (matches model_infer.py, which loads the official weights)
174 |
175 | self.conv_leaky3_1 = make_leaky_convs(3, 512)
176 | self.conv_leaky3_2 = make_leaky_convs(3, 512)
177 | self.conv_leaky1_1 = make_leaky_convs(1, 256)
178 | self.conv_leaky1_2 = make_leaky_convs(1, 128)
179 | self.conv_leaky1_3 = make_leaky_convs(1, 512)
180 | self.conv_leaky1_4 = make_leaky_convs(1, 1024)
181 | self.conv_leaky1_5 = make_leaky_convs(1, 256)
182 |         self.conv_leaky1_6 = Conv2D_BN_Leaky(256, 3, 2)   # PAN downsample convs use leaky, not mish
183 |         self.conv_leaky1_7 = Conv2D_BN_Leaky(512, 3, 2)
184 | self.conv_leaky5_1 = make_leaky_convs(5, 256)
185 | self.conv_leaky5_2= make_leaky_convs(5, 128)
186 | self.conv_leaky5_3= make_leaky_convs(5, 512)
187 | self.spp_layer = spp()
188 | self.upsampling = tf.keras.layers.UpSampling2D(2)
189 |
190 |
191 | def call(self, x):
192 | #cspdarknet53
193 | first_conv_out = self.first_conv(x)
194 | res_block1_out = self.res_block1(first_conv_out)
195 | res_block2_out = self.res_block2(res_block1_out)
196 | res_block3_out = self.res_block3(res_block2_out)
197 |
198 | intermediate_1 = res_block3_out
199 |
200 | res_block4_out = self.res_block4(res_block3_out)
201 |
202 | intermediate_2 = res_block4_out
203 |
204 | res_block5_out = self.res_block5(res_block4_out)
205 |
206 | #spp
207 | pred_spp = self.conv_leaky3_1(res_block5_out)
208 | spp_out = self.spp_layer(pred_spp)
209 |
210 | succ_spp = self.conv_leaky3_2(spp_out)
211 |
212 | intermediate_3 = succ_spp
213 |
214 | head2_1 = self.conv_leaky1_1(intermediate_2)
215 |         head2_2 = self.conv_leaky1_1(intermediate_3)  # note: shares weights with head2_1's conv above
216 | head2_2 = self.upsampling(head2_2)
217 | head2 = tf.concat([head2_1, head2_2], axis=-1)
218 | head2 = self.conv_leaky5_1(head2)
219 |
220 | intermediate_4 = head2
221 |
222 | head1_1 = self.conv_leaky1_2(intermediate_1)
223 | head1_2 = self.conv_leaky1_2(intermediate_4)
224 | head1_2 = self.upsampling(head1_2)
225 | head1 = tf.concat([head1_1, head1_2], axis=-1)
226 | head1 = self.conv_leaky5_2(head1)
227 |
228 | intermediate_5 = head1
229 |
230 | head1 = self.conv_leaky1_5(head1)
231 |
232 | head1_out = self.conv_last1(head1)
233 |
234 | head2_3 = self.conv_leaky1_6(intermediate_5)
235 |
236 | head2 = tf.concat([intermediate_4, head2_3], axis=-1)
237 | head2 = self.conv_leaky5_1(head2)
238 |
239 | intermediate_6 = head2
240 |
241 | head2 = self.conv_leaky1_3(head2)
242 | head2_out = self.conv_last2(head2)
243 |
244 | head3_2 = self.conv_leaky1_7(intermediate_6)
245 | head3 = tf.concat([intermediate_3, head3_2], axis=-1)
246 | head3 = self.conv_leaky5_3(head3)
247 | head3 = self.conv_leaky1_4(head3)
248 | head3_out = self.conv_last3(head3)
249 |
250 | return head1_out, head2_out, head3_out
251 |
252 | if __name__ == "__main__":
253 | model = Yolo_Model()
254 | x = tf.random.normal(shape=(1, 608, 608, 3))
255 | head1, head2, head3 = model(x)
256 |
257 | print("head1 shape: ", head1.shape)
258 | print("head2 shape: ", head2.shape)
259 | print("head3 shape: ", head3.shape)
260 |
--------------------------------------------------------------------------------
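For a 608x608 input, the test in __main__ should print one grid cell per stride step and 255 channels per head, where 255 = 3 anchors * (4 box + 1 objectness + 80 classes); a quick way to see the expected shapes:

```python
# expected head shapes for a 608x608 input
for stride in (8, 16, 32):
    print((1, 608 // stride, 608 // stride, 255))
# (1, 76, 76, 255), (1, 38, 38, 255), (1, 19, 19, 255)
```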
/model_infer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from conf import IMAGE_HEIGHT, IMAGE_WIDTH, CATEGORY_NUM
3 | import numpy as np
4 |
5 | #mish activation
6 | def mish(x):
7 |     return x * tf.tanh(tf.math.softplus(x))  # softplus(x) = log(1 + exp(x)), but numerically stable
8 |
9 | class Mish(tf.keras.layers.Layer):
10 | def __init__(self):
11 | super(Mish, self).__init__()
12 | def call(self, x):
13 | return mish(x)
14 |
15 | #conv block with mish
16 | def single_conv_mish(inputs, filters, kernel, strides):
17 | padding = 'valid' if strides == 2 else 'same'
18 | if strides == 2:
19 | inputs = tf.keras.layers.ZeroPadding2D(((1,0),(1,0)))(inputs)
20 | out = tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel, use_bias=False,
21 | strides=strides, padding=padding)(inputs)
22 | out = tf.keras.layers.BatchNormalization()(out)
23 | out = Mish()(out)
24 | return out
25 |
26 | #conv block with leaky
27 | def single_conv_leaky(inputs, filters, kernel, strides):
28 | padding = 'valid' if strides == 2 else 'same'
29 | if strides == 2:
30 | inputs = tf.keras.layers.ZeroPadding2D(((1,0),(1,0)))(inputs)
31 | out = tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel, use_bias=False,
32 | strides=strides, padding=padding)(inputs)
33 | out = tf.keras.layers.BatchNormalization()(out)
34 | out = tf.keras.layers.LeakyReLU(0.1)(out)
35 | return out
36 |
37 | #res connection
38 | def res_conn_block(inputs, filters, is_half):
39 | out = single_conv_mish(inputs, filters//2, 1, 1)
40 | out = single_conv_mish(out, filters//2 if is_half else filters, 3, 1)
41 | return out
42 |
43 | #single res conn block
44 | def ResBlock(inputs, filters, res_num, is_half):
45 | downsample_out = single_conv_mish(inputs, filters, 3, 2)
46 | right_conv = single_conv_mish(downsample_out, filters//2 if is_half else filters, 1, 1)
47 |
48 | left_conv = single_conv_mish(downsample_out, filters//2 if is_half else filters, 1, 1)
49 | for i in range(res_num):
50 | res_intermidiate = res_conn_block(left_conv, filters, is_half)
51 | left_conv = left_conv + res_intermidiate
52 | left_conv = single_conv_mish(left_conv, filters//2 if is_half else filters, 1, 1)
53 | concat_out = tf.keras.layers.Concatenate()([left_conv, right_conv])
54 | out = single_conv_mish(concat_out, filters, 1, 1)
55 |
56 | return out
57 |
58 | #conv leaky stacked layers
59 | def make_leaky_convs(inputs, layer_num, filters, strides):
60 | if layer_num == 1:
61 | out = single_conv_leaky(inputs, filters, 1, strides)
62 |
63 | if layer_num == 3:
64 | out = single_conv_leaky(inputs, filters, 1, strides)
65 | out = single_conv_leaky(out, filters*2, 3, strides)
66 | out = single_conv_leaky(out, filters, 1, strides)
67 |
68 | if layer_num == 5:
69 | out = single_conv_leaky(inputs, filters, 1, strides)
70 | out = single_conv_leaky(out, filters*2, 3, strides)
71 | out = single_conv_leaky(out, filters, 1, strides)
72 | out = single_conv_leaky(out, filters*2, 3, strides)
73 | out = single_conv_leaky(out, filters, 1, strides)
74 |
75 | return out
76 |
77 | #spp module
78 | def spp_module(inputs):
79 | pool1 = tf.keras.layers.MaxPooling2D((13,13), strides=1, padding='same')(inputs)
80 | pool2 = tf.keras.layers.MaxPooling2D((9,9), strides=1, padding='same')(inputs)
81 | pool3 = tf.keras.layers.MaxPooling2D((5,5), strides=1, padding='same')(inputs)
82 | out = tf.keras.layers.Concatenate()([pool1, pool2, pool3, inputs])
83 | return out
84 |
85 | #transform yolo feature map
86 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite
87 | def transform(conv_output, NUM_CLASS, i=0):
88 | """
89 | return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes]
90 | contains (x, y, w, h, score, probability)
91 | """
92 | conv_shape = tf.shape(conv_output)
93 | batch_size = conv_shape[0]
94 | output_size = conv_shape[1]
95 |
96 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
97 | conv_raw_xywh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (4, 1, NUM_CLASS), axis=-1)
98 |
99 | pred_conf = tf.sigmoid(conv_raw_conf)
100 | pred_prob = tf.sigmoid(conv_raw_prob)
101 |
102 | return tf.concat([conv_raw_xywh, pred_conf, pred_prob], axis=-1)
103 |
104 | #load weights
105 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite
106 | def load_weights(model, weights_file):
107 | wf = open(weights_file, 'rb')
108 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
109 |
110 | j = 0
111 | for i in range(110):
112 | conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
113 | bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
114 |
115 | conv_layer = model.get_layer(conv_layer_name)
116 | filters = conv_layer.filters
117 | k_size = conv_layer.kernel_size[0]
118 | in_dim = conv_layer.input_shape[-1]
119 |
120 | if i not in [93, 101, 109]:
121 | # darknet weights: [beta, gamma, mean, variance]
122 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
123 | # tf weights: [gamma, beta, mean, variance]
124 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
125 | bn_layer = model.get_layer(bn_layer_name)
126 | j += 1
127 | else:
128 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
129 |
130 | # darknet shape (out_dim, in_dim, height, width)
131 | conv_shape = (filters, in_dim, k_size, k_size)
132 |             conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape))
133 | # tf shape (height, width, in_dim, out_dim)
134 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
135 |
136 | if i not in [93, 101, 109]:
137 | conv_layer.set_weights([conv_weights])
138 | bn_layer.set_weights(bn_weights)
139 | else:
140 | conv_layer.set_weights([conv_weights, conv_bias])
141 |
142 | assert len(wf.read()) == 0, 'failed to read all data'
143 | print("load OK")
144 | wf.close()
145 |
146 |
147 | def yolo_body(inputs, classes):
148 | #cspdarknet53
149 | first_conv = single_conv_mish(inputs, 32, 3, 1)
150 | res_block_1 = ResBlock(first_conv, 64, 1, False)
151 | res_block_2 = ResBlock(res_block_1, 128, 2, True)
152 | res_block_3 = ResBlock(res_block_2, 256, 8, True)
153 |
154 | intermediate_1 = res_block_3
155 |
156 | res_block_4 = ResBlock(res_block_3, 512, 8, True)
157 |
158 | intermediate_2 = res_block_4
159 |
160 | res_block_5 = ResBlock(res_block_4, 1024, 4, True)
161 |
162 | pred_spp = make_leaky_convs(res_block_5, 3, 512, 1)
163 | spp_out = spp_module(pred_spp)
164 | succ_spp = make_leaky_convs(spp_out, 3, 512, 1)
165 |
166 | intermediate_3 = succ_spp
167 |
168 | head2_right = make_leaky_convs(intermediate_3, 1, 256, 1)
169 | head2_right = tf.keras.layers.UpSampling2D()(head2_right)
170 | head2_left = make_leaky_convs(intermediate_2, 1, 256, 1)
171 | head2 = tf.keras.layers.Concatenate()([head2_left, head2_right])
172 | head2 = make_leaky_convs(head2, 5, 256, 1)
173 |
174 | intermediate_4 = head2
175 |
176 | head1_right = make_leaky_convs(intermediate_4, 1, 128, 1)
177 | head1_right = tf.keras.layers.UpSampling2D()(head1_right)
178 | head1_left = make_leaky_convs(intermediate_1, 1, 128, 1)
179 | head1 = tf.keras.layers.Concatenate()([head1_left, head1_right])
180 | head1 = make_leaky_convs(head1, 5, 128, 1) #conv92
181 |
182 | intermediate_5 = head1
183 |
184 | head1 = single_conv_leaky(head1, 256, 3, 1)
185 | head1_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head1)
186 |
187 | head2_side = single_conv_leaky(intermediate_5, 256, 3, 2)
188 | head2 = tf.keras.layers.Concatenate()([head2_side, intermediate_4])
189 | head2 = make_leaky_convs(head2, 5, 256, 1)
190 |
191 | intermediate_6 = head2
192 |
193 | head2 = single_conv_leaky(head2, 512, 3, 1)
194 | head2_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head2)
195 |
196 | head3_right = single_conv_leaky(intermediate_6, 512, 3, 2)
197 | head3 = tf.keras.layers.Concatenate()([head3_right, intermediate_3])
198 | head3 = make_leaky_convs(head3, 5, 512, 1)
199 | head3 = single_conv_leaky(head3, 1024, 3, 1)
200 | head3_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head3)
201 |
202 | conv_out = [head1_out, head2_out, head3_out]
203 |
204 | return conv_out
205 |
206 | def Yolo_Model():
207 |     inputs = tf.keras.layers.Input(shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3])  # (height, width, channels)
208 | yolobody_out = yolo_body(inputs, CATEGORY_NUM)
209 | conv_outs = []
210 | for i, conv_out in enumerate(yolobody_out):
211 | transformed_out = transform(conv_out, CATEGORY_NUM, i)
212 | conv_outs.append(transformed_out)
213 |
214 | return tf.keras.Model(inputs=inputs, outputs=conv_outs)
215 |
216 | #simple test
217 | if __name__ == "__main__":
218 |     # Yolo_Model() takes no arguments; it builds its own Input layer
219 |     model = Yolo_Model()
220 |     x = tf.random.normal(shape=(1, 608, 608, 3))
221 |     out = model(x)
222 |     head1, head2, head3 = out[0], out[1], out[2]
223 |
224 |
225 | print("head1 shape: ", head1.shape)
226 | print("head2 shape: ", head2.shape)
227 | print("head3 shape: ", head3.shape)
--------------------------------------------------------------------------------
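Once the darknet weights are loaded, the functional model can be exported and reloaded as a TensorFlow SavedModel; a hedged sketch (the output path under conf.py's save_model_dir is a name chosen here, and depending on the TF version the custom Mish layer may need to be registered explicitly):

```python
import tensorflow as tf
from model_infer import Yolo_Model, Mish, load_weights

model = Yolo_Model()
load_weights(model, "yolov4.weights")   # darknet weight file, same as detect.py

model.save("saved_model/yolov4")        # SavedModel under conf's save_model_dir
loaded = tf.keras.models.load_model("saved_model/yolov4",
                                    custom_objects={"Mish": Mish})
```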
/mosaic_argumentation.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/mosaic_argumentation.jpg
--------------------------------------------------------------------------------
/nms.py:
--------------------------------------------------------------------------------
1 | # greedy NMS TensorFlow implementation
2 | import tensorflow as tf
3 | import numpy as np
4 |
5 | CONFIDENCE_THRESHOLD = 0.45
6 | NUM_CLASS = 80
7 | MAX_BOX_NUM = 20
8 |
9 | def xywh2xyxy(x):
10 |     # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
11 |     y = np.empty_like(x)
12 |     y[..., 0] = x[..., 0] - x[..., 2] / 2
13 |     y[..., 1] = x[..., 1] - x[..., 3] / 2
14 |     y[..., 2] = x[..., 0] + x[..., 2] / 2
15 |     y[..., 3] = x[..., 1] + x[..., 3] / 2
16 |     return y
17 |
18 | def nms(pred_boxes, conf_thres=0.5, nms_thres=0.4):
19 |     """pred_boxes: numpy array of shape (batch, boxes, 4 + 1 + NUM_CLASS)"""
20 |     pred_boxes[..., :4] = xywh2xyxy(pred_boxes[..., :4])
21 |     output = [None for _ in range(len(pred_boxes))]
22 |
23 |     for image_i, image_pred in enumerate(pred_boxes):
24 |         # filter out boxes whose objectness falls below the threshold
25 |         image_pred = image_pred[image_pred[:, 4] >= conf_thres]
26 |         # if none remain, process the next image
27 |         if image_pred.shape[0] == 0:
28 |             continue
29 |         # objectness times the best class confidence
30 |         score = image_pred[:, 4] * image_pred[:, 5:].max(axis=1)
31 |         # sort by score, descending
32 |         image_pred = image_pred[np.argsort(-score)]
33 |         boxes = image_pred[:, :4]
34 |         scores = image_pred[:, 4]
35 |         selected_indices = tf.image.non_max_suppression(
36 |             boxes, scores, MAX_BOX_NUM, nms_thres
37 |         )
38 |         output[image_i] = (tf.gather(boxes, selected_indices),
39 |                            tf.gather(scores, selected_indices))
40 |
41 |     return output
--------------------------------------------------------------------------------
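A quick usage sketch for the nms() above, with toy boxes: the first two candidates overlap heavily, so only one of them plus the far-away third box should survive.

```python
import numpy as np
from nms import nms

# 1 image, 3 candidate boxes as (cx, cy, w, h, objectness, 80 class scores)
preds = np.zeros((1, 3, 85), dtype=np.float32)
preds[0, :, :4] = [[100, 100, 50, 50], [104, 100, 50, 50], [300, 300, 40, 40]]
preds[0, :, 4] = [0.9, 0.8, 0.7]   # objectness
preds[0, :, 5] = 1.0               # all boxes belong to "class 0"

boxes, scores = nms(preds, conf_thres=0.5, nms_thres=0.4)[0]
print(len(scores))                 # 2: the overlapping pair collapsed to one box
```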
/prepost_process.py:
--------------------------------------------------------------------------------
1 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite
2 | import tensorflow as tf
3 | import numpy as np
4 | import time
5 | import random
6 | import colorsys
7 | import cv2
8 | from conf import COCO_CLASSES
9 |
10 | def image_preporcess(image, target_size, gt_boxes=None):  # letterbox: scale to fit, pad with gray
11 |
12 | ih, iw = target_size
13 | h, w, _ = image.shape
14 |
15 | scale = min(iw/w, ih/h)
16 | nw, nh = int(scale * w), int(scale * h)
17 | image_resized = cv2.resize(image, (nw, nh))
18 |
19 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
20 | dw, dh = (iw - nw) // 2, (ih-nh) // 2
21 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
22 | image_paded = image_paded / 255.
23 |
24 | if gt_boxes is None:
25 | return image_paded
26 |
27 | else:
28 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
29 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
30 | return image_paded, gt_boxes
31 |
32 | def process_feature(conv_output, NUM_CLASS, STRIDES, ANCHORS, CONF_THRESH, i=0, XYSCALE=[1,1,1]):
33 | conv_shape = tf.shape(conv_output)
34 | batch_size = conv_shape[0]
35 | output_size = conv_shape[1]
36 |
37 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
38 | conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)
39 |
40 | x = tf.tile(tf.expand_dims(tf.range(output_size, dtype=tf.int32), axis=0), [output_size, 1])
41 | y = tf.tile(tf.expand_dims(tf.range(output_size, dtype=tf.int32), axis=1), [1, output_size])
42 | xy_grid = tf.expand_dims(tf.stack([x, y], axis=-1), axis=2)
43 |
44 |
45 | xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [batch_size, 1, 1, 3, 1])
46 | xy_grid = tf.cast(xy_grid, tf.float32)
47 |
48 | pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
49 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
50 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
51 |
52 | pred_conf = tf.sigmoid(conv_raw_conf)
53 | pred_prob = tf.sigmoid(conv_raw_prob)
54 |
55 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
56 |
57 | def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]):
58 | for i, pred in enumerate(pred_bbox):
59 | conv_shape = pred.shape
60 | output_size = conv_shape[1]
61 | conv_raw_dxdy = pred[:, :, :, :, 0:2]
62 | conv_raw_dwdh = pred[:, :, :, :, 2:4]
63 | xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
64 | xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2) # [gx, gy, 1, 2]
65 |
66 |         xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
67 |         xy_grid = xy_grid.astype(np.float32)  # np.float is removed in recent numpy
68 |
69 | # pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
70 | pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
71 | # pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
72 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i])
73 | pred[:, :, :, :, 0:4] = tf.concat([pred_xy, pred_wh], axis=-1)
74 |
75 |
76 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
77 | pred_bbox = tf.concat(pred_bbox, axis=0)
78 | return pred_bbox
79 |
80 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
81 |
82 | valid_scale=[0, np.inf]
83 | pred_bbox = np.array(pred_bbox)
84 |
85 | pred_xywh = pred_bbox[:, 0:4]
86 | pred_conf = pred_bbox[:, 4]
87 |
88 | pred_prob = pred_bbox[:, 5:]
89 |
90 | # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
91 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
92 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
93 |
94 | # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
95 | org_h, org_w = org_img_shape
96 | resize_ratio = min(input_size / org_w, input_size / org_h)
97 |
98 | dw = (input_size - resize_ratio * org_w) / 2
99 | dh = (input_size - resize_ratio * org_h) / 2
100 |
101 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
102 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
103 |
104 | # # (3) clip some boxes those are out of range
105 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
106 | np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
107 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
108 | pred_coor[invalid_mask] = 0
109 |
110 | # # (4) discard some invalid boxes
111 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
112 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
113 |
114 | # # (5) discard some boxes with low scores
115 | classes = np.argmax(pred_prob, axis=-1)
116 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
117 | # scores = pred_prob[np.arange(len(pred_coor)), classes]
118 | score_mask = scores > score_threshold
119 | mask = np.logical_and(scale_mask, score_mask)
120 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
121 |
122 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
123 |
124 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
125 | """
126 | :param bboxes: (xmin, ymin, xmax, ymax, score, class)
127 |
128 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
129 | https://github.com/bharatsingh430/soft-nms
130 | """
131 | classes_in_img = list(set(bboxes[:, 5]))
132 | best_bboxes = []
133 |
134 | for cls in classes_in_img:
135 | cls_mask = (bboxes[:, 5] == cls)
136 | cls_bboxes = bboxes[cls_mask]
137 |
138 | while len(cls_bboxes) > 0:
139 | max_ind = np.argmax(cls_bboxes[:, 4])
140 | best_bbox = cls_bboxes[max_ind]
141 | best_bboxes.append(best_bbox)
142 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
143 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
144 | weight = np.ones((len(iou),), dtype=np.float32)
145 |
146 | assert method in ['nms', 'soft-nms']
147 |
148 | if method == 'nms':
149 | iou_mask = iou > iou_threshold
150 | weight[iou_mask] = 0.0
151 |
152 | if method == 'soft-nms':
153 | weight = np.exp(-(1.0 * iou ** 2 / sigma))
154 |
155 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
156 | score_mask = cls_bboxes[:, 4] > 0.
157 | cls_bboxes = cls_bboxes[score_mask]
158 |
159 | return best_bboxes
160 |
161 | def bboxes_iou(boxes1, boxes2):
162 |
163 | boxes1 = np.array(boxes1)
164 | boxes2 = np.array(boxes2)
165 |
166 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
167 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
168 |
169 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
170 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
171 |
172 | inter_section = np.maximum(right_down - left_up, 0.0)
173 | inter_area = inter_section[..., 0] * inter_section[..., 1]
174 | union_area = boxes1_area + boxes2_area - inter_area
175 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
176 |
177 | return ious
178 |
179 | def draw_bbox(image, bboxes, classes=COCO_CLASSES, show_label=True):
180 | """
181 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
182 | """
183 |
184 | num_classes = len(classes)
185 | image_h, image_w, _ = image.shape
186 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
187 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
188 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
189 |
190 | random.seed(0)
191 | random.shuffle(colors)
192 | random.seed(None)
193 |
194 | for i, bbox in enumerate(bboxes):
195 | coor = np.array(bbox[:4], dtype=np.int32)
196 | fontScale = 0.5
197 | score = bbox[4]
198 | class_ind = int(bbox[5])
199 | bbox_color = colors[class_ind]
200 | bbox_thick = int(0.6 * (image_h + image_w) / 600)
201 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
202 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
203 |
204 | if show_label:
205 | bbox_mess = '%s: %.2f' % (classes[class_ind], score)
206 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
207 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled
208 |
209 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
210 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
211 |
212 | return image
213 |
214 |
--------------------------------------------------------------------------------
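The letterbox arithmetic in image_preporcess is worth tracing once; for a 1280x720 frame squeezed into the 608x608 network input:

```python
# letterbox math from image_preporcess, with a 1280x720 example frame
ih = iw = 608
h, w = 720, 1280

scale = min(iw / w, ih / h)               # 0.475: fit the longer edge
nw, nh = int(scale * w), int(scale * h)   # 608 x 342 resized image
dw, dh = (iw - nw) // 2, (ih - nh) // 2   # 0 / 133 pixels of gray padding
```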
/read_txt.py:
--------------------------------------------------------------------------------
1 | from conf import MAX_TRUE_BOX_NUM_PER_IMG
2 |
3 | class ReadTxt(object):
4 | def __init__(self, line_bytes):
5 | super(ReadTxt, self).__init__()
6 | # bytes -> string
7 | self.line_str = bytes.decode(line_bytes, encoding="utf-8")
8 |
9 | def parse_line(self):
10 | line_info = self.line_str.strip('\n')
11 | split_line = line_info.split(" ")
12 |         box_num = (len(split_line) - 1) // 5  # 5 fields per box
13 | image_name = split_line[0]
14 | # print("Reading {}".format(image_name))
15 | split_line = split_line[1:]
16 | boxes = []
17 | for i in range(MAX_TRUE_BOX_NUM_PER_IMG):
18 | if i < box_num:
19 |                 box_x = float(split_line[i * 5 + 1])  # boxes are stored as (class, cx, cy, w, h)
20 |                 box_y = float(split_line[i * 5 + 2])
21 |                 box_w = float(split_line[i * 5 + 3])
22 |                 box_h = float(split_line[i * 5 + 4])
23 |                 class_id = int(split_line[i * 5])
24 |                 boxes.append([class_id, box_x, box_y, box_w, box_h])
25 |             """ zero-padding fallback for short annotation lines (currently disabled):
26 | else:
27 | box_xmin = 0
28 | box_ymin = 0
29 | box_xmax = 0
30 | box_ymax = 0
31 | class_id = 0
32 |                 boxes.append([class_id, box_xmin, box_ymin, box_xmax, box_ymax])  # same field order as the active branch
33 | """
34 |
35 | return image_name, boxes
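36 |
37 | # Illustrative usage sketch (not part of the original file). It assumes each
38 | # annotation line has the layout implied by the indexing above:
39 | # "<image_name> <class_id> <xmin> <ymin> <xmax> <ymax> ..." (5 fields per box).
40 | if __name__ == "__main__":
41 |     sample = "000001.jpg 11 100 120 200 260".encode("utf-8")
42 |     name, boxes = ReadTxt(sample).parse_line()
43 |     print(name, boxes)  # 000001.jpg [[11, 100.0, 120.0, 200.0, 260.0]]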
--------------------------------------------------------------------------------
/torchx.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Workflow
3 | metadata:
4 | generateName: pipeline-
5 | annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, pipelines.kubeflow.org/pipeline_compilation_time: '2021-12-16T17:39:24.231309',
6 | pipelines.kubeflow.org/pipeline_spec: '{"name": "Pipeline"}'}
7 | labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.6.2}
8 | spec:
9 | entrypoint: pipeline
10 | templates:
11 | - name: cv-interpret-worker
12 | container:
13 | args: []
14 | command: [python, -m, torchx.examples.apps.lightning_classy_vision.interpret,
15 | --load_path, /tmp/output/models/last.ckpt, --output_path, /tmp/output/interpret,
16 | --data_path, /tmp/output/processed]
17 | image: ghcr.io/pytorch/torchx:0.1.1
18 | resources:
19 | limits: {cpu: 1000m, memory: 1024M}
20 | requests: {cpu: 1000m, memory: 1024M}
21 | tty: true
22 | metadata:
23 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: cv-interpret,
24 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker,
25 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2,
26 | pipelines.kubeflow.org/pipeline-sdk-type: kfp}
27 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper
28 | for TorchX component cv-interpret, role worker", "implementation": {"container":
29 | {"command": ["python", "-m", "torchx.examples.apps.lightning_classy_vision.interpret",
30 | "--load_path", "/tmp/output/models/last.ckpt", "--output_path", "/tmp/output/interpret",
31 | "--data_path", "/tmp/output/processed"], "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}},
32 | "name": "cv-interpret-worker", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest":
33 | "a21fbc29a0eb30707292ef6dda4ae8f46eed5ecbc7ec852f166d018d5b09fed4"}'}
34 | - name: cv-trainer-worker
35 | container:
36 | args: []
37 | command: [python, -m, torchx.examples.apps.lightning_classy_vision.train, --load_path,
38 | '', --log_path, /tmp/output/logs, --epochs, '1', --output_path, /tmp/output/models,
39 | --num_samples, '200', --data_path, /tmp/output/processed]
40 | image: ghcr.io/pytorch/torchx:0.1.1
41 | resources:
42 | limits: {cpu: 1000m, memory: 4000M}
43 | requests: {cpu: 1000m, memory: 4000M}
44 | tty: true
45 | volumeMounts:
46 | - {mountPath: /tmp/, name: tmp}
47 | outputs:
48 | artifacts:
49 | - {name: mlpipeline-ui-metadata, path: /tmp/outputs/mlpipeline-ui-metadata/data.json}
50 | metadata:
51 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: cv-trainer,
52 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker,
53 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2,
54 | pipelines.kubeflow.org/pipeline-sdk-type: kfp}
55 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper
56 | for TorchX component cv-trainer, role worker", "implementation": {"container":
57 | {"command": ["python", "-m", "torchx.examples.apps.lightning_classy_vision.train",
58 | "--load_path", "", "--log_path", "/tmp/output/logs", "--epochs", "1", "--output_path",
59 | "/tmp/output/models", "--num_samples", "200", "--data_path", "/tmp/output/processed"],
60 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "cv-trainer-worker",
61 | "outputs": [{"description": "ui metadata", "name": "mlpipeline-ui-metadata",
62 | "type": "MLPipeline UI Metadata"}]}', pipelines.kubeflow.org/component_ref: '{"digest":
63 | "24a9c860e919337e5ff14db25ae7703cf9b9b2eaeeeb1a6b982ae4be3e9a0e09"}'}
64 | sidecars:
65 | - command: [sh, -c, 'mkdir -p /tmp/outputs/mlpipeline-ui-metadata; echo ''{"outputs":
66 | [{"type": "tensorboard", "source": "/tmp/output/logs/lightning_logs"}]}''
67 | > /tmp/outputs/mlpipeline-ui-metadata/data.json']
68 | image: alpine
69 | name: ui-metadata-sidecar
70 | mirrorVolumeMounts: true
71 | volumes:
72 | - emptyDir: {}
73 | name: tmp
74 | - name: datapreproc-worker
75 | container:
76 | args: []
77 | command: [python, -m, torchx.examples.apps.datapreproc.datapreproc, --input_path,
78 | /tmp/output/tiny-imagenet-200.zip, --output_path, /tmp/output/processed]
79 | image: ghcr.io/pytorch/torchx:0.1.1
80 | resources:
81 | limits: {cpu: 1000m, memory: 1024M}
82 | requests: {cpu: 1000m, memory: 1024M}
83 | tty: true
84 | metadata:
85 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: datapreproc,
86 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker,
87 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2,
88 | pipelines.kubeflow.org/pipeline-sdk-type: kfp}
89 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper
90 | for TorchX component datapreproc, role worker", "implementation": {"container":
91 | {"command": ["python", "-m", "torchx.examples.apps.datapreproc.datapreproc",
92 | "--input_path", "/tmp/output/tiny-imagenet-200.zip", "--output_path", "/tmp/output/processed"],
93 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "datapreproc-worker",
94 | "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": "306620766906929f355231a5a37dfedf5faff9bf03d1b6f648fe8d6765633e78"}'}
95 | - name: pipeline
96 | dag:
97 | tasks:
98 | - name: cv-interpret-worker
99 | template: cv-interpret-worker
100 | dependencies: [cv-trainer-worker]
101 | - name: cv-trainer-worker
102 | template: cv-trainer-worker
103 | dependencies: [datapreproc-worker]
104 | - name: datapreproc-worker
105 | template: datapreproc-worker
106 | dependencies: [torchx-utils-copy-torchx-utils-copy]
107 | - name: torchx-torchserve-worker
108 | template: torchx-torchserve-worker
109 | dependencies: [cv-trainer-worker]
110 | - {name: torchx-utils-copy-torchx-utils-copy, template: torchx-utils-copy-torchx-utils-copy}
111 | - name: torchx-torchserve-worker
112 | container:
113 | args: []
114 | command: [python, -m, torchx.apps.serve.serve, --model_path, /tmp/output/models/model.mar,
115 | --management_api, 'http://torchserve.default.svc.cluster.local:8081', --model_name,
116 | tiny_image_net]
117 | image: ghcr.io/pytorch/torchx:0.1.1
118 | ports:
119 | - {containerPort: 8222, name: model-download}
120 | tty: true
121 | metadata:
122 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: torchx-torchserve,
123 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker,
124 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2,
125 | pipelines.kubeflow.org/pipeline-sdk-type: kfp}
126 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper
127 | for TorchX component torchx-torchserve, role worker", "implementation":
128 | {"container": {"command": ["python", "-m", "torchx.apps.serve.serve", "--model_path",
129 | "/tmp/output/models/model.mar", "--management_api", "http://torchserve.default.svc.cluster.local:8081",
130 | "--model_name", "tiny_image_net"], "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}},
131 | "name": "torchx-torchserve-worker", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest":
132 | "7b971194b2a921896419135a1a663036634aa9a40ae15578e7fdb60f38109351"}'}
133 | - name: torchx-utils-copy-torchx-utils-copy
134 | container:
135 | args: []
136 | command: [python, -m, torchx.apps.utils.copy_main, --src, 'http://cs231n.stanford.edu/tiny-imagenet-200.zip',
137 | --dst, /tmp/output/tiny-imagenet-200.zip]
138 | image: ghcr.io/pytorch/torchx:0.1.1
139 | tty: true
140 | metadata:
141 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: torchx-utils-copy,
142 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: torchx-utils-copy,
143 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2,
144 | pipelines.kubeflow.org/pipeline-sdk-type: kfp}
145 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper
146 | for TorchX component torchx-utils-copy, role torchx-utils-copy", "implementation":
147 | {"container": {"command": ["python", "-m", "torchx.apps.utils.copy_main",
148 | "--src", "http://cs231n.stanford.edu/tiny-imagenet-200.zip", "--dst", "/tmp/output/tiny-imagenet-200.zip"],
149 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "torchx-utils-copy-torchx-utils-copy",
150 | "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": "390cfaf8fe8483cb9182e4a8fcccf9fcac9fdfaaff594e7bdb2236dacc367bc7"}'}
151 | arguments:
152 | parameters: []
153 | serviceAccountName: pipeline-runner
154 |
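155 | # Execution order implied by the `pipeline` DAG above (comment added for
156 | # orientation; not part of the generated workflow):
157 | #
158 | #   torchx-utils-copy -> datapreproc -> cv-trainer -> cv-interpret
159 | #                                                  \-> torchx-torchserve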
--------------------------------------------------------------------------------