├── README.md ├── argumentation.jpg ├── conf.py ├── data.py ├── data_process.py ├── detect.jpg ├── detect.py ├── kite.jpg ├── loss.py ├── model.py ├── model_infer.py ├── mosaic_argumentation.jpg ├── nms.py ├── prepost_process.py ├── read_txt.py └── torchx.yaml /README.md: -------------------------------------------------------------------------------- 1 | # Yolov4-tensorflow 2 | tensorflow implementation of Yolo v4 3 | 4 | dependencies: 5 | 6 | tensorflow2.x 7 | opencv 8 | 9 | 10 | # Mosaic data augmentation 11 | data.py adds Mosaic data augmentation, for imagenet classification and object detection 12 | 13 | this mosaic data augmentation is not exactly the same as the original yolov4 implementation, but it is close, and I will keep working on it 14 | 15 |
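in short: four images are stitched around a random cut point (cut_x, cut_y), and for classification the new label is the area-weighted mix of the four source labels. a minimal sketch of that weighting (an illustrative helper, not part of data.py):

```python
import numpy as np

def mosaic_label(labels, w, h, cut_x, cut_y):
    """area-weighted label mix for a 4-image mosaic (illustrative only)"""
    areas = np.array([
        cut_x * cut_y,              # top-left patch
        (w - cut_x) * cut_y,        # top-right patch
        cut_x * (h - cut_y),        # bottom-left patch
        (w - cut_x) * (h - cut_y),  # bottom-right patch
    ], dtype=np.float32)
    weights = areas / (w * h)  # the four fractions sum to 1
    return float((weights * np.array(labels, dtype=np.float32)).sum())

# e.g. binary labels [1, 0, 1, 1] on a 224x224 canvas cut at (100, 80)
print(mosaic_label([1, 0, 1, 1], 224, 224, 100, 80))
```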
![mosaic augmentation (classification demo)](argumentation.jpg)
16 | 17 |
![mosaic augmentation (detection demo)](mosaic_argumentation.jpg)
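to try the detection mosaic end to end (a sketch; it assumes `COCO_DIR` in conf.py points at your coco2017 root):

```bash
python data_process.py   # parses instances_train2017.json and writes ./data.txt
python data.py           # builds mosaic batches from data.txt and saves preview jpgs
```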
18 | 19 | 20 | # Inference 21 | some of the post-processing code is borrowed from [here](https://github.com/hunglc007/tensorflow-yolov4-tflite) and will be replaced with my own version 22 | 23 | run: 24 | ```bash 25 | python detect.py --image ./kite.jpg 26 | ``` 27 | demo: 28 |
![detection demo on kite.jpg](detect.jpg)
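note: inference needs the pretrained darknet `yolov4.weights` file, which is not included in this repo (it is commonly downloaded from the AlexeyAB/darknet releases page). if it is not in the working directory, point `--weight` at it:

```bash
python detect.py --image ./kite.jpg --weight /path/to/yolov4.weights
```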
29 | 30 | -------------------------------------------------------------------------------- /argumentation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/argumentation.jpg -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | #inference 2 | XYSCALE = [1.2, 1.1, 1.05] 3 | 4 | # training 5 | EPOCHS = 1000 6 | BATCH_SIZE = 4 7 | load_weights_before_training = False 8 | load_weights_from_epoch = 10 9 | STRIDES = [8, 16, 32] 10 | 11 | ANCHORS = [12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401] 12 | 13 | # input image 14 | IMAGE_HEIGHT = 608 15 | IMAGE_WIDTH = 608 16 | CHANNELS = 3 17 | 18 | # Dataset 19 | CATEGORY_NUM = 80 20 | ANCHOR_NUM_EACH_SCALE = 3 21 | COCO_ANCHORS = [[116, 90], [156, 198], [373, 326], [30, 61], [62, 45], [59, 119], [10, 13], [16, 30], [33, 23]] 22 | COCO_ANCHOR_INDEX = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 23 | SCALE_SIZE = [13, 26, 52] 24 | 25 | use_dataset = "pascal_voc" # "custom", "pascal_voc", "coco" 26 | 27 | PASCAL_VOC_DIR = "./dataset/VOCdevkit/VOC2012/" 28 | PASCAL_VOC_ANNOTATION = PASCAL_VOC_DIR + "Annotations" 29 | PASCAL_VOC_IMAGE = PASCAL_VOC_DIR + "JPEGImages" 30 | # The 20 object classes of PASCAL VOC 31 | PASCAL_VOC_CLASSES = {"person": 1, "bird": 2, "cat": 3, "cow": 4, "dog": 5, 32 | "horse": 6, "sheep": 7, "aeroplane": 8, "bicycle": 9, 33 | "boat": 10, "bus": 11, "car": 12, "motorbike": 13, 34 | "train": 14, "bottle": 15, "chair": 16, "diningtable": 17, 35 | "pottedplant": 18, "sofa": 19, "tvmonitor": 20} 36 | 37 | COCO_DIR = "/mnt/d/coco2017/" 38 | COCO_CLASSES = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorbike', 4: 'aeroplane', 5: 'bus', 6: 'train', 7: 'truck', 39 | 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 40 | 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 41 | 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 42 | 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 43 | 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 44 | 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 45 | 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 46 | 55: 'cake', 56: 'chair', 57: 'sofa', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 47 | 62: 'tvmonitor', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 48 | 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 49 | 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} 50 | 51 | TRAIN_DIR = "train2017" 52 | 53 | TXT_DIR = "./data.txt" 54 | 55 | custom_dataset_dir = "" 56 | custom_dataset_classes = {} 57 | 58 | # loss 59 | IGNORE_THRESHOLD = 0.5 60 | 61 | 62 | # NMS 63 | CONFIDENCE_THRESHOLD = 0.6 64 | IOU_THRESHOLD = 0.5 65 | MAX_BOX_NUM = 50 66 | 67 | MAX_TRUE_BOX_NUM_PER_IMG = 20 68 | 69 | 70 | # save model 71 | save_model_dir = "saved_model/" 72 | save_frequency = 5 73 | 74 | test_images_during_training = True 75 | training_results_save_dir = 
"./test_results_during_training/" 76 | test_images = ["", ""] 77 | 78 | test_picture_dir = "./test_data/1.jpg" 79 | test_video_dir = "./test_data/test_video.mp4" 80 | temp_frame_dir = "./test_data/temp.jpg" 81 | 82 | class DATA_ARG_FACTOR: 83 | saturation = 1.5 84 | exposure = 1.5 85 | hue=.1 86 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | """ 2 | mosaic data argumentation tensorflow implementation 3 | reference: https://github.com/clovaai/CutMix-PyTorch https://github.com/AlexeyAB/darknet 4 | """ 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 9 | import argparse 10 | import cv2 11 | from read_txt import ReadTxt 12 | import os 13 | import random 14 | from conf import COCO_DIR, TRAIN_DIR, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS, DATA_ARG_FACTOR 15 | TXT_DIR = "./data.txt" 16 | BATCH_SIZE = 4 17 | data_factors = DATA_ARG_FACTOR() 18 | 19 | 20 | parser = argparse.ArgumentParser(description="mosaic data argumentation tensorflow implementation") 21 | parser.add_argument("--path", default="./imagenet_test", type=str) 22 | args = parser.parse_args() 23 | 24 | 25 | def load_classification_data(): 26 | """ 27 | two classes imagenet_test data folder as a test 28 | """ 29 | train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data 30 | train_data_gen = train_image_generator.flow_from_directory(batch_size=4, 31 | directory=args.path, 32 | shuffle=True, 33 | target_size=(224, 224), 34 | class_mode='binary') 35 | steps = 4 36 | while (steps > 0): 37 | for inputs, target in train_data_gen: 38 | min_offset = 0.2 39 | w = inputs.shape[1] 40 | h = inputs.shape[2] 41 | cut_x = np.random.randint(int(w*min_offset), int(w*(1 - min_offset))) 42 | cut_y = np.random.randint(int(h*min_offset), int(h*(1 - min_offset))) 43 | 44 | s1 = (cut_x * cut_y) // (w*h) 45 | s2 = ((w - cut_x) * cut_y) // (w*h) 46 | s3 = (cut_x * (h - cut_y)) // (w*h) 47 | s4 = ((w - cut_x) * (h - cut_y)) // (w*h) 48 | 49 | d1 = inputs[0, :(h-cut_y), 0:cut_x, :] 50 | d2 = inputs[1, (h-cut_y):, 0:cut_x, :] 51 | d3 = inputs[2, (h-cut_y):, cut_x:, :] 52 | d4 = inputs[3, :(h-cut_y), cut_x:, :] 53 | 54 | tmp1 = np.vstack((d1, d2)) 55 | tmp2 = np.vstack((d4, d3)) 56 | 57 | tmpx = np.hstack((tmp1, tmp2)) 58 | tmpx = tmpx*255 59 | tmpy = target[0]*s1 + target[1]*s2 + target[2]*s3 + target[3]*s4 60 | 61 | cv2.imwrite("argumentation.jpg", tmpx) 62 | break 63 | 64 | steps -= 1 65 | 66 | #load_classification_data() 67 | 68 | def random_gen(): 69 | return np.random.randint(10000) 70 | 71 | def rand_int(min, max): 72 | if max < min: 73 | min, max = max, min 74 | 75 | r = (random_gen()%(max - min + 1)) + min 76 | return r 77 | 78 | def random_float(): 79 | return np.random.rand() 80 | 81 | def rand_uniform_strong(min, max): 82 | if (max < min): 83 | min, max = max, min 84 | return (random_float() * (max - min)) + min 85 | 86 | def rand_scale(s): 87 | scale = rand_uniform_strong(1, s) 88 | if(random_gen()%2): 89 | return scale 90 | return 1./scale 91 | 92 | def draw_boxes(images, boxes): 93 | for i in range(BATCH_SIZE): 94 | img = images[i].numpy() 95 | cv2.imwrite("hello.jpg", img) 96 | img = cv2.imread("hello.jpg") 97 | for j in range(len(boxes[i])): 98 | x = boxes[i][j][1] 99 | y = boxes[i][j][2] 100 | w = boxes[i][j][3] 101 | h = boxes[i][j][4] 102 | 103 | left = int((x - w / 2) * IMAGE_WIDTH) 104 | top = int((y - h / 
2) * IMAGE_HEIGHT) 105 | right = int((x + w / 2) * IMAGE_WIDTH) 106 | bot = int((y + h / 2) * IMAGE_HEIGHT) 107 | 108 | cv2.rectangle(img, (left, top), (right, bot), (0,0,255), 2) 109 | img = cv2.resize(img, (224, 224)) #cv2.resize returns a new image, it is not in-place 110 | cv2.imwrite(str(i)+".jpg", img) 111 | 112 | 113 | def load_img(file_path): 114 | 115 | img_raw = tf.io.read_file(file_path) 116 | image = tf.io.decode_jpeg(img_raw, channels=CHANNELS) 117 | image = tf.image.adjust_saturation(image, rand_scale(data_factors.saturation)) 118 | image = tf.image.adjust_hue(image, rand_uniform_strong(-1*data_factors.hue, data_factors.hue)) 119 | image = tf.image.adjust_contrast(image, rand_scale(data_factors.exposure)) 120 | #image = tf.image.resize_with_pad(image=image, target_height=IMAGE_HEIGHT, target_width=IMAGE_WIDTH) 121 | image = tf.image.resize(images=image, size=(IMAGE_HEIGHT,IMAGE_WIDTH)) 122 | 123 | return image 124 | 125 | def merge_bboxes(bboxes, cutx, cuty): 126 | cutx = cutx / IMAGE_WIDTH 127 | cuty = cuty / IMAGE_HEIGHT 128 | 129 | merge_bbox = [] 130 | for i in range(bboxes.shape[0]): 131 | for box in bboxes[i]: 132 | tmp_box = [] 133 | x,y,w,h = box[1], box[2], box[3], box[4] 134 | 135 | if i == 0: 136 | if box[2]-box[4]/2 > cuty or box[1]-box[3]/2 > cutx: 137 | continue 138 | 139 | if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty: 140 | h -= (box[2]+box[4]/2-cuty) 141 | y -= (box[2]+box[4]/2-cuty)/2 142 | 143 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx: 144 | w -= (box[1]+box[3]/2-cutx) 145 | x -= (box[1]+box[3]/2-cutx)/2 146 | 147 | if i == 1: 148 | if box[2]+box[4]/2 < cuty or box[1]-box[3]/2 > cutx: 149 | continue 150 | 151 | if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty: 152 | h -= (cuty-(box[2]-box[4]/2)) 153 | y += (cuty-(box[2]-box[4]/2))/2 154 | 155 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx: 156 | w -= (box[1]+box[3]/2-cutx) 157 | x -= (box[1]+box[3]/2-cutx)/2 158 | 159 | if i == 2: 160 | if box[2]+box[4]/2 < cuty or box[1]+box[3]/2 < cutx: 161 | continue 162 | 163 | if box[2]+box[4]/2 < 1 and box[2]-box[4]/2 < cuty: 164 | h -= (cuty-(box[2]-box[4]/2)) 165 | y += (cuty-(box[2]-box[4]/2))/2 166 | 167 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx: 168 | w -= (cutx-(box[1]-box[3]/2)) 169 | x += (cutx-(box[1]-box[3]/2))/2 170 | 171 | if i == 3: 172 | if box[2]-box[4]/2 > cuty or box[1]+box[3]/2 < cutx: 173 | continue 174 | 175 | if box[2]+box[4]/2 > cuty and box[2]-box[4]/2 < cuty: 176 | h -= (box[2]+box[4]/2-cuty) 177 | y -= (box[2]+box[4]/2-cuty)/2 178 | 179 | if box[1]+box[3]/2 > cutx and box[1]-box[3]/2 < cutx: 180 | w -= (cutx-(box[1]-box[3]/2)) 181 | x += (cutx-(box[1]-box[3]/2))/2 182 | 183 | tmp_box.append(box[0]) 184 | tmp_box.append(x) 185 | tmp_box.append(y) 186 | tmp_box.append(w) 187 | tmp_box.append(h) 188 | merge_bbox.append(tmp_box) 189 | 190 | #TODO: eliminate boxes that become too small after clipping 191 | #there may be no boxes left 192 | 193 | if len(merge_bbox) == 0: 194 | return None 195 | else: 196 | return merge_bbox 197 | 198 | def mosaic_process(image_batch, label_batch): 199 | """mosaic data augmentation 200 | default dataset: coco 201 | args: 202 | image_batch, label_batch: parsed from a batch of data.txt lines 203 | 204 | """ 205 | #use_mix = 0: no mosaic, use_mix = 3: mosaic of 4 images 206 | 207 | use_mix = 3 208 | #num of images 209 | n = len(image_batch) 210 | 211 | cut_x, cut_y = [0]*n, [0]*n 212 | random_index = random_gen() 213 | #if (random_index % 2 == 0): use_mix = 1 214 | if (use_mix == 3): 215 | min_offset = 0.2 216 | for i in range(n): 217 | h = IMAGE_HEIGHT 218 | w = IMAGE_WIDTH 219 | cut_x[i] = np.random.randint(int(w*min_offset), int(w*(1 -
min_offset))) 220 | cut_y[i] = np.random.randint(int(h*min_offset), int(h*(1 - min_offset))) 221 | #cut_x[i] = random.uniform(min_offset, (1-min_offset)) 222 | #cut_y[i] = random.uniform(min_offset, (1-min_offset)) 223 | 224 | augmentation_calculated, gaussian_noise = 0, 0 225 | 226 | def get_random_paths(): 227 | random_index = random.sample(list(range(n)), use_mix+1) 228 | 229 | random_paths = [] 230 | random_bboxes = [] 231 | for idx in random_index: 232 | random_paths.append(os.path.join(COCO_DIR, TRAIN_DIR, image_batch[idx])) 233 | random_bboxes.append(label_batch[idx]) 234 | return random_paths, np.array(random_bboxes) 235 | 236 | #n images per batch, we also generate n images if mosaic 237 | 238 | if (use_mix == 3): 239 | 240 | dest = [] 241 | new_boxes = [] 242 | for i in range(n): 243 | paths, bboxes = get_random_paths() 244 | img0 = load_img(paths[0]) 245 | img1 = load_img(paths[1]) 246 | img2 = load_img(paths[2]) 247 | img3 = load_img(paths[3]) 248 | 249 | #cut and adjust 250 | d1 = img0[:cut_y[i], :cut_x[i], :] 251 | d2 = img1[cut_y[i]:, :cut_x[i], :] 252 | d3 = img2[cut_y[i]:, cut_x[i]:, :] 253 | d4 = img3[:cut_y[i], cut_x[i]:, :] 254 | 255 | tmp1 = tf.concat([d1, d2], axis=0) 256 | tmp2 = tf.concat([d4, d3], axis=0) 257 | 258 | dest.append(tf.concat([tmp1, tmp2], axis=1)) 259 | #print(bboxes) 260 | 261 | tmp_boxes = (merge_bboxes(bboxes, cut_x[i], cut_y[i])) 262 | if not tmp_boxes: 263 | dest.pop() #all boxes were cropped out, drop this mosaic so dest stays aligned with new_boxes 264 | continue 265 | new_boxes.append(tmp_boxes) 266 | 267 | dest = tf.stack(dest) 268 | 269 | draw_boxes(dest, new_boxes) 270 | return dest, new_boxes 271 | 272 | 273 | if (use_mix == 0): 274 | dest = [] #tf tensors do not support item assignment, so collect then stack 275 | for i in range(n): 276 | paths, bboxes = get_random_paths() 277 | dest.append(load_img(paths[0])) 278 | new_boxes = label_batch 279 | dest = tf.stack(dest) 280 | return dest, new_boxes 281 | 282 | 283 | def get_length_of_dataset(dataset): 284 | count = 0 285 | for _ in dataset: 286 | count += 1 287 | return count 288 | 289 | def generate_dataset(): 290 | txt_dataset = tf.data.TextLineDataset(filenames=TXT_DIR) 291 | train_count = get_length_of_dataset(txt_dataset) 292 | train_dataset = txt_dataset.batch(batch_size=BATCH_SIZE) 293 | 294 | return train_dataset, train_count 295 | 296 | def parse_dataset_batch(dataset): 297 | """ 298 | Return : 299 | image_name_list : list, length is N (N is the batch size.)
300 | boxes_array : numpy.ndarray, shape is (N, box_num, 5); boxes are currently not padded to MAX_TRUE_BOX_NUM_PER_IMG 301 | """ 302 | image_name_list = [] 303 | boxes_list = [] 304 | len_of_batch = dataset.shape[0] 305 | for i in range(len_of_batch): 306 | image_name, boxes = ReadTxt(line_bytes=dataset[i].numpy()).parse_line() 307 | image_name_list.append(image_name) 308 | boxes_list.append(boxes) 309 | boxes_array = np.array(boxes_list) 310 | return image_name_list, boxes_array 311 | 312 | if __name__ == "__main__": 313 | #get the txt dataset which contains filename, boxes and labels in text format 314 | train_dataset, train_count = generate_dataset() 315 | 316 | step = 0 317 | for dataset_batch in train_dataset: 318 | step += 1 319 | images, boxes = parse_dataset_batch(dataset=dataset_batch) 320 | 321 | images, boxes = mosaic_process(images, boxes) 322 | print(images.shape) 323 | 324 | #draw_boxes(images, boxes) 325 | -------------------------------------------------------------------------------- /data_process.py: -------------------------------------------------------------------------------- 1 | from conf import COCO_DIR, COCO_CLASSES, IMAGE_HEIGHT, IMAGE_WIDTH 2 | import json 3 | from pathlib import Path 4 | import time 5 | 6 | class ResizeWithPad(): 7 | def __init__(self, h, w): 8 | super(ResizeWithPad, self).__init__() 9 | self.H = IMAGE_HEIGHT 10 | self.W = IMAGE_WIDTH 11 | self.w = w 12 | self.h = h 13 | 14 | def get_transform_coefficient(self): 15 | if self.h <= self.w: 16 | longer_edge = "w" 17 | scale = self.W / self.w 18 | padding_length = (self.H - self.h * scale) / 2 19 | else: 20 | longer_edge = "h" 21 | scale = self.H / self.h 22 | padding_length = (self.W - self.w * scale) / 2 23 | return longer_edge, scale, padding_length 24 | 25 | def raw_to_resized(self, x, y, w, h): 26 | x = x + w / 2 27 | y = y + h / 2 28 | x = x / self.w 29 | y = y / self.h 30 | w = w / self.w 31 | h = h / self.h 32 | return x, y, w, h 33 | 34 | def resized_to_raw(self, center_x, center_y, width, height): 35 | longer_edge, scale, padding_length = self.get_transform_coefficient() 36 | center_x *= self.W 37 | width *= self.W 38 | center_y *= self.H 39 | height *= self.H 40 | if longer_edge == "h": 41 | center_x -= padding_length 42 | else: 43 | center_y -= padding_length 44 | center_x = center_x / scale 45 | center_y = center_y / scale 46 | width = width / scale 47 | height = height / scale 48 | return center_x, center_y, width, height 49 | 50 | 51 | class ParseCOCO(object): 52 | def __init__(self): 53 | self.annotation_dir = COCO_DIR + "annotations/" 54 | self.images_dir = COCO_DIR + "train2017/" 55 | self.train_annotation = Path(self.annotation_dir + "instances_train2017.json") 56 | start_time = time.time() 57 | self.train_dict = self.__load_json(self.train_annotation) 58 | print("It took {:.2f} seconds to load the json files.".format(time.time() - start_time)) 59 | print(self.__get_category_id_information(self.train_dict)) 60 | 61 | def __load_json(self, json_file): 62 | print("Start loading {}...".format(json_file.name)) 63 | with json_file.open(mode='r') as f: 64 | load_dict = json.load(f) 65 | print("Loading is complete!") 66 | return load_dict 67 | 68 | def __find_all(self, x, value): 69 | list_data = [] 70 | for i in range(len(x)): 71 | if x[i] == value: 72 | list_data.append(i) 73 | return list_data 74 | 75 | def __get_image_information(self, data_dict): 76 | images = data_dict["images"] 77 | image_file_list = [] 78 | image_id_list = [] 79 | image_height_list = [] 80 | image_width_list = [] 81 | for image in images: 82 |
image_file_list.append(image["file_name"]) 83 | image_id_list.append(image["id"]) 84 | image_height_list.append(image["height"]) 85 | image_width_list.append(image["width"]) 86 | return image_file_list, image_id_list, image_height_list, image_width_list 87 | 88 | def __get_bounding_box_information(self, data_dict): 89 | annotations = data_dict["annotations"] 90 | image_id_list = [] 91 | bbox_list = [] 92 | category_id_list = [] 93 | for annotation in annotations: 94 | category_id_list.append(annotation["category_id"]) 95 | image_id_list.append(annotation["image_id"]) 96 | bbox_list.append(annotation["bbox"]) 97 | return image_id_list, bbox_list, category_id_list 98 | 99 | def __get_category_id_information(self, data_dict): 100 | categories = data_dict["categories"] 101 | category_dict = {} 102 | for category in categories: 103 | category_dict[category["name"]] = category["id"] 104 | return category_dict 105 | 106 | def __process_coord(self, x, y, w, h, image_width, image_height): 107 | x_center, y_center, w_norm, h_norm = ResizeWithPad(h=image_height, w=image_width).raw_to_resized(x, y, w, h) 108 | return x_center, y_center, w_norm, h_norm 109 | 110 | def __bbox_information(self, image_id, image_ids_from_annotation, bboxes, image_height, image_width, category_ids): 111 | processed_bboxes = [] 112 | index_list = self.__find_all(x=image_ids_from_annotation, value=image_id) 113 | for index in index_list: 114 | x, y, w, h = bboxes[index] 115 | 116 | x_center, y_center, w_norm, h_norm = self.__process_coord(x, y, w, h, image_width, image_height) 117 | processed_bboxes.append([self.__category_id_transform(category_ids[index]), x_center, y_center, w_norm, h_norm]) 118 | return processed_bboxes 119 | 120 | def __category_id_transform(self, original_id): 121 | category_id_dict = self.__get_category_id_information(self.train_dict) 122 | original_name = "none" 123 | for category_name, category_id in category_id_dict.items(): 124 | if category_id == original_id: 125 | original_name = category_name 126 | if original_name == "none": 127 | raise ValueError("An error occurred while transforming the category id.") 128 | return {name: index for index, name in COCO_CLASSES.items()}[original_name] #COCO_CLASSES maps index -> name, so invert it to get the class index 129 | 130 | def __bbox_str(self, bboxes): 131 | bbox_info = "" 132 | for bbox in bboxes: 133 | for item in bbox: 134 | bbox_info += str(item) 135 | bbox_info += " " 136 | return bbox_info.strip() 137 | 138 | def write_data_to_txt(self, txt_dir): 139 | image_files, image_ids, image_heights, image_widths = self.__get_image_information(self.train_dict) 140 | image_ids_from_annotation, bboxes, category_ids = self.__get_bounding_box_information(self.train_dict) 141 | with open(file=txt_dir, mode="a+") as f: 142 | picture_index = 0 143 | for i in range(len(image_files)): 144 | write_line_start_time = time.time() 145 | line_info = "" 146 | line_info += image_files[i] + " " 147 | processed_bboxes = self.__bbox_information(image_ids[i], 148 | image_ids_from_annotation, 149 | bboxes, 150 | image_heights[i], 151 | image_widths[i], 152 | category_ids) 153 | if processed_bboxes: 154 | picture_index += 1 155 | line_info += self.__bbox_str(bboxes=processed_bboxes) 156 | line_info += "\n" 157 | print("Writing information of the {}th picture {} to {}, which took {:.2f}s".format(picture_index, image_files[i], txt_dir, time.time() - write_line_start_time)) 158 | f.write(line_info) 159 | 160 | coco = ParseCOCO() 161 | TXT_DIR = "./data.txt" 162 | coco.write_data_to_txt(TXT_DIR) 163 | 164 | --------------------------------------------------------------------------------
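For reference, each line that `ParseCOCO.write_data_to_txt` above appends to data.txt — and that `ReadTxt.parse_line` later splits — is an image file name followed by five numbers per box. A small sketch of parsing such a line, with made-up values:

```python
# each data.txt line: file_name, then per box: class_id x_center y_center w h
# (coordinates are normalized to [0, 1] by ResizeWithPad.raw_to_resized)
line = "000000000009.jpg 45 0.48 0.33 0.39 0.42 45 0.73 0.55 0.20 0.26"
name, *nums = line.split(" ")
boxes = [nums[i:i + 5] for i in range(0, len(nums), 5)]
print(name, boxes)  # image name and two [class, x, y, w, h] groups
```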
/detect.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/detect.jpg -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from conf import ANCHORS, COCO_CLASSES, XYSCALE, IMAGE_HEIGHT, IMAGE_WIDTH, CATEGORY_NUM, STRIDES 4 | from model_infer import Yolo_Model, load_weights 5 | from prepost_process import postprocess_boxes, postprocess_bbbox, nms, draw_bbox, image_preporcess 6 | import argparse 7 | import cv2 8 | import time 9 | 10 | parser = argparse.ArgumentParser(description='yolov4 detect args') 11 | parser.add_argument('--image', type=str) 12 | parser.add_argument('--weight', type=str, default='yolov4.weights') 13 | args = parser.parse_args() 14 | 15 | if __name__ == "__main__": 16 | anchors = np.array(ANCHORS) 17 | anchors = np.reshape(anchors, [3, 3, 2]) 18 | num_classes = len(COCO_CLASSES) 19 | xy_scale = XYSCALE 20 | input_size = IMAGE_WIDTH 21 | 22 | #input image path 23 | image_path = args.image 24 | img = cv2.imread(image_path) 25 | original_image = img 26 | original_image_size = img.shape[:2] 27 | image_data = image_preporcess(np.copy(original_image), [input_size, input_size]) 28 | 29 | img_tensor = tf.convert_to_tensor(image_data, dtype=tf.float32) 30 | img_tensor = tf.expand_dims(img_tensor, axis=0) 31 | 32 | time_p1 = time.time() 33 | model = Yolo_Model() 34 | 35 | load_weights(model, args.weight) 36 | time_p2 = time.time() 37 | 38 | pred_bbox = model.predict(img_tensor) 39 | time_p3 = time.time() 40 | 41 | pred_bbox = postprocess_bbbox(pred_bbox, anchors, STRIDES, XYSCALE) 42 | bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25) 43 | bboxes = nms(bboxes, 0.213, method='nms') 44 | time_p4 = time.time() 45 | 46 | image = draw_bbox(original_image, bboxes) 47 | cv2.imwrite("detect.jpg", image) 48 | 49 | print("load model: ", time_p2-time_p1) 50 | print("forward: ", time_p3-time_p2) 51 | print("post process: ", time_p4-time_p3) 52 | 53 | -------------------------------------------------------------------------------- /kite.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/kite.jpg -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | #ciou and diou loss python implementation 2 | import math 3 | pi = math.pi 4 | atan = math.atan 5 | 6 | class boxabs: 7 | left, right, top, bot = 0, 0, 0, 0 8 | 9 | class box: 10 | x, y, w, h = 0, 0, 0, 0 11 | 12 | def overlap(x1, w1, x2, w2): 13 | l1 = x1 - w1/2 14 | l2 = x2 - w2/2 15 | left = l1 if l1 - l2 > 0 else l2 16 | r1 = x1 + w1/2 17 | r2 = x2 + w2/2 18 | right = r1 if r1 - r2 < 0 else r2 19 | return right - left 20 | 21 | def box_intersection(a, b): 22 | """ 23 | args: 24 | a type:box 25 | b type:box 26 | """ 27 | w = overlap(a.x, a.w, b.x, b.w) 28 | h = overlap(a.y, a.h, b.y, b.h) 29 | if(w < 0 or h < 0): 30 | return 0 31 | area = w*h; 32 | return area 33 | 34 | def box_union(a, b): 35 | """ 36 | args: 37 | a type:box 38 | b type:box 39 | """ 40 | i = box_intersection(a, b) 41 | u = a.w*a.h + b.w*b.h - i 42 | return u 43 | 44 | def box_c(a, b): 45 | """ 
46 | arg: two boxes a b type: box 47 | return: smallest box that fully encompasses a and b 48 | """ 49 | ba = boxabs() 50 | ba.top = min(a.y - a.h / 2, b.y - b.h / 2) 51 | ba.bot = max(a.y + a.h / 2, b.y + b.h / 2) 52 | ba.left = min(a.x - a.w / 2, b.x - b.w / 2) 53 | ba.right = max(a.x + a.w / 2, b.x + b.w / 2) 54 | return ba 55 | 56 | def box_iou(a, b): 57 | """ 58 | args: 59 | a type:box 60 | b type:box 61 | """ 62 | I = box_intersection(a, b) 63 | U = box_union(a, b) 64 | if (I == 0 or U == 0): 65 | return 0 66 | return I / U 67 | 68 | def box_ciou(pred_box, gtbox): 69 | ba = box_c(pred_box, gtbox) 70 | w = ba.right - ba.left 71 | h = ba.bot - ba.top 72 | #squared diagonal length of ba 73 | c = w * w + h * h 74 | iou = box_iou(pred_box, gtbox) 75 | #degenerate case: w = 0, h = 0 76 | if c == 0: 77 | return iou 78 | #squared center point distance 79 | u = (pred_box.x - gtbox.x) * (pred_box.x - gtbox.x) + (pred_box.y - gtbox.y) * (pred_box.y - gtbox.y) 80 | d = u / c 81 | ar_gt = gtbox.w / gtbox.h 82 | ar_pred = pred_box.w / pred_box.h 83 | ar_loss = 4 / (pi * pi) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)) 84 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001) 85 | ciou_term = d + alpha * ar_loss 86 | return iou - ciou_term 87 | 88 | 89 | def box_diou(pred_box, gtbox): 90 | ba = box_c(pred_box, gtbox) 91 | w = ba.right - ba.left 92 | h = ba.bot - ba.top 93 | c = w * w + h * h 94 | iou = box_iou(pred_box, gtbox) 95 | if (c == 0): 96 | return iou 97 | d = (pred_box.x - gtbox.x) * (pred_box.x - gtbox.x) + (pred_box.y - gtbox.y) * (pred_box.y - gtbox.y) 98 | u = math.pow(d / c, 0.6) 99 | diou_term = u 100 | 101 | return iou - diou_term 102 | 103 | if __name__ == "__main__": 104 | pred_box = box() 105 | pred_box.x, pred_box.y, pred_box.w, pred_box.h = 0.4, 0.6, 0.3, 0.2 106 | gtbox = box() 107 | gtbox.x, gtbox.y, gtbox.w, gtbox.h = 0.5, 0.5, 0.4, 0.3 108 | print("diou loss:", box_diou(pred_box, gtbox)) 109 | print("ciou loss:", box_ciou(pred_box, gtbox)) 110 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_addons as tfa 3 | 4 | #hyper parameters 5 | batch = 64 6 | subdivisions=8 7 | width=608 8 | height=608 9 | channels=3 10 | momentum=0.949 11 | decay=0.0005 12 | angle=0 13 | saturation = 1.5 14 | exposure = 1.5 15 | hue=.1 16 | 17 | learning_rate=0.00261 18 | burn_in=1000 19 | max_batches = 500500 20 | #policy=steps 21 | steps=400000,450000 22 | scales=.1,.1 23 | 24 | #cutmix=1 25 | mosaic=1 26 | 27 | weightfile = "../yolov4.weights" 28 | import numpy as np 29 | 30 | def load_weight(): 31 | 32 | print('Loading weights.') 33 | weights_file = fp = open(weightfile, 'rb') 34 | major, minor, revision = np.ndarray( 35 | shape=(3, ), dtype='int32', buffer=weights_file.read(12)) 36 | if (major*10+minor)>=2 and major<1000 and minor<1000: 37 | seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8)) 38 | else: 39 | seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4)) 40 | print('Weights Header: ', major, minor, revision, seen) 41 | fp.close() 42 | 43 | def mish(x): 44 | return tfa.activations.mish(x) 45 | 46 | def leaky(x): 47 | return tf.nn.leaky_relu(x) 48 | 49 | def res_conn_block(filters, is_half): 50 | res = tf.keras.Sequential() 51 | 52 | res.add(Conv2D_BN_Mish(filters//2, kernel=1, strides=1)) 53 | res.add(Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=3, strides=1)) 54 |
return res 56 | 57 | class Conv2D_BN_Mish(tf.keras.Model): 58 | def __init__(self, filters, kernel, strides): 59 | super(Conv2D_BN_Mish, self).__init__() 60 | padding = 'valid' if strides == 2 else 'same' 61 | if strides == 2: 62 | self.conv2d_bn = tf.keras.Sequential([ 63 | tf.keras.layers.ZeroPadding2D(((1,0),(1,0))), 64 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel, 65 | strides = strides, padding = padding), 66 | tf.keras.layers.BatchNormalization(), 67 | ]) 68 | else: 69 | self.conv2d_bn = tf.keras.Sequential([ 70 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel, 71 | strides = strides, padding = padding), 72 | tf.keras.layers.BatchNormalization(), 73 | ]) 74 | 75 | def call(self, x): 76 | x = self.conv2d_bn(x) 77 | x = mish(x) 78 | return x 79 | 80 | class Conv2D_BN_Leaky(tf.keras.Model): 81 | def __init__(self, filters, kernel, strides): 82 | super(Conv2D_BN_Leaky, self).__init__() 83 | 84 | if strides == 2: 85 | self.conv2d_bn = tf.keras.Sequential([ 86 | tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0))), 87 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel, 88 | strides = strides, padding = 'valid'), 89 | tf.keras.layers.BatchNormalization(), 90 | tf.keras.layers.LeakyReLU(), 91 | ]) 92 | else: 93 | self.conv2d_bn = tf.keras.Sequential([ 94 | tf.keras.layers.Conv2D(filters = filters, kernel_size = kernel, 95 | strides = strides, padding = 'same'), 96 | tf.keras.layers.BatchNormalization(), 97 | tf.keras.layers.LeakyReLU(), 98 | ]) 99 | 100 | def call(self, x): 101 | x = self.conv2d_bn(x) 102 | return x 103 | 104 | class ResBlock(tf.keras.Model): 105 | def __init__(self, filters, res_num, is_half): 106 | super(ResBlock, self).__init__() 107 | self.res_num = res_num 108 | 109 | self.pad_conv = Conv2D_BN_Mish(filters, kernel=3, strides=2) 110 | self.pred_block_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1) 111 | self.res_conn_block = res_conn_block(filters, is_half) 112 | self.succ_block_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1) 113 | self.right_conv = Conv2D_BN_Mish(filters//2 if is_half else filters, kernel=1, strides=1) 114 | self.after_concat_conv = Conv2D_BN_Mish(filters, kernel=1, strides=1) 115 | 116 | def call(self,x): 117 | pred_res = self.pad_conv(x) 118 | right_conv = self.right_conv(pred_res) 119 | left_conv = self.pred_block_conv(pred_res) 120 | for i in range(self.res_num): 121 | res_block_out = self.res_conn_block(left_conv) 122 | left_conv = left_conv + res_block_out 123 | left_conv = self.succ_block_conv(left_conv) 124 | 125 | concat_x = tf.concat([left_conv, right_conv], axis=-1) 126 | out = self.after_concat_conv(concat_x) 127 | return out 128 | 129 | def make_leaky_convs(layer_num, filters, strides=1): 130 | 131 | layers = tf.keras.Sequential() 132 | if layer_num == 1: 133 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides)) 134 | 135 | if layer_num == 3: 136 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides)) 137 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides)) 138 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides)) 139 | 140 | if layer_num == 5: 141 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides)) 142 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides)) 143 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, strides=strides)) 144 | layers.add(Conv2D_BN_Leaky(filters*2, kernel=3, strides=strides)) 145 | layers.add(Conv2D_BN_Leaky(filters, kernel=1, 
strides=strides)) 146 | 147 | return layers 148 | 149 | class spp(tf.keras.Model): 150 | def __init__(self): 151 | super(spp, self).__init__() 152 | self.pool1 = tf.keras.layers.MaxPooling2D((5,5), strides=1, padding='same') 153 | self.pool2 = tf.keras.layers.MaxPooling2D((9,9), strides=1, padding='same') 154 | self.pool3 = tf.keras.layers.MaxPooling2D((13,13), strides=1, padding='same') 155 | 156 | def call(self, x): 157 | return tf.concat([self.pool1(x), self.pool2(x), self.pool3(x), x], -1) 158 | 159 | 160 | class Yolo_Model(tf.keras.Model): 161 | def __init__(self,): 162 | super(Yolo_Model, self).__init__() 163 | self.conv_last1 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same') 164 | self.conv_last2 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same') 165 | self.conv_last3 = tf.keras.layers.Conv2D(255, kernel_size=1, padding='same') 166 | self.pad = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0))) 167 | self.first_conv = Conv2D_BN_Mish(filters=32, kernel=3, strides=1) 168 | self.res_block1 = ResBlock(64, 1, False) 169 | self.res_block2 = ResBlock(128, 2, True) 170 | self.res_block3 = ResBlock(256, 8, True) 171 | 172 | self.res_block4 = ResBlock(512, 8, False) 173 | self.res_block5 = ResBlock(1024, 4, False) 174 | 175 | self.conv_leaky3_1 = make_leaky_convs(3, 512) 176 | self.conv_leaky3_2 = make_leaky_convs(3, 512) 177 | self.conv_leaky1_1 = make_leaky_convs(1, 256) 178 | self.conv_leaky1_2 = make_leaky_convs(1, 128) 179 | self.conv_leaky1_3 = make_leaky_convs(1, 512) 180 | self.conv_leaky1_4 = make_leaky_convs(1, 1024) 181 | self.conv_leaky1_5 = make_leaky_convs(1, 256) 182 | self.conv_leaky1_6 = Conv2D_BN_Mish(256, 3, 2) 183 | self.conv_leaky1_7 = Conv2D_BN_Mish(256, 3, 2) 184 | self.conv_leaky5_1 = make_leaky_convs(5, 256) 185 | self.conv_leaky5_2= make_leaky_convs(5, 128) 186 | self.conv_leaky5_3= make_leaky_convs(5, 512) 187 | self.spp_layer = spp() 188 | self.upsampling = tf.keras.layers.UpSampling2D(2) 189 | 190 | 191 | def call(self, x): 192 | #cspdarknet53 193 | first_conv_out = self.first_conv(x) 194 | res_block1_out = self.res_block1(first_conv_out) 195 | res_block2_out = self.res_block2(res_block1_out) 196 | res_block3_out = self.res_block3(res_block2_out) 197 | 198 | intermediate_1 = res_block3_out 199 | 200 | res_block4_out = self.res_block4(res_block3_out) 201 | 202 | intermediate_2 = res_block4_out 203 | 204 | res_block5_out = self.res_block5(res_block4_out) 205 | 206 | #spp 207 | pred_spp = self.conv_leaky3_1(res_block5_out) 208 | spp_out = self.spp_layer(pred_spp) 209 | 210 | succ_spp = self.conv_leaky3_2(spp_out) 211 | 212 | intermediate_3 = succ_spp 213 | 214 | head2_1 = self.conv_leaky1_1(intermediate_2) 215 | head2_2 = self.conv_leaky1_1(intermediate_3) 216 | head2_2 = self.upsampling(head2_2) 217 | head2 = tf.concat([head2_1, head2_2], axis=-1) 218 | head2 = self.conv_leaky5_1(head2) 219 | 220 | intermediate_4 = head2 221 | 222 | head1_1 = self.conv_leaky1_2(intermediate_1) 223 | head1_2 = self.conv_leaky1_2(intermediate_4) 224 | head1_2 = self.upsampling(head1_2) 225 | head1 = tf.concat([head1_1, head1_2], axis=-1) 226 | head1 = self.conv_leaky5_2(head1) 227 | 228 | intermediate_5 = head1 229 | 230 | head1 = self.conv_leaky1_5(head1) 231 | 232 | head1_out = self.conv_last1(head1) 233 | 234 | head2_3 = self.conv_leaky1_6(intermediate_5) 235 | 236 | head2 = tf.concat([intermediate_4, head2_3], axis=-1) 237 | head2 = self.conv_leaky5_1(head2) 238 | 239 | intermediate_6 = head2 240 | 241 | head2 = self.conv_leaky1_3(head2) 242 | head2_out = 
self.conv_last2(head2) 243 | 244 | head3_2 = self.conv_leaky1_7(intermediate_6) 245 | head3 = tf.concat([intermediate_3, head3_2], axis=-1) 246 | head3 = self.conv_leaky5_3(head3) 247 | head3 = self.conv_leaky1_4(head3) 248 | head3_out = self.conv_last3(head3) 249 | 250 | return head1_out, head2_out, head3_out 251 | 252 | if __name__ == "__main__": 253 | model = Yolo_Model() 254 | x = tf.random.normal(shape=(1, 608, 608, 3)) 255 | head1, head2, head3 = model(x) 256 | 257 | print("head1 shape: ", head1.shape) 258 | print("head2 shape: ", head2.shape) 259 | print("head3 shape: ", head3.shape) 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | -------------------------------------------------------------------------------- /model_infer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from conf import IMAGE_HEIGHT, IMAGE_WIDTH, CATEGORY_NUM 3 | import numpy as np 4 | 5 | #mish activation 6 | def mish(x): 7 | return x*tf.tanh(tf.math.log(1+tf.exp(x))) 8 | 9 | class Mish(tf.keras.layers.Layer): 10 | def __init__(self): 11 | super(Mish, self).__init__() 12 | def call(self, x): 13 | return mish(x) 14 | 15 | #conv block with mish 16 | def single_conv_mish(inputs, filters, kernel, strides): 17 | padding = 'valid' if strides == 2 else 'same' 18 | if strides == 2: 19 | inputs = tf.keras.layers.ZeroPadding2D(((1,0),(1,0)))(inputs) 20 | out = tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel, use_bias=False, 21 | strides=strides, padding=padding)(inputs) 22 | out = tf.keras.layers.BatchNormalization()(out) 23 | out = Mish()(out) 24 | return out 25 | 26 | #conv block with leaky 27 | def single_conv_leaky(inputs, filters, kernel, strides): 28 | padding = 'valid' if strides == 2 else 'same' 29 | if strides == 2: 30 | inputs = tf.keras.layers.ZeroPadding2D(((1,0),(1,0)))(inputs) 31 | out = tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel, use_bias=False, 32 | strides=strides, padding=padding)(inputs) 33 | out = tf.keras.layers.BatchNormalization()(out) 34 | out = tf.keras.layers.LeakyReLU(0.1)(out) 35 | return out 36 | 37 | #res connection 38 | def res_conn_block(inputs, filters, is_half): 39 | out = single_conv_mish(inputs, filters//2, 1, 1) 40 | out = single_conv_mish(out, filters//2 if is_half else filters, 3, 1) 41 | return out 42 | 43 | #single res conn block 44 | def ResBlock(inputs, filters, res_num, is_half): 45 | downsample_out = single_conv_mish(inputs, filters, 3, 2) 46 | right_conv = single_conv_mish(downsample_out, filters//2 if is_half else filters, 1, 1) 47 | 48 | left_conv = single_conv_mish(downsample_out, filters//2 if is_half else filters, 1, 1) 49 | for i in range(res_num): 50 | res_intermidiate = res_conn_block(left_conv, filters, is_half) 51 | left_conv = left_conv + res_intermidiate 52 | left_conv = single_conv_mish(left_conv, filters//2 if is_half else filters, 1, 1) 53 | concat_out = tf.keras.layers.Concatenate()([left_conv, right_conv]) 54 | out = single_conv_mish(concat_out, filters, 1, 1) 55 | 56 | return out 57 | 58 | #conv leaky stacked layers 59 | def make_leaky_convs(inputs, layer_num, filters, strides): 60 | if layer_num == 1: 61 | out = single_conv_leaky(inputs, filters, 1, strides) 62 | 63 | if layer_num == 3: 64 | out = single_conv_leaky(inputs, filters, 1, strides) 65 | out = single_conv_leaky(out, filters*2, 3, strides) 66 
| out = single_conv_leaky(out, filters, 1, strides) 67 | 68 | if layer_num == 5: 69 | out = single_conv_leaky(inputs, filters, 1, strides) 70 | out = single_conv_leaky(out, filters*2, 3, strides) 71 | out = single_conv_leaky(out, filters, 1, strides) 72 | out = single_conv_leaky(out, filters*2, 3, strides) 73 | out = single_conv_leaky(out, filters, 1, strides) 74 | 75 | return out 76 | 77 | #spp module 78 | def spp_module(inputs): 79 | pool1 = tf.keras.layers.MaxPooling2D((13,13), strides=1, padding='same')(inputs) 80 | pool2 = tf.keras.layers.MaxPooling2D((9,9), strides=1, padding='same')(inputs) 81 | pool3 = tf.keras.layers.MaxPooling2D((5,5), strides=1, padding='same')(inputs) 82 | out = tf.keras.layers.Concatenate()([pool1, pool2, pool3, inputs]) 83 | return out 84 | 85 | #transform yolo feature map 86 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite 87 | def transform(conv_output, NUM_CLASS, i=0): 88 | """ 89 | return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] 90 | contains (x, y, w, h, score, probability) 91 | """ 92 | conv_shape = tf.shape(conv_output) 93 | batch_size = conv_shape[0] 94 | output_size = conv_shape[1] 95 | 96 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 97 | conv_raw_xywh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (4, 1, NUM_CLASS), axis=-1) 98 | 99 | pred_conf = tf.sigmoid(conv_raw_conf) 100 | pred_prob = tf.sigmoid(conv_raw_prob) 101 | 102 | return tf.concat([conv_raw_xywh, pred_conf, pred_prob], axis=-1) 103 | 104 | #load weights 105 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite 106 | def load_weights(model, weights_file): 107 | wf = open(weights_file, 'rb') 108 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5) 109 | 110 | j = 0 111 | for i in range(110): 112 | conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d' 113 | bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization' 114 | 115 | conv_layer = model.get_layer(conv_layer_name) 116 | filters = conv_layer.filters 117 | k_size = conv_layer.kernel_size[0] 118 | in_dim = conv_layer.input_shape[-1] 119 | 120 | if i not in [93, 101, 109]: 121 | # darknet weights: [beta, gamma, mean, variance] 122 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters) 123 | # tf weights: [gamma, beta, mean, variance] 124 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 125 | bn_layer = model.get_layer(bn_layer_name) 126 | j += 1 127 | else: 128 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters) 129 | 130 | # darknet shape (out_dim, in_dim, height, width) 131 | conv_shape = (filters, in_dim, k_size, k_size) 132 | conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape)) 133 | # tf shape (height, width, in_dim, out_dim) 134 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0]) 135 | 136 | if i not in [93, 101, 109]: 137 | conv_layer.set_weights([conv_weights]) 138 | bn_layer.set_weights(bn_weights) 139 | else: 140 | conv_layer.set_weights([conv_weights, conv_bias]) 141 | 142 | assert len(wf.read()) == 0, 'failed to read all data' 143 | print("load OK") 144 | wf.close() 145 | 146 | 147 | def yolo_body(inputs, classes): 148 | #cspdarknet53 149 | first_conv = single_conv_mish(inputs, 32, 3, 1) 150 | res_block_1 = ResBlock(first_conv, 64, 1, False) 151 | res_block_2 = ResBlock(res_block_1, 128, 2, True) 152 | res_block_3 = ResBlock(res_block_2, 256, 8, True) 153
| 154 | intermediate_1 = res_block_3 155 | 156 | res_block_4 = ResBlock(res_block_3, 512, 8, True) 157 | 158 | intermediate_2 = res_block_4 159 | 160 | res_block_5 = ResBlock(res_block_4, 1024, 4, True) 161 | 162 | pred_spp = make_leaky_convs(res_block_5, 3, 512, 1) 163 | spp_out = spp_module(pred_spp) 164 | succ_spp = make_leaky_convs(spp_out, 3, 512, 1) 165 | 166 | intermediate_3 = succ_spp 167 | 168 | head2_right = make_leaky_convs(intermediate_3, 1, 256, 1) 169 | head2_right = tf.keras.layers.UpSampling2D()(head2_right) 170 | head2_left = make_leaky_convs(intermediate_2, 1, 256, 1) 171 | head2 = tf.keras.layers.Concatenate()([head2_left, head2_right]) 172 | head2 = make_leaky_convs(head2, 5, 256, 1) 173 | 174 | intermediate_4 = head2 175 | 176 | head1_right = make_leaky_convs(intermediate_4, 1, 128, 1) 177 | head1_right = tf.keras.layers.UpSampling2D()(head1_right) 178 | head1_left = make_leaky_convs(intermediate_1, 1, 128, 1) 179 | head1 = tf.keras.layers.Concatenate()([head1_left, head1_right]) 180 | head1 = make_leaky_convs(head1, 5, 128, 1) #conv92 181 | 182 | intermediate_5 = head1 183 | 184 | head1 = single_conv_leaky(head1, 256, 3, 1) 185 | head1_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head1) 186 | 187 | head2_side = single_conv_leaky(intermediate_5, 256, 3, 2) 188 | head2 = tf.keras.layers.Concatenate()([head2_side, intermediate_4]) 189 | head2 = make_leaky_convs(head2, 5, 256, 1) 190 | 191 | intermediate_6 = head2 192 | 193 | head2 = single_conv_leaky(head2, 512, 3, 1) 194 | head2_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head2) 195 | 196 | head3_right = single_conv_leaky(intermediate_6, 512, 3, 2) 197 | head3 = tf.keras.layers.Concatenate()([head3_right, intermediate_3]) 198 | head3 = make_leaky_convs(head3, 5, 512, 1) 199 | head3 = single_conv_leaky(head3, 1024, 3, 1) 200 | head3_out = tf.keras.layers.Conv2D(3*(4+1+classes), kernel_size=1, padding='same')(head3) 201 | 202 | conv_out = [head1_out, head2_out, head3_out] 203 | 204 | return conv_out 205 | 206 | def Yolo_Model(): 207 | inputs = tf.keras.layers.Input(shape=[IMAGE_WIDTH, IMAGE_HEIGHT, 3]) 208 | yolobody_out = yolo_body(inputs, CATEGORY_NUM) 209 | conv_outs = [] 210 | for i, conv_out in enumerate(yolobody_out): 211 | transformed_out = transform(conv_out, CATEGORY_NUM, i) 212 | conv_outs.append(transformed_out) 213 | 214 | return tf.keras.Model(inputs=inputs, outputs=conv_outs) 215 | 216 | #simple test 217 | if __name__ == "__main__": 218 | #Yolo_Model builds its own Input layer and already returns a tf.keras.Model 219 | model = Yolo_Model() 220 | 221 | x = tf.random.normal(shape=(1, 608, 608, 3)) 222 | out = model(x) 223 | head1, head2, head3 = out[0], out[1], out[2] 224 | 225 | print("head1 shape: ", head1.shape) 226 | print("head2 shape: ", head2.shape) 227 | print("head3 shape: ", head3.shape) -------------------------------------------------------------------------------- /mosaic_argumentation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klauspa/Yolov4-tensorflow/802a9245c94983db7d702e9d5a62512a0539fe51/mosaic_argumentation.jpg -------------------------------------------------------------------------------- /nms.py: -------------------------------------------------------------------------------- 1 | #greedy nms tensorflow implementation 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | CONFIDENCE_THRESHOLD = 0.45 6 |
NUM_CLASS = 80 7 | MAX_BOX_NUM = 20 8 | 9 | def xywh2xyxy(x): 10 | #convert center format (x, y, w, h) to corner format (x1, y1, x2, y2) 11 | y = np.zeros_like(x) 12 | y[..., 0] = x[..., 0] - x[..., 2] / 2 13 | y[..., 1] = x[..., 1] - x[..., 3] / 2 14 | y[..., 2] = x[..., 0] + x[..., 2] / 2 15 | y[..., 3] = x[..., 1] + x[..., 3] / 2 16 | return y 17 | 18 | def nms(pred_boxes, conf_thres=0.5, nms_thres=0.4): 19 | pred_boxes = np.array(pred_boxes) #numpy copy: tf tensors do not support item assignment 20 | pred_boxes[..., :4] = xywh2xyxy(pred_boxes[..., :4]) 21 | output = [None for _ in range(len(pred_boxes))] 22 | 23 | for image_i, image_pred in enumerate(pred_boxes): 24 | # Filter out confidence scores below threshold 25 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 26 | # If none are remaining => process next image 27 | if image_pred.shape[0] == 0: 28 | continue 29 | # Object confidence times class confidence 30 | score = image_pred[:, 4] * image_pred[:, 5:].max(axis=1) 31 | # Sort by it 32 | image_pred = image_pred[(-score).argsort()] 33 | boxes = image_pred[:, :4] 34 | scores = image_pred[:, 4] 35 | selected_indices = tf.image.non_max_suppression( 36 | boxes, scores, MAX_BOX_NUM, nms_thres 37 | ) 38 | selected_boxes = tf.gather(boxes, selected_indices) 39 | selected_scores = tf.gather(scores, selected_indices) 40 | output[image_i] = (selected_boxes.numpy(), selected_scores.numpy()) 41 | return output -------------------------------------------------------------------------------- /prepost_process.py: -------------------------------------------------------------------------------- 1 | #reference: https://github.com/hunglc007/tensorflow-yolov4-tflite 2 | import tensorflow as tf 3 | import numpy as np 4 | import time 5 | import random 6 | import colorsys 7 | import cv2 8 | from conf import COCO_CLASSES 9 | 10 | def image_preporcess(image, target_size, gt_boxes=None): 11 | 12 | ih, iw = target_size 13 | h, w, _ = image.shape 14 | 15 | scale = min(iw/w, ih/h) 16 | nw, nh = int(scale * w), int(scale * h) 17 | image_resized = cv2.resize(image, (nw, nh)) 18 | 19 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0) 20 | dw, dh = (iw - nw) // 2, (ih-nh) // 2 21 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized 22 | image_paded = image_paded / 255.
23 | 24 | if gt_boxes is None: 25 | return image_paded 26 | 27 | else: 28 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw 29 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh 30 | return image_paded, gt_boxes 31 | 32 | def process_feature(conv_output, NUM_CLASS, STRIDES, ANCHORS, CONF_THRESH, i=0, XYSCALE=[1,1,1]): 33 | conv_shape = tf.shape(conv_output) 34 | batch_size = conv_shape[0] 35 | output_size = conv_shape[1] 36 | 37 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 38 | conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1) 39 | 40 | x = tf.tile(tf.expand_dims(tf.range(output_size, dtype=tf.int32), axis=0), [output_size, 1]) 41 | y = tf.tile(tf.expand_dims(tf.range(output_size, dtype=tf.int32), axis=1), [1, output_size]) 42 | xy_grid = tf.expand_dims(tf.stack([x, y], axis=-1), axis=2) 43 | 44 | 45 | xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [batch_size, 1, 1, 3, 1]) 46 | xy_grid = tf.cast(xy_grid, tf.float32) 47 | 48 | pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i] 49 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) 50 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) 51 | 52 | pred_conf = tf.sigmoid(conv_raw_conf) 53 | pred_prob = tf.sigmoid(conv_raw_prob) 54 | 55 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) 56 | 57 | def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]): 58 | for i, pred in enumerate(pred_bbox): 59 | conv_shape = pred.shape 60 | output_size = conv_shape[1] 61 | conv_raw_dxdy = pred[:, :, :, :, 0:2] 62 | conv_raw_dwdh = pred[:, :, :, :, 2:4] 63 | xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size)) 64 | xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2) # [gx, gy, 1, 2] 65 | 66 | xy_grid = np.tile(tf.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1]) 67 | xy_grid = xy_grid.astype(np.float32) #np.float was removed in recent numpy versions 68 | 69 | # pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] 70 | pred_xy = ((tf.sigmoid(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i] 71 | # pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] 72 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) 73 | pred[:, :, :, :, 0:4] = tf.concat([pred_xy, pred_wh], axis=-1) 74 | 75 | 76 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 77 | pred_bbox = tf.concat(pred_bbox, axis=0) 78 | return pred_bbox 79 | 80 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold): 81 | 82 | valid_scale=[0, np.inf] 83 | pred_bbox = np.array(pred_bbox) 84 | 85 | pred_xywh = pred_bbox[:, 0:4] 86 | pred_conf = pred_bbox[:, 4] 87 | 88 | pred_prob = pred_bbox[:, 5:] 89 | 90 | # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax) 91 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, 92 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) 93 | 94 | # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) 95 | org_h, org_w = org_img_shape 96 | resize_ratio = min(input_size / org_w, input_size / org_h) 97 | 98 | dw = (input_size - resize_ratio * org_w) / 2 99 | dh = (input_size - resize_ratio * org_h) / 2 100 | 101 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio 102 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio 103 | 104 | # # (3) clip boxes that are out of range 105 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), 106
| np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) 107 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) 108 | pred_coor[invalid_mask] = 0 109 | 110 | # # (4) discard some invalid boxes 111 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) 112 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) 113 | 114 | # # (5) discard some boxes with low scores 115 | classes = np.argmax(pred_prob, axis=-1) 116 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] 117 | # scores = pred_prob[np.arange(len(pred_coor)), classes] 118 | score_mask = scores > score_threshold 119 | mask = np.logical_and(scale_mask, score_mask) 120 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] 121 | 122 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) 123 | 124 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'): 125 | """ 126 | :param bboxes: (xmin, ymin, xmax, ymax, score, class) 127 | 128 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf 129 | https://github.com/bharatsingh430/soft-nms 130 | """ 131 | classes_in_img = list(set(bboxes[:, 5])) 132 | best_bboxes = [] 133 | 134 | for cls in classes_in_img: 135 | cls_mask = (bboxes[:, 5] == cls) 136 | cls_bboxes = bboxes[cls_mask] 137 | 138 | while len(cls_bboxes) > 0: 139 | max_ind = np.argmax(cls_bboxes[:, 4]) 140 | best_bbox = cls_bboxes[max_ind] 141 | best_bboxes.append(best_bbox) 142 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) 143 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) 144 | weight = np.ones((len(iou),), dtype=np.float32) 145 | 146 | assert method in ['nms', 'soft-nms'] 147 | 148 | if method == 'nms': 149 | iou_mask = iou > iou_threshold 150 | weight[iou_mask] = 0.0 151 | 152 | if method == 'soft-nms': 153 | weight = np.exp(-(1.0 * iou ** 2 / sigma)) 154 | 155 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight 156 | score_mask = cls_bboxes[:, 4] > 0. 157 | cls_bboxes = cls_bboxes[score_mask] 158 | 159 | return best_bboxes 160 | 161 | def bboxes_iou(boxes1, boxes2): 162 | 163 | boxes1 = np.array(boxes1) 164 | boxes2 = np.array(boxes2) 165 | 166 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) 167 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 168 | 169 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 170 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 171 | 172 | inter_section = np.maximum(right_down - left_up, 0.0) 173 | inter_area = inter_section[..., 0] * inter_section[..., 1] 174 | union_area = boxes1_area + boxes2_area - inter_area 175 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 176 | 177 | return ious 178 | 179 | def draw_bbox(image, bboxes, classes=COCO_CLASSES, show_label=True): 180 | """ 181 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates. 182 | """ 183 | 184 | num_classes = len(classes) 185 | image_h, image_w, _ = image.shape 186 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) 
for x in range(num_classes)] 187 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 188 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 189 | 190 | random.seed(0) 191 | random.shuffle(colors) 192 | random.seed(None) 193 | 194 | for i, bbox in enumerate(bboxes): 195 | coor = np.array(bbox[:4], dtype=np.int32) 196 | fontScale = 0.5 197 | score = bbox[4] 198 | class_ind = int(bbox[5]) 199 | bbox_color = colors[class_ind] 200 | bbox_thick = int(0.6 * (image_h + image_w) / 600) 201 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3]) 202 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick) 203 | 204 | if show_label: 205 | bbox_mess = '%s: %.2f' % (classes[class_ind], score) 206 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0] 207 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled 208 | 209 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX, 210 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA) 211 | 212 | return image 213 | 214 | -------------------------------------------------------------------------------- /read_txt.py: -------------------------------------------------------------------------------- 1 | from conf import MAX_TRUE_BOX_NUM_PER_IMG 2 | 3 | class ReadTxt(object): 4 | def __init__(self, line_bytes): 5 | super(ReadTxt, self).__init__() 6 | # bytes -> string 7 | self.line_str = bytes.decode(line_bytes, encoding="utf-8") 8 | 9 | def parse_line(self): 10 | line_info = self.line_str.strip('\n') 11 | split_line = line_info.split(" ") 12 | box_num = (len(split_line) - 1) // 5 13 | image_name = split_line[0] 14 | # print("Reading {}".format(image_name)) 15 | split_line = split_line[1:] 16 | boxes = [] 17 | for i in range(MAX_TRUE_BOX_NUM_PER_IMG): 18 | if i < box_num: 19 | box_x = float(split_line[i * 5 + 1]) #boxes are stored as (class_id, x_center, y_center, w, h), normalized to [0, 1] 20 | box_y = float(split_line[i * 5 + 2]) 21 | box_w = float(split_line[i * 5 + 3]) 22 | box_h = float(split_line[i * 5 + 4]) 23 | class_id = int(split_line[i * 5]) 24 | boxes.append([class_id, box_x, box_y, box_w, box_h]) 25 | """ 26 | else: 27 | box_x = 0 28 | box_y = 0 29 | box_w = 0 30 | box_h = 0 31 | class_id = 0 32 | boxes.append([class_id, box_x, box_y, box_w, box_h]) 33 | """ 34 | 35 | return image_name, boxes -------------------------------------------------------------------------------- /torchx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Workflow 3 | metadata: 4 | generateName: pipeline- 5 | annotations: {pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, pipelines.kubeflow.org/pipeline_compilation_time: '2021-12-16T17:39:24.231309', 6 | pipelines.kubeflow.org/pipeline_spec: '{"name": "Pipeline"}'} 7 | labels: {pipelines.kubeflow.org/kfp_sdk_version: 1.6.2} 8 | spec: 9 | entrypoint: pipeline 10 | templates: 11 | - name: cv-interpret-worker 12 | container: 13 | args: [] 14 | command: [python, -m, torchx.examples.apps.lightning_classy_vision.interpret, 15 | --load_path, /tmp/output/models/last.ckpt, --output_path, /tmp/output/interpret, 16 | --data_path, /tmp/output/processed] 17 | image: ghcr.io/pytorch/torchx:0.1.1 18 | resources: 19 | limits: {cpu: 1000m, memory: 1024M} 20 | requests: {cpu: 1000m, memory: 1024M} 21 | tty: true 22 | metadata: 23 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: cv-interpret, 24 | torchx.pytorch.org/role-index: '0',
torchx.pytorch.org/role-name: worker, 25 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, 26 | pipelines.kubeflow.org/pipeline-sdk-type: kfp} 27 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper 28 | for TorchX component cv-interpret, role worker", "implementation": {"container": 29 | {"command": ["python", "-m", "torchx.examples.apps.lightning_classy_vision.interpret", 30 | "--load_path", "/tmp/output/models/last.ckpt", "--output_path", "/tmp/output/interpret", 31 | "--data_path", "/tmp/output/processed"], "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, 32 | "name": "cv-interpret-worker", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": 33 | "a21fbc29a0eb30707292ef6dda4ae8f46eed5ecbc7ec852f166d018d5b09fed4"}'} 34 | - name: cv-trainer-worker 35 | container: 36 | args: [] 37 | command: [python, -m, torchx.examples.apps.lightning_classy_vision.train, --load_path, 38 | '', --log_path, /tmp/output/logs, --epochs, '1', --output_path, /tmp/output/models, 39 | --num_samples, '200', --data_path, /tmp/output/processed] 40 | image: ghcr.io/pytorch/torchx:0.1.1 41 | resources: 42 | limits: {cpu: 1000m, memory: 4000M} 43 | requests: {cpu: 1000m, memory: 4000M} 44 | tty: true 45 | volumeMounts: 46 | - {mountPath: /tmp/, name: tmp} 47 | outputs: 48 | artifacts: 49 | - {name: mlpipeline-ui-metadata, path: /tmp/outputs/mlpipeline-ui-metadata/data.json} 50 | metadata: 51 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: cv-trainer, 52 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker, 53 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, 54 | pipelines.kubeflow.org/pipeline-sdk-type: kfp} 55 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper 56 | for TorchX component cv-trainer, role worker", "implementation": {"container": 57 | {"command": ["python", "-m", "torchx.examples.apps.lightning_classy_vision.train", 58 | "--load_path", "", "--log_path", "/tmp/output/logs", "--epochs", "1", "--output_path", 59 | "/tmp/output/models", "--num_samples", "200", "--data_path", "/tmp/output/processed"], 60 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "cv-trainer-worker", 61 | "outputs": [{"description": "ui metadata", "name": "mlpipeline-ui-metadata", 62 | "type": "MLPipeline UI Metadata"}]}', pipelines.kubeflow.org/component_ref: '{"digest": 63 | "24a9c860e919337e5ff14db25ae7703cf9b9b2eaeeeb1a6b982ae4be3e9a0e09"}'} 64 | sidecars: 65 | - command: [sh, -c, 'mkdir -p /tmp/outputs/mlpipeline-ui-metadata; echo ''{"outputs": 66 | [{"type": "tensorboard", "source": "/tmp/output/logs/lightning_logs"}]}'' 67 | > /tmp/outputs/mlpipeline-ui-metadata/data.json'] 68 | image: alpine 69 | name: ui-metadata-sidecar 70 | mirrorVolumeMounts: true 71 | volumes: 72 | - emptyDir: {} 73 | name: tmp 74 | - name: datapreproc-worker 75 | container: 76 | args: [] 77 | command: [python, -m, torchx.examples.apps.datapreproc.datapreproc, --input_path, 78 | /tmp/output/tiny-imagenet-200.zip, --output_path, /tmp/output/processed] 79 | image: ghcr.io/pytorch/torchx:0.1.1 80 | resources: 81 | limits: {cpu: 1000m, memory: 1024M} 82 | requests: {cpu: 1000m, memory: 1024M} 83 | tty: true 84 | metadata: 85 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: datapreproc, 86 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker, 87 | torchx.pytorch.org/replica-id: '0', 
pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, 88 | pipelines.kubeflow.org/pipeline-sdk-type: kfp} 89 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper 90 | for TorchX component datapreproc, role worker", "implementation": {"container": 91 | {"command": ["python", "-m", "torchx.examples.apps.datapreproc.datapreproc", 92 | "--input_path", "/tmp/output/tiny-imagenet-200.zip", "--output_path", "/tmp/output/processed"], 93 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "datapreproc-worker", 94 | "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": "306620766906929f355231a5a37dfedf5faff9bf03d1b6f648fe8d6765633e78"}'} 95 | - name: pipeline 96 | dag: 97 | tasks: 98 | - name: cv-interpret-worker 99 | template: cv-interpret-worker 100 | dependencies: [cv-trainer-worker] 101 | - name: cv-trainer-worker 102 | template: cv-trainer-worker 103 | dependencies: [datapreproc-worker] 104 | - name: datapreproc-worker 105 | template: datapreproc-worker 106 | dependencies: [torchx-utils-copy-torchx-utils-copy] 107 | - name: torchx-torchserve-worker 108 | template: torchx-torchserve-worker 109 | dependencies: [cv-trainer-worker] 110 | - {name: torchx-utils-copy-torchx-utils-copy, template: torchx-utils-copy-torchx-utils-copy} 111 | - name: torchx-torchserve-worker 112 | container: 113 | args: [] 114 | command: [python, -m, torchx.apps.serve.serve, --model_path, /tmp/output/models/model.mar, 115 | --management_api, 'http://torchserve.default.svc.cluster.local:8081', --model_name, 116 | tiny_image_net] 117 | image: ghcr.io/pytorch/torchx:0.1.1 118 | ports: 119 | - {containerPort: 8222, name: model-download} 120 | tty: true 121 | metadata: 122 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: torchx-torchserve, 123 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: worker, 124 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, 125 | pipelines.kubeflow.org/pipeline-sdk-type: kfp} 126 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper 127 | for TorchX component torchx-torchserve, role worker", "implementation": 128 | {"container": {"command": ["python", "-m", "torchx.apps.serve.serve", "--model_path", 129 | "/tmp/output/models/model.mar", "--management_api", "http://torchserve.default.svc.cluster.local:8081", 130 | "--model_name", "tiny_image_net"], "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, 131 | "name": "torchx-torchserve-worker", "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": 132 | "7b971194b2a921896419135a1a663036634aa9a40ae15578e7fdb60f38109351"}'} 133 | - name: torchx-utils-copy-torchx-utils-copy 134 | container: 135 | args: [] 136 | command: [python, -m, torchx.apps.utils.copy_main, --src, 'http://cs231n.stanford.edu/tiny-imagenet-200.zip', 137 | --dst, /tmp/output/tiny-imagenet-200.zip] 138 | image: ghcr.io/pytorch/torchx:0.1.1 139 | tty: true 140 | metadata: 141 | labels: {torchx.pytorch.org/version: 0.1.1, torchx.pytorch.org/app-name: torchx-utils-copy, 142 | torchx.pytorch.org/role-index: '0', torchx.pytorch.org/role-name: torchx-utils-copy, 143 | torchx.pytorch.org/replica-id: '0', pipelines.kubeflow.org/kfp_sdk_version: 1.6.2, 144 | pipelines.kubeflow.org/pipeline-sdk-type: kfp} 145 | annotations: {pipelines.kubeflow.org/component_spec: '{"description": "KFP wrapper 146 | for TorchX component torchx-utils-copy, role torchx-utils-copy", "implementation": 147 | {"container": {"command": ["python", 
"-m", "torchx.apps.utils.copy_main", 148 | "--src", "http://cs231n.stanford.edu/tiny-imagenet-200.zip", "--dst", "/tmp/output/tiny-imagenet-200.zip"], 149 | "env": {}, "image": "ghcr.io/pytorch/torchx:0.1.1"}}, "name": "torchx-utils-copy-torchx-utils-copy", 150 | "outputs": []}', pipelines.kubeflow.org/component_ref: '{"digest": "390cfaf8fe8483cb9182e4a8fcccf9fcac9fdfaaff594e7bdb2236dacc367bc7"}'} 151 | arguments: 152 | parameters: [] 153 | serviceAccountName: pipeline-runner 154 | --------------------------------------------------------------------------------