├── README.md
├── figs
│   ├── DQN.png
│   ├── RCNN.png
│   ├── RL1.png
│   ├── seq1.png
│   ├── ssd.png
│   ├── vgg.png
│   ├── yolo.png
│   ├── actions.png
│   ├── flowchart.png
│   ├── test_hist.png
│   └── train_hist.png
├── requirements.txt
├── config.py
├── extract_data.py
├── utils.py
├── svm_pred.py
└── q_learning.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Object-Localization-using-Deep-Reinforcement-Learning

![alt text](figs/flowchart.png)

--------------------------------------------------------------------------------
/figs/DQN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/DQN.png

--------------------------------------------------------------------------------
/figs/RCNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/RCNN.png

--------------------------------------------------------------------------------
/figs/RL1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/RL1.png

--------------------------------------------------------------------------------
/figs/seq1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/seq1.png

--------------------------------------------------------------------------------
/figs/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/ssd.png

--------------------------------------------------------------------------------
/figs/vgg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/vgg.png

--------------------------------------------------------------------------------
/figs/yolo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/yolo.png

--------------------------------------------------------------------------------
/figs/actions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/actions.png

--------------------------------------------------------------------------------
/figs/flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/flowchart.png

--------------------------------------------------------------------------------
/figs/test_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/test_hist.png

--------------------------------------------------------------------------------
/figs/train_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/train_hist.png

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
# Keras and TensorFlow must share a minor version; 2.13.x matches Keras==2.13.1
Keras==2.13.1
matplotlib==3.2.1
numpy==1.22.0
opencv-python==4.8.1.78
pandas==1.0.4
scikit-learn==1.5.0
scipy==1.11.1
tensorflow==2.13.1
tqdm==4.66.3
xmltodict==0.12.0

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# Hyperparameters of the DQN agent
target_update_interval = 1000  # steps between target-network syncs
alpha = .1                     # kept for reference; not referenced by the current code
experience_buffer_size = 2000  # replay buffer capacity
experience_sample_size = 15    # minibatch size drawn from the buffer
gamma = .1                     # discount factor
history_length = 10            # number of past actions encoded in the state
epsilon_min = .1
epsilon_max = 1.0
epsilon_dec_steps = 5
epsilon_dec = (epsilon_max - epsilon_min) / epsilon_dec_steps
max_steps = 40                 # hard cap on actions per episode

training = False               # True: train and save dqn.h5; False: load it and predict

# training_ratio = 0.8

--------------------------------------------------------------------------------
/extract_data.py:
--------------------------------------------------------------------------------
#%%
import os
from os import listdir
from os.path import isfile, join
import shutil
import xmltodict
import cv2
import pickle
#%%
# Constants describing the VOC 2012 layout and its 20 object classes
ROOT_PATH = "VOC2012"
ANNOTATION_FOLDER = "Annotations"
IMAGE_FOLDER = "JPEGImages"
CLASSES = ["person", "bird", "cat", "cow", "dog", "horse", "sheep", "aeroplane", "bicycle", "boat", "bus", "car",
           "motorbike", "train", "bottle", "chair", "diningtable", "pottedplant", "sofa", "tvmonitor"]

CLASS_TO_LABEL = {class_name: label for (class_name, label) in zip(CLASSES, range(len(CLASSES)))}
#%%
# Functions for extracting the relevant data from the VOC 2012 annotations
def extract_objects(xml):
    # A single <object> element parses to a dict, several parse to a list;
    # normalise both cases to a list
    objects = xml['annotation']['object']
    return [objects] if isinstance(objects, dict) else objects

def extract_labels(xml):
    return [CLASS_TO_LABEL[object['name']] for object in extract_objects(xml)]

def extract_bounding_boxes(xml):
    return [tuple([int(round(float(object['bndbox'][key])))
                   for key in ['xmin', 'ymin', 'xmax', 'ymax']]) for object in extract_objects(xml)]

def format_data(data):
    data_tuples = [(value["image"], value["labels"], value["bounding_boxes"]) for key, value in data.items()]

    # Reorder each tuple to (image, bounding_boxes, labels)
    values = [(image, bounding_boxes, labels)
              for (image, labels, bounding_boxes) in data_tuples]

    return values
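#%%
# Worked example (added illustration, not part of the original script): the
# inline annotation below is a hypothetical, minimal VOC-style XML used to
# sanity-check the helpers above; "dog" maps to label 4 in CLASS_TO_LABEL.
sample_xml = xmltodict.parse(
    "<annotation><object><name>dog</name>"
    "<bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox>"
    "</object></annotation>")
assert extract_labels(sample_xml) == [CLASS_TO_LABEL["dog"]]
assert extract_bounding_boxes(sample_xml) == [(48, 240, 195, 371)]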
def process_data():
    annotation_directory_path = os.path.join(ROOT_PATH, ANNOTATION_FOLDER)
    image_directory_path = os.path.join(ROOT_PATH, IMAGE_FOLDER)

    names_and_image_paths_and_xml_paths = [
        (os.path.splitext(filename)[0],
         join(image_directory_path, os.path.splitext(filename)[0] + ".jpg"),
         join(annotation_directory_path, filename))
        for filename in listdir(annotation_directory_path)]

    names_and_image_paths_and_xml_paths = [(name, image_path, annotation_path)
                                           for (name, image_path, annotation_path) in names_and_image_paths_and_xml_paths
                                           if isfile(annotation_path)]

    data = {}

    for (name, image_path, annotation_path) in names_and_image_paths_and_xml_paths:
        with open(annotation_path, 'rb') as annotation_file:
            xml = xmltodict.parse(annotation_file)

        labels = extract_labels(xml)
        bounding_boxes = extract_bounding_boxes(xml)

        data[name] = {
            "image": name,
            "labels": labels,
            "bounding_boxes": bounding_boxes
        }
        print("Processed %s" % name)

    formatted_data = format_data(data)
    rl_data = formatted_data

    # Recreate out_rl/imgs from scratch (makedirs also covers the case where
    # out_rl exists but imgs does not, which a plain mkdir would mishandle)
    if os.path.exists('out_rl/imgs'):
        shutil.rmtree('out_rl/imgs')
    os.makedirs('out_rl/imgs')

    for ((image, bounding_boxes, labels), i) in zip(rl_data, range(len(rl_data))):
        image = cv2.imread(ROOT_PATH + '/' + IMAGE_FOLDER + '/' + image + '.jpg')
        cv2.imwrite("out_rl/imgs/" + str(i) + ".png", image)
        print(str(i))

    bounding_boxes = [bounding_boxes for (image, bounding_boxes, labels) in rl_data]
    pickle.dump(bounding_boxes, open("out_rl/" + "bounding_boxes.p", "wb"), protocol=4)

    labels_rl = [labels for (image, bounding_boxes, labels) in rl_data]
    pickle.dump(labels_rl, open("out_rl/" + "labels_rl.p", "wb"), protocol=4)
#%%
process_data()
#%%

--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
#%%
import os
import pickle
import math
import cv2
import numpy as np
from keras import backend as K       # needed by State.feature_extractor
from keras.models import load_model  # needed to load vgg16.h5
from tqdm import tqdm                # used in get_features
from config import *

#%%
class State:
    # A state is (action history, current bounding box); its feature vector is
    # a 4096-d fully-connected VGG16 activation of the crop concatenated with
    # a one-hot encoding of the action history
    cnn_model = load_model("vgg16.h5")
    feature_extractor = K.function([cnn_model.layers[0].input], [cnn_model.layers[20].output])

    def __init__(self, history, bb, image):
        self.history = history
        self.bb = bb
        self.feature = State.compute_feature(history, bb, image)

    @staticmethod
    def compute_feature(history, bb, image):
        history_feature = State.get_history_feature(history)
        image_feature = State.get_image_feature(image, bb)
        feature = np.concatenate((image_feature, history_feature))
        return np.array([feature])

    @staticmethod
    def get_image_feature(image, bb):
        cropped = crop_image(bb, image)
        feature = State.feature_extractor([cropped.reshape(1, 224, 224, 3)])[0]
        return np.ndarray.flatten(feature)

    @staticmethod
    def get_history_feature(history):
        # One-hot encode the last history_length actions (9 possible actions
        # each, hence 90 entries); -1 means "no action yet" and stays all-zero
        assert len(history) == history_length
        feature = np.zeros((90,))
        for i in range(history_length):
            action = history[i]
            if action != -1:
                feature[i * 9 + action] = 1
        return feature

#%%
def load_data(training_ratio):
    bbs = pickle.load(open(os.path.join("out_rl/", "bounding_boxes.p"), "rb"))
    print('loaded bbs')
    labels = pickle.load(open(os.path.join("out_rl/", "labels_rl.p"), "rb"))
    print('loaded labels')

    # Keep only images that contain exactly one object
    unique_indices = [i for i in range(len(labels)) if len(labels[i]) == 1]
    indices_to_load = unique_indices

    bbs = [bbs[i][0] for i in indices_to_load]
    labels = [labels[i] for i in indices_to_load]
    images = [cv2.imread(os.path.join("out_rl/imgs/", str(i) + ".png")) for i in indices_to_load]

    bbs_train = bbs[:int(len(bbs) * training_ratio)]
    bbs_test = bbs[int(len(bbs) * training_ratio):]
    labels_train = labels[:int(len(labels) * training_ratio)]
    labels_test = labels[int(len(labels) * training_ratio):]
    images_train = images[:int(len(images) * training_ratio)]
    images_test = images[int(len(images) * training_ratio):]

    return bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load
#%%
def iou(boxA, boxB):
    # Intersection over union of two (xmin, ymin, xmax, ymax) boxes, treating
    # coordinates as inclusive pixel indices (hence the +1 terms)
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou
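#%%
# Worked example (added illustration, not part of the original file): with the
# inclusive +1 convention above, boxes (0, 0, 9, 9) and (5, 0, 14, 9) each
# cover 100 pixels and share a 5x10 strip, so IoU = 50 / (100 + 100 - 50) = 1/3
assert abs(iou((0, 0, 9, 9), (5, 0, 14, 9)) - 1.0 / 3.0) < 1e-9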
#%%
def crop_image(bb, image):
    # Clamp the box to the image bounds, crop, and resize to the 224x224 VGG
    # input; a degenerate (empty) crop falls back to an all-zero image.
    # Note image.shape is (rows, cols, channels), so w here is the pixel height.
    w, h, d = image.shape
    bb = [int(math.floor(b)) for b in bb]
    bb[0] = max(bb[0], 0)
    bb[1] = max(bb[1], 0)
    bb[2] = min(bb[2], h)
    bb[3] = min(bb[3], w)
    cropped = image[bb[1]:bb[3], bb[0]:bb[2]]
    w, h, d = cropped.shape
    if w == 0 or h == 0:
        cropped = np.zeros((224, 224, 3))
    else:
        cropped = cv2.resize(cropped, (224, 224))
    return cropped
#%%
def get_features(images, bbs, labels):
    # CNN feature of each ground-truth crop, paired with its class label
    # (the history half of the state is not needed here)
    feature_to_all = []
    label = []

    for xi, yi, l, data_index in tqdm(zip(images, bbs, labels, range(len(images)))):
        feature = State.get_image_feature(xi, yi)
        feature_to_all.append(feature)
        label.append(l)

    return feature_to_all, label
#%%

--------------------------------------------------------------------------------
/svm_pred.py:
--------------------------------------------------------------------------------
#%%
import csv
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from collections import OrderedDict
from keras import backend as K
from keras.models import Sequential, load_model
import os
import cv2
import numpy as np
from config import *
from utils import *
import pickle
from tqdm import tqdm
#%%
def load_data_1(training_ratio):
    # Same as utils.load_data, but reads the out_rl_07/ variant of the dataset
    bbs = pickle.load(open(os.path.join("out_rl_07/", "bounding_boxes.p"), "rb"))
    print('loaded bbs')
    labels = pickle.load(open(os.path.join("out_rl_07/", "labels_rl.p"), "rb"))
    print('loaded labels')

    unique_indices = [i for i in range(len(labels)) if len(labels[i]) == 1]
    indices_to_load = unique_indices

    bbs = [bbs[i][0] for i in indices_to_load]
    labels = [labels[i] for i in indices_to_load]
    images = [cv2.imread(os.path.join("out_rl_07/imgs/", str(i) + ".png")) for i in indices_to_load]

    bbs_train = bbs[:int(len(bbs) * training_ratio)]
    bbs_test = bbs[int(len(bbs) * training_ratio):]
    labels_train = labels[:int(len(labels) * training_ratio)]
    labels_test = labels[int(len(labels) * training_ratio):]
    images_train = images[:int(len(images) * training_ratio)]
    images_test = images[int(len(images) * training_ratio):]

    return bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load

#%%
CLASSES = ["person", "bird", "cat", "cow", "dog", "horse", "sheep", "aeroplane", "bicycle", "boat", "bus", "car",
           "motorbike", "train", "bottle", "chair", "diningtable", "pottedplant", "sofa", "tvmonitor"]

#%%
training_ratio = 0  # everything goes into the *_test splits

bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load = load_data_1(training_ratio)
#%%
feature_train = []
with open('features.csv') as f:
    feature = csv.reader(f, delimiter=',')
    next(feature, None)  # skip the header row written by DataFrame.to_csv
    for row in feature:
        feature_train.append([float(i) for i in row])
#%%
label_train = []
with open('lables.csv') as f:  # sic: the training script writes this misspelled filename
    label = csv.reader(f, delimiter=',')
    next(label, None)  # skip the header row
    for row in label:
        label_train.append([float(i) for i in row])
#%%
label_train = np.ndarray.flatten(np.array(label_train))
svm = LinearSVC()
svm.fit(feature_train, label_train)
print("fit completed")
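#%%
# Added illustration (not in the original script): with training_ratio = 0 the
# SVM above is fit purely on the pre-computed features.csv/lables.csv dumps; a
# quick, optimistic sanity check is its accuracy on those same features
print("train accuracy: %.3f" % svm.score(feature_train, label_train))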
#%%
predict_bbs = []
with open('predicted_bounding_boxes.csv') as f:
    bbs = csv.reader(f, delimiter=',')
    for row in bbs:
        predict_bbs.append([float(i) for i in row])
#%%
predict_feature = get_features(images_test, predict_bbs, labels_test)
#%%
ground_truth = get_features(images_test, bbs_test, labels_test)
#%%
# Classify both the DQN-predicted crops and the ground-truth crops
label_DeepQ = svm.predict(predict_feature[0])
label_ground = svm.predict(ground_truth[0])


# accuracy_predict = OrderedDict()
# accuracy_ground = OrderedDict()
# sum_correct_predict = []
# sum_correct_ground = []
# #%%
# # labels_test = np.array(labels_test)#######
# for i in range(20):
#     unique, counts = np.unique(labels_test, return_counts=True)
#     count = dict(zip(unique, counts))[i]
#     indexes = np.where(labels_test == i)[0]
#     unique1, counts1 = np.unique(label_DeepQ[indexes], return_counts=True)
#     count_predict = dict(zip(unique1, counts1))[i]
#     unique2, counts2 = np.unique(label_ground[indexes], return_counts=True)
#     count_ground = dict(zip(unique2, counts2))[i]
#     sum_correct_predict.append(count_predict)
#     sum_correct_ground.append(count_ground)
#     accuracy_predict[CLASSES[i]] = count_predict/count
#     accuracy_ground[CLASSES[i]] = count_ground/count

# total_accu_predict = sum(sum_correct_predict)/len(labels_test)
# total_accu_ground = sum(sum_correct_ground)/len(labels_test)
# print('accuracy of predicted bounding boxes of test data in SVM:', accuracy_predict)
# print('accuracy of ground truth bounding boxes of test data in SVM:', accuracy_ground)
# print('accuracy of total correct classification for prediction = ', total_accu_predict)
# print('accuracy of total correct classification for ground truth = ', total_accu_ground)
# print("relative:")
# print([x/y for x,y in zip(accuracy_predict.values(), accuracy_ground.values())])
#%%

--------------------------------------------------------------------------------
/q_learning.py:
--------------------------------------------------------------------------------
#%%
import random
from keras.layers import Dense
from keras import backend as K
from keras.models import Sequential, load_model
import numpy as np
import pandas as pd
import cv2
import pickle
import math
import matplotlib.pyplot as plt
import sys
import os
import tensorflow as tf
import collections
from tqdm import tqdm

from utils import *
from config import *

HUBER_DELTA = 1.0

def smoothL1(y_true, y_pred):
    # Huber loss: quadratic below HUBER_DELTA, linear above, summed over elements
    x = K.abs(y_true - y_pred)
    x = tf.where(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
    return K.sum(x)
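
# Added illustration (not in the original file): smoothL1 on residuals .5 and
# 2. should give 0.5 * .5**2 + (2. - .5) = .125 + 1.5 = 1.625, exercising both
# the quadratic and the linear branch of the Huber loss
assert abs(float(smoothL1(tf.constant([0., 0.]), tf.constant([.5, 2.]))) - 1.625) < 1e-6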

def initialize_model():
    # Q-network: 4096-d CNN feature + 90-d action history in, 9 action values out
    model = Sequential()
    model.add(Dense(1024, input_shape=(4096 + 90,), activation='relu'))
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(9, activation='linear'))
    model.compile(loss=smoothL1, optimizer='adam')
    return model


loss_arr = []

def fit(model, x, y):
    # Train on one batch; report the running average loss every 100 batches
    global loss_arr
    loss = model.train_on_batch(x, y)
    loss_arr.append(loss)
    if len(loss_arr) == 100:
        print("loss %s" % str(sum(loss_arr) / len(loss_arr)))
        loss_arr = []


def transform(bb, a):
    # Actions 0-7 move or resize the box by alpha of its width/height:
    # 0: right, 1: left, 2: down, 3: up, 4: grow, 5: shrink,
    # 6: reduce height, 7: reduce width; action 8 is the trigger (stop)
    alpha = .2
    alpha_w = alpha * (bb[2] - bb[0])
    alpha_h = alpha * (bb[3] - bb[1])
    dx1 = 0
    dy1 = 0
    dx2 = 0
    dy2 = 0

    if a == 0:
        dx1 = alpha_w
        dx2 = alpha_w
    elif a == 1:
        dx1 = -alpha_w
        dx2 = -alpha_w
    elif a == 2:
        dy1 = alpha_h
        dy2 = alpha_h
    elif a == 3:
        dy1 = -alpha_h
        dy2 = -alpha_h
    elif a == 4:
        dx1 = -alpha_w
        dx2 = alpha_w
        dy1 = -alpha_h
        dy2 = alpha_h
    elif a == 5:
        dx1 = alpha_w
        dx2 = -alpha_w
        dy1 = alpha_h
        dy2 = -alpha_h
    elif a == 6:
        dy1 = alpha_h
        dy2 = -alpha_h
    elif a == 7:
        dx1 = alpha_w
        dx2 = -alpha_w

    bb = (bb[0] + dx1, bb[1] + dy1, bb[2] + dx2, bb[3] + dy2)
    # Keep coordinates ordered as (xmin, ymin, xmax, ymax)
    bb = (
        min(bb[0], bb[2]),
        min(bb[1], bb[3]),
        max(bb[0], bb[2]),
        max(bb[1], bb[3]),
    )

    return bb


def trigger_reward(bb, true_bb):
    # +3/-3 for stopping with IoU above/below 0.6
    return 3 if iou(bb, true_bb) > .6 else -3


def transform_reward(bb, bbp, true_bb):
    # +1 if the move improved IoU with the ground truth, else -1
    return 1 if iou(bbp, true_bb) > iou(bb, true_bb) else -1


def get_q(s, model):
    # Q-values of all 9 actions in state s
    return np.ndarray.flatten(model.predict(s.feature))


def select_action(s, true_bb, step, epsilon, action_values):
    # Epsilon-greedy with guided exploration: random choices are drawn only
    # from actions whose immediate reward would be non-negative
    if step == max_steps:
        a = 8  # force the trigger once the step budget is exhausted

    else:
        if random.random() > epsilon:
            a = np.argmax(action_values)

        else:
            action_rewards = [transform_reward(s.bb, transform(s.bb, a_tmp), true_bb) for a_tmp in range(8)]
            action_rewards.append(trigger_reward(s.bb, true_bb))
            action_rewards = np.array(action_rewards)
            positive_action_indices = np.where(action_rewards >= 0)[0]

            if len(positive_action_indices) == 0:
                positive_action_indices = list(range(0, 9))
            a = np.random.choice(positive_action_indices)

    return a


def take_action(s, true_bb, a, image):

    if a == 8:
        # Trigger: the episode ends and the current box is the prediction
        sp = s
        r = trigger_reward(s.bb, true_bb)
        took_trigger = True

    else:
        bb = s.bb
        bbp = transform(bb, a)
        r = transform_reward(bb, bbp, true_bb)
        took_trigger = False
        historyp = s.history[1:]
        historyp.append(a)
        assert len(historyp) == history_length
        sp = State(historyp, bbp, image)

    return sp, r, took_trigger
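
# Added illustration (not in the original file): action 5 shrinks the box by
# alpha = .2 of its width and height on each side, so a (0, 0, 100, 100) box
# becomes (20, 20, 80, 80)
assert transform((0, 0, 100, 100), 5) == (20, 20, 80, 80)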

def weights_from_errors(errors):
    # Rank-based prioritised replay: sampling weight 1/k for the k-th largest
    # TD error (a worked example appears before main() below)
    sorted_inds = sorted(range(len(errors)), key=lambda x: errors[x])
    inv_ranks = [0] * len(errors)

    for i in range(len(inv_ranks)):
        inv_ranks[sorted_inds[i]] = 1.0 / (len(inv_ranks) - i)

    return inv_ranks


def apply_experience(main_model, target_model, experience, experience_errors):
    # Sample a prioritised minibatch and fit the main network towards targets
    # computed from the frozen target network
    weights = weights_from_errors(experience_errors)
    sample_inds = random.choices(range(len(experience)), k=experience_sample_size, weights=weights)
    sample = [experience[i] for i in sample_inds]

    targets = np.zeros((experience_sample_size, 9))

    for i in range(experience_sample_size):
        s, a, r, sp, done = sample[i]
        target = r

        if not done:
            target = compute_target(r, sp, target_model)
        targets[i, :] = get_q(s, main_model)
        targets[i][a] = target

    x = np.concatenate([s.feature for (s, a, r, sp, d) in sample])
    fit(main_model, x, targets)


def compute_target(r, sp, target_model):
    # One-step TD target: r + gamma * max_a' Q_target(s', a')
    return r + gamma * np.amax(get_q(sp, target_model))


def copy_main_to_target_model_weights(main_model, target_model):
    weights = main_model.get_weights()
    target_model.set_weights(weights)


def q_learning_train(x, y, labels, epochs, main_model, target_model):

    epsilon = epsilon_max
    experience = collections.deque(maxlen=experience_buffer_size)
    experience_errors = collections.deque(maxlen=experience_buffer_size)
    total_steps = 0

    for epoch in range(epochs):

        print("epoch %i" % epoch)

        for xi, yi, l, data_index in zip(x, y, labels, range(len(x))):

            # xi.shape is (rows, cols, channels); the starting box covers the
            # whole image as (0, 0, image_width, image_height)
            (width, height, d) = xi.shape
            initial_history = [-1] * history_length
            initial_bb = (0, 0, height, width)
            s = State(initial_history, initial_bb, xi)
            done = False
            total_reward = 0
            step = 0

            while not done:

                action_values = get_q(s, main_model)
                a = select_action(s, yi, step, epsilon, action_values)
                sp, r, done = take_action(s, yi, a, xi)
                step_experience = (s, a, r, sp, done)

                # add the experience and its TD error to the buffer
                experience.append(step_experience)
                experience_errors.append(abs(action_values[a] - compute_target(r, sp, target_model)))

                # replay a prioritised minibatch
                apply_experience(main_model, target_model, experience, experience_errors)
                s = sp
                total_reward += r
                step += 1
                total_steps += 1

                # periodically sync the target Q-network
                if total_steps % target_update_interval == 0:
                    copy_main_to_target_model_weights(main_model, target_model)

            # try:
            #     start_point = (s.bb[0], s.bb[2])
            #     print("start point {}".format(start_point))
            #     end_point = (s.bb[1], s.bb[3])
            #     print("end point {}".format(end_point))

            #     color = (255, 0, 0)
            #     thickness = 2

            #     image = cv2.rectangle(xi, start_point, end_point, color, thickness)
            #     cv2.imshow('img', image)
            #     cv2.waitKey(10)
            # except:
            #     pass

            print("data_index %s" % data_index)
            print("reward %i" % total_reward)
            print("iou %f" % iou(s.bb, yi))

        if epoch < epsilon_dec_steps:
            epsilon -= epsilon_dec
            print("epsilon changed to %f" % epsilon)

    return main_model


def q_learning_predict(x, model):
    # Greedy rollout: follow argmax Q until the trigger action (or the step cap)
    y = []
    count = 0
    for xi in x:

        (width, height, d) = xi.shape
        initial_history = [-1] * history_length
        initial_bb = (0, 0, height, width)
        s = State(initial_history, initial_bb, xi)
        done = False

        for i in range(sys.maxsize):

            action_values = get_q(s, model)
            if i == max_steps - 1:
                a = 8

            else:
                a = np.argmax(action_values)
            if a == 8:
                sp = s
                done = True

            else:
                bbp = transform(s.bb, a)
                historyp = s.history[1:]
                historyp.append(a)
                assert len(historyp) == history_length
                sp = State(historyp, bbp, xi)
            s = sp
            if done:
                break
        count += 1
        print("image ", count, " predicted")

        # try:
        #     s.bb = [int(math.floor(b)) for b in s.bb]
        #     img = xi[s.bb[1]:s.bb[3], s.bb[0]:s.bb[2]]
        #     cv2.imshow('img', img)
        #     if cv2.waitKey(25) & 0xFF == ord('q'):
        #         break
        #     print(s.bb)
        # except:
        #     pass

        y.append(s.bb)

    return y
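
# Added illustration (not in the original file): weights_from_errors implements
# rank-based prioritisation -- the largest TD error gets sampling weight 1, the
# runner-up 1/2, and so on, regardless of the error magnitudes
assert weights_from_errors([.1, 3., .5]) == [1.0 / 3.0, 1.0, 0.5]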
predicted") 306 | 307 | # try: 308 | # s.bb = [int(math.floor(b)) for b in s.bb] 309 | # img = xi[s.bb[1]:s.bb[3], s.bb[0]:s.bb[2]] 310 | # cv2.imshow('img', img) 311 | # if cv2.waitKey(25) & 0xFF == ord('q'): 312 | # break 313 | # print(s.bb) 314 | # except: 315 | # pass 316 | 317 | y.append(s.bb) 318 | 319 | return y 320 | 321 | def main(): 322 | 323 | training_ratio = 1 324 | 325 | bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load = load_data(training_ratio) 326 | 327 | print('images loaded') 328 | 329 | 330 | # features_csv, labels_csv = get_features(images_train, bbs_train, labels_train) 331 | # features_csv = pd.DataFrame(features_csv) 332 | # labels_csv = pd.DataFrame(labels_csv) 333 | # features_csv.to_csv('features.csv', index = False) 334 | # labels_csv.to_csv('lables.csv', index = False) 335 | 336 | if training: 337 | 338 | main_model = initialize_model() 339 | weights = main_model.get_weights() 340 | target_model = initialize_model() 341 | target_model.set_weights(weights) 342 | model = q_learning_train(images_train, bbs_train, labels_train, 15, main_model, target_model) 343 | model.save("dqn.h5") 344 | 345 | else: 346 | 347 | model = load_model("dqn.h5") 348 | y = q_learning_predict(images_test, model) 349 | inds = range(int(len(images_test) * training_ratio), len(images_test)) 350 | 351 | np.savetxt("predicted_bounding_boxes.csv", y, delimiter=',', newline='\n') 352 | np.savetxt("predicted_image_indices.csv", inds, delimiter=',', newline='\n') 353 | np.savetxt("predicted_image_labels.csv", labels_test, delimiter=',', newline='\n') 354 | #%% 355 | main() 356 | #%% 357 | --------------------------------------------------------------------------------