├── README.md
├── figs
    ├── DQN.png
    ├── RCNN.png
    ├── RL1.png
    ├── seq1.png
    ├── ssd.png
    ├── vgg.png
    ├── yolo.png
    ├── actions.png
    ├── flowchart.png
    ├── test_hist.png
    └── train_hist.png
├── requirements.txt
├── config.py
├── extract_data.py
├── utils.py
├── svm_pred.py
└── q_learning.py


/README.md:
--------------------------------------------------------------------------------
1 | # Object-Localization-using-Deep-Reinforcement-Learning
2 | 
3 | ![alt text](figs/flowchart.png)
4 | 


--------------------------------------------------------------------------------
/figs/DQN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/DQN.png


--------------------------------------------------------------------------------
/figs/RCNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/RCNN.png


--------------------------------------------------------------------------------
/figs/RL1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/RL1.png


--------------------------------------------------------------------------------
/figs/seq1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/seq1.png


--------------------------------------------------------------------------------
/figs/ssd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/ssd.png


--------------------------------------------------------------------------------
/figs/vgg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/vgg.png


--------------------------------------------------------------------------------
/figs/yolo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/yolo.png


--------------------------------------------------------------------------------
/figs/actions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/actions.png


--------------------------------------------------------------------------------
/figs/flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/flowchart.png


--------------------------------------------------------------------------------
/figs/test_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/test_hist.png


--------------------------------------------------------------------------------
/figs/train_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ambirpatel/Object-Localization-using-Deep-Reinforcement-Learning/HEAD/figs/train_hist.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Keras==2.13.1
 2 | matplotlib==3.2.1
 3 | numpy==1.22.0
 4 | opencv-python==4.8.1.78
 5 | pandas==1.0.4
 6 | scikit-learn==1.5.0
 7 | scipy==1.11.1
 8 | tensorflow==2.12.1
 9 | tqdm==4.66.3
10 | xmltodict==0.12.0
11 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | # Change parameters of model
 2 | target_update_interval = 1000
 3 | alpha = .1
 4 | experience_buffer_size = 2000
 5 | experience_sample_size = 15
 6 | gamma = .1
 7 | history_length = 10
 8 | epsilon_min = .1
 9 | epsilon_max = 1.0
10 | epsilon_dec_steps = 5
11 | epsilon_dec = (epsilon_max - epsilon_min) / epsilon_dec_steps
12 | max_steps = 40
13 | 
14 | training = False
15 | 
16 | # training_ratio = 0.8


--------------------------------------------------------------------------------
/extract_data.py:
--------------------------------------------------------------------------------
 1 | #%%
 2 | import os
 3 | from os import listdir
 4 | from os.path import isfile, join
 5 | import shutil
 6 | import xmltodict
 7 | import cv2
 8 | import pickle
 9 | #%%
10 | # Defining constants
11 | ROOT_PATH = "VOC2012"
12 | ANNOTATION_FOLDER = "Annotations"
13 | IMAGE_FOLDER = "JPEGImages"
14 | CLASSES = ["person", "bird", "cat", "cow", "dog", "horse", "sheep", "aeroplane", "bicycle", "boat", "bus", "car",
15 |            "motorbike", "train", "bottle", "chair", "diningtable", "pottedplant", "sofa", "tvmonitor"]
16 | 
17 | CLASS_TO_LABEL = {class_name: label for (class_name, label) in zip(CLASSES, range(len(CLASSES)))}
18 | #%%
19 | # Defining functions for extracting relevant data from the VOC 2012 database
20 | def extract_objects(xml):
21 |     objects = xml['annotation']['object']
22 |     return [objects] if isinstance(objects,xmltodict.OrderedDict) else objects
23 | 
24 | def extract_labels(xml):
25 |     return [CLASS_TO_LABEL[object['name']] for object in extract_objects(xml)]
26 | 
27 | def extract_bounding_boxes(xml):
28 |     return [tuple([int(round(float(object['bndbox'][key])))
29 |                    for key in ['xmin', 'ymin', 'xmax', 'ymax']]) for object in extract_objects(xml)]
30 | 
31 | def format_data(data):
32 |     data_tuples = [(value["image"],value["labels"],value["bounding_boxes"]) for key,value in data.items()]
33 | 
34 |     values = [(image, bounding_boxes,labels)
35 |               for (image, labels, bounding_boxes) in data_tuples]
36 | 
37 |     return values
38 | 
39 | def process_data():
40 |     annotation_directory_path = os.path.join(ROOT_PATH, ANNOTATION_FOLDER)
41 |     image_directory_path = os.path.join(ROOT_PATH, IMAGE_FOLDER)
42 | 
43 |     names_and_image_paths_and_xml_paths = [
44 |         (os.path.splitext(filename)[0],join(image_directory_path, os.path.splitext(filename)[0] + ".jpg"), join(annotation_directory_path, filename))
45 | 
46 |         for filename in listdir(annotation_directory_path)]
47 | 
48 |     names_and_image_paths_and_xml_paths = [(name,image_path, annotation_path)
49 |                                            for (name,image_path, annotation_path) in names_and_image_paths_and_xml_paths
50 |                                            if isfile(annotation_path)]
51 | 
52 |     data = {}
53 | 
54 |     for (name,image_path, annotation_path) in names_and_image_paths_and_xml_paths:
55 |         xml = xmltodict.parse(open(annotation_path, 'rb'))
56 | 
57 |         labels = extract_labels(xml)
58 |         bounding_boxes = extract_bounding_boxes(xml)
59 | 
60 |         data[name] = {
61 |             "image":name,
62 |             "labels":labels,
63 |             "bounding_boxes":bounding_boxes
64 |         }
65 |         print("Processed %s" % name)
66 | 
67 |     formatted_data = format_data(data)
68 |     rl_data = formatted_data
69 | 
70 |     if os.path.exists('out_rl/imgs'):
71 |         shutil.rmtree('out_rl/imgs')
72 |         os.mkdir('out_rl/imgs')
73 |     else:
74 |         os.mkdir('out_rl')
75 |         os.mkdir('out_rl/imgs')
76 |     
77 |     for ((image,bounding_boxes,labels),i) in zip(rl_data,range(len(rl_data))):
78 |         image = cv2.imread(ROOT_PATH+'/'+IMAGE_FOLDER+'/'+image+'.jpg')
79 |         cv2.imwrite("out_rl/imgs/"+str(i)+".png",image)
80 |         print(str(i))
81 | 
82 |     bounding_boxes = [bounding_boxes for (image,bounding_boxes,labels) in rl_data]
83 |     pickle.dump(bounding_boxes, open("out_rl/"+"bounding_boxes.p", "wb"),protocol=4)
84 | 
85 |     labels_rl = [labels for (image,bounding_boxes,labels) in rl_data]
86 |     pickle.dump(labels_rl, open("out_rl/"+"labels_rl.p", "wb"),protocol=4)
87 | #%%
88 | process_data()
89 | #%%


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | #%%
  2 | import os
  3 | import pickle
  4 | import math
  5 | import cv2
  6 | import numpy as np
  7 | from config import *
  8 | 
  9 | #%%
 10 | class State:
 11 |     cnn_model = load_model(os.path.join("vgg16.h5"))
 12 |     feature_extractor = K.function([cnn_model.layers[0].input], [cnn_model.layers[20].output])
 13 | 
 14 |     def __init__(self, history, bb, image):
 15 |         self.history = history
 16 |         self.bb = bb
 17 |         self.feature = State.compute_feature(history, bb, image)
 18 | 
 19 |     @staticmethod
 20 |     def compute_feature(history, bb, image):
 21 |         history_feature = State.get_history_feature(history)
 22 |         image_feature = State.get_image_feature(image, bb)
 23 |         feature = np.concatenate((image_feature, history_feature))
 24 |         return np.array([feature])
 25 | 
 26 |     @staticmethod
 27 |     def get_image_feature(image, bb):
 28 |         cropped = crop_image(bb, image)
 29 |         feature = State.feature_extractor([cropped.reshape(1, 224, 224, 3)])[0]
 30 |         return np.ndarray.flatten(feature)
 31 | 
 32 |     @staticmethod
 33 |     def get_history_feature(history):
 34 |         assert len(history) == history_length
 35 |         feature = np.zeros((90,))
 36 |         for i in range(history_length):
 37 |             action = history[i]
 38 |             if action != -1:
 39 |                 feature[i * 9 + action] = 1
 40 |         return feature
 41 | 
 42 | #%%
 43 | def load_data(training_ratio):
 44 |     bbs = pickle.load(open(os.path.join("out_rl/", "bounding_boxes.p"), "rb"))
 45 |     print('loaded bbs')
 46 |     labels = pickle.load(open(os.path.join("out_rl/", "labels_rl.p"), "rb"))
 47 |     print('loaded labels')
 48 | 
 49 |     unique_indices = [i for i in range(len(labels)) if len(labels[i]) == 1]
 50 |     indices_to_load = unique_indices
 51 | 
 52 |     bbs = [bbs[i][0] for i in indices_to_load]
 53 |     labels = [labels[i] for i in indices_to_load]
 54 |     images = [cv2.imread(os.path.join("out_rl/imgs/", str(i) + ".png")) for i in indices_to_load]
 55 | 
 56 |     bbs_train = bbs[:int(len(bbs) * training_ratio)]
 57 |     bbs_test = bbs[int(len(bbs) * training_ratio):]
 58 |     labels_train = labels[:int(len(labels) * training_ratio)]
 59 |     labels_test = labels[int(len(labels) * training_ratio):]
 60 |     images_train = images[:int(len(images) * training_ratio)]
 61 |     images_test = images[int(len(images) * training_ratio):]
 62 | 
 63 |     return bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load
 64 | #%%
 65 | def iou(boxA, boxB):
 66 | 	xA = max(boxA[0], boxB[0])
 67 | 	yA = max(boxA[1], boxB[1])
 68 | 	xB = min(boxA[2], boxB[2])
 69 | 	yB = min(boxA[3], boxB[3])
 70 | 
 71 | 	interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
 72 | 	boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
 73 | 	boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
 74 | 
 75 | 	iou = interArea / float(boxAArea + boxBArea - interArea)
 76 | 	return iou
 77 | #%%
 78 | def crop_image(bb, image):
 79 |     w, h, d = image.shape
 80 |     bb = [int(math.floor(b)) for b in bb]
 81 |     bb[0] = max(bb[0], 0)
 82 |     bb[1] = max(bb[1], 0)
 83 |     bb[2] = min(bb[2], h)
 84 |     bb[3] = min(bb[3], w)
 85 |     cropped = image[bb[1]:bb[3], bb[0]:bb[2]]
 86 |     w, h, d = cropped.shape
 87 |     if w == 0 or h == 0:
 88 |         cropped = np.zeros((224, 224, 3))
 89 |     else:
 90 |         cropped = cv2.resize(cropped, (224, 224))
 91 |     return cropped
 92 | #%%
 93 | def get_features(images, bbs, labels):
 94 |     
 95 |     feature_to_all = []
 96 |     label = []
 97 | 
 98 |     for xi, yi, l, data_index in tqdm(zip(images, bbs, labels, range(len(images)))):
 99 |         (width, height, d) = xi.shape
100 |         initial_history = [-1] * history_length
101 |         initial_bb = (0, 0, height, width)
102 |         s = State(initial_history, initial_bb, xi)
103 |         feature = s.get_image_feature(xi, yi)
104 |         feature_to_all.append(feature)
105 |         label.append(l)
106 |         
107 |     return feature_to_all, label
108 | #%%w


--------------------------------------------------------------------------------
/svm_pred.py:
--------------------------------------------------------------------------------
  1 | #%%
  2 | import csv
  3 | from sklearn.svm import LinearSVC
  4 | from sklearn.svm import SVC
  5 | from collections import OrderedDict
  6 | from keras import backend as K
  7 | from keras.models import Sequential, load_model
  8 | import os
  9 | import cv2 
 10 | import numpy as np
 11 | from config import *
 12 | from utils import *
 13 | import pickle
 14 | from tqdm import tqdm
 15 | #%%
 16 | def load_data_1(training_ratio):
 17 |     bbs = pickle.load(open(os.path.join("out_rl_07/", "bounding_boxes.p"), "rb"))
 18 |     print('loaded bbs')
 19 |     labels = pickle.load(open(os.path.join("out_rl_07/", "labels_rl.p"), "rb"))
 20 |     print('loaded labels')
 21 | 
 22 |     unique_indices = [i for i in range(len(labels)) if len(labels[i]) == 1]
 23 |     indices_to_load = unique_indices
 24 | 
 25 |     bbs = [bbs[i][0] for i in indices_to_load]
 26 |     labels = [labels[i] for i in indices_to_load]
 27 |     images = [cv2.imread(os.path.join("out_rl_07/imgs/", str(i) + ".png")) for i in indices_to_load]
 28 | 
 29 |     bbs_train = bbs[:int(len(bbs) * training_ratio)]
 30 |     bbs_test = bbs[int(len(bbs) * training_ratio):]
 31 |     labels_train = labels[:int(len(labels) * training_ratio)]
 32 |     labels_test = labels[int(len(labels) * training_ratio):]
 33 |     images_train = images[:int(len(images) * training_ratio)]
 34 |     images_test = images[int(len(images) * training_ratio):]
 35 | 
 36 |     return bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load
 37 | 
 38 | #%%
 39 | CLASSES = ["person", "bird", "cat", "cow", "dog", "horse", "sheep", "aeroplane", "bicycle", "boat", "bus", "car",
 40 |            "motorbike", "train", "bottle", "chair", "diningtable", "pottedplant", "sofa", "tvmonitor"]
 41 | 
 42 | #%%
 43 | training_ratio = 0
 44 | 
 45 | bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load = load_data_1(training_ratio)
 46 | #%%
 47 | feature_train = []
 48 | with open('features.csv') as f:
 49 |     feature = csv.reader(f, delimiter=',')
 50 |     for row in feature:
 51 |         feature_train.append([float(i) for i in row])
 52 | #%%        
 53 | label_train = []
 54 | with open('lables.csv') as f:
 55 |     label = csv.reader(f, delimiter=',')
 56 |     for row in label:
 57 |         label_train.append([float(i) for i in row])
 58 | #%%
 59 | label_train = np.ndarray.flatten(np.array(label_train))
 60 | svm = LinearSVC()
 61 | svm.fit(feature_train, label_train)
 62 | print("fit completed")
 63 | #%%
 64 | predict_bbs = []
 65 | with open('predicted_bounding_boxes.csv') as f:
 66 |     bbs = csv.reader(f, delimiter=',')
 67 |     for row in bbs:
 68 |         predict_bbs.append([float(i) for i in row])
 69 | #%%
 70 |         
 71 | predict_feature = get_features(images_test, predict_bbs, labels_test)
 72 | #%%
 73 | ground_truth = get_features(images_test, bbs_test, labels_test)
 74 | #%%
 75 | label_DeepQ = svm.predict(predict_feature[0])
 76 | label_ground = svm.predict(ground_truth[0])
 77 | 
 78 | 
 79 | # accuracy_predict = OrderedDict()
 80 | # accuracy_ground = OrderedDict()
 81 | # sum_correct_predict = []
 82 | # sum_correct_ground = []
 83 | # #%%
 84 | # # labels_test = np.array(labels_test)#######    
 85 | # for i in range(20):
 86 | #     unique, counts = np.unique(labels_test, return_counts=True)
 87 | #     count = dict(zip(unique, counts))[i]
 88 | #     indexes = np.where(labels_test == i)[0]
 89 | #     unique1, counts1 = np.unique(label_DeepQ[indexes], return_counts=True)
 90 | #     count_predict = dict(zip(unique1, counts1))[i]
 91 | #     unique2, counts2 = np.unique(label_ground[indexes], return_counts=True)
 92 | #     count_ground = dict(zip(unique2, counts2))[i]
 93 | #     sum_correct_predict.append(count_predict)
 94 | #     sum_correct_ground.append(count_ground)
 95 | #     accuracy_predict[CLASSES[i]] = count_predict/count
 96 | #     accuracy_ground[CLASSES[i]] = count_ground/count   
 97 |     
 98 | # total_accu_predict = sum(sum_correct_predict)/len(labels_test)
 99 | # total_accu_ground = sum(sum_correct_ground)/len(labels_test)
100 | # print('accuracy of predicted bounding boxes of test data in SVM:', accuracy_predict)
101 | # print('accuracy of ground truth bounding boxes of test data in SVM:', accuracy_ground)
102 | # print('accuracy of total correct classification for prediction = ', total_accu_predict)
103 | # print('accuracy of total correct classification for ground truth = ', total_accu_ground)
104 | # print("relative:")
105 | # print([x/y for x,y in zip(accuracy_predict.values(), accuracy_ground.values())])
106 | #%%


--------------------------------------------------------------------------------
/q_learning.py:
--------------------------------------------------------------------------------
  1 | #%%
  2 | import random
  3 | from keras.layers import Dense
  4 | from keras import backend as K
  5 | from keras.models import Sequential, load_model
  6 | import numpy as np
  7 | import pandas as pd
  8 | import cv2
  9 | import pickle
 10 | import math
 11 | import matplotlib.pyplot as plt
 12 | import sys
 13 | import os
 14 | import tensorflow as tf
 15 | from collections import deque
 16 | import collections
 17 | from tqdm import tqdm
 18 | 
 19 | from utils import *
 20 | from config import *
 21 | 
 22 | HUBER_DELTA = 1.0
 23 | 
 24 | def smoothL1(y_true, y_pred):
 25 |     x = K.abs(y_true - y_pred)
 26 |     x = tf.where(x < HUBER_DELTA, 0.5 * x ** 2, HUBER_DELTA * (x - 0.5 * HUBER_DELTA))
 27 |     return K.sum(x)
 28 | 
 29 | 
 30 | def initialize_model():
 31 |     model = Sequential()
 32 |     model.add(Dense(1024, input_shape=(4096 + 90,), activation='relu'))
 33 |     model.add(Dense(1024, activation='relu'))
 34 |     model.add(Dense(9, activation='linear'))
 35 |     model.compile(loss=smoothL1, optimizer='adam')
 36 |     return model
 37 | 
 38 | 
 39 | loss_arr = []
 40 | 
 41 | def fit(model, x, y):
 42 |     global loss_arr
 43 |     loss = model.train_on_batch(x, y)
 44 |     loss_arr.append(loss)
 45 |     if len(loss_arr) == 100:
 46 |         print("loss %s" % str(sum(loss_arr) / len(loss_arr)))
 47 |         loss_arr = []
 48 | 
 49 | 
 50 | def transform(bb, a):
 51 | 
 52 |     alpha = .2
 53 |     alpha_w = alpha * (bb[2] - bb[0])
 54 |     alpha_h = alpha * (bb[3] - bb[1])
 55 |     dx1 = 0
 56 |     dy1 = 0
 57 |     dx2 = 0
 58 |     dy2 = 0
 59 | 
 60 |     if a == 0:
 61 |         dx1 = alpha_w
 62 |         dx2 = alpha_w
 63 |     elif a == 1:
 64 |         dx1 = -alpha_w
 65 |         dx2 = -alpha_w
 66 |     elif a == 2:
 67 |         dy1 = alpha_h
 68 |         dy2 = alpha_h
 69 |     elif a == 3:
 70 |         dy1 = -alpha_h
 71 |         dy2 = -alpha_h
 72 |     elif a == 4:
 73 |         dx1 = -alpha_w
 74 |         dx2 = alpha_w
 75 |         dy1 = -alpha_h
 76 |         dy2 = alpha_h
 77 |     elif a == 5:
 78 |         dx1 = alpha_w
 79 |         dx2 = -alpha_w
 80 |         dy1 = alpha_h
 81 |         dy2 = -alpha_h
 82 |     elif a == 6:
 83 |         dy1 = alpha_h
 84 |         dy2 = -alpha_h
 85 |     elif a == 7:
 86 |         dx1 = alpha_w
 87 |         dx2 = -alpha_w
 88 | 
 89 |     bb = (bb[0] + dx1, bb[1] + dy1, bb[2] + dx2, bb[3] + dy2)
 90 |     bb = (
 91 |         min(bb[0], bb[2]),
 92 |         min(bb[1], bb[3]),
 93 |         max(bb[0], bb[2]),
 94 |         max(bb[1], bb[3]),
 95 |     )
 96 | 
 97 |     return bb
 98 | 
 99 | 
100 | def trigger_reward(bb, true_bb):
101 |     return 3 if iou(bb, true_bb) > .6 else -3
102 | 
103 | 
104 | def transform_reward(bb, bbp, true_bb):
105 |     return 1 if iou(bbp, true_bb) > iou(bb, true_bb) else -1
106 | 
107 | 
108 | def get_q(s, model):
109 |     return np.ndarray.flatten(model.predict(s.feature))
110 | 
111 | 
112 | def select_action(s, true_bb, step, epsilon, action_values):
113 | 
114 |     if step == max_steps:
115 |         a = 8
116 | 
117 |     else:
118 |         if random.random() > epsilon:
119 |             a = np.argmax(action_values)
120 | 
121 |         else:
122 | 
123 |             action_rewards = [transform_reward(s.bb, transform(s.bb, a_tmp), true_bb) for a_tmp in range(8)]
124 |             action_rewards.append(trigger_reward(s.bb, true_bb))
125 |             action_rewards = np.array(action_rewards)
126 |             positive_action_indices = np.where(action_rewards >= 0)[0]
127 | 
128 |             if len(positive_action_indices) == 0:
129 |                 positive_action_indices = list(range(0, 9))
130 |             a = np.random.choice(positive_action_indices)
131 | 
132 | 
133 |     return a
134 | 
135 | 
136 | def take_action(s, true_bb, a, image):
137 | 
138 |     if a == 8:
139 |         sp = s
140 |         r = trigger_reward(s.bb, true_bb)
141 |         took_trigger = True
142 | 
143 |     else:
144 | 
145 |         bb = s.bb
146 |         bbp = transform(bb, a)
147 |         r = transform_reward(bb, bbp, true_bb)
148 |         took_trigger = False
149 |         historyp = s.history[1:]
150 |         historyp.append(a)
151 |         assert len(historyp) == history_length
152 |         sp = State(historyp, bbp, image)
153 | 
154 |     return sp, r, took_trigger
155 | 
156 | 
157 | def weights_from_errors(errors):
158 | 
159 |     sorted_inds = sorted(range(len(errors)),key=lambda x: errors[x])
160 |     inv_ranks = [0]*len(errors)
161 | 
162 |     for i in range(len(inv_ranks)):
163 |         inv_ranks[sorted_inds[i]] = 1.0/(len(inv_ranks)-i)
164 | 
165 | 
166 |     return inv_ranks
167 | 
168 | 
169 | def apply_experience(main_model, target_model,experience, experience_errors):
170 | 
171 |     weights = weights_from_errors(experience_errors)
172 |     sample_inds = random.choices(range(len(experience)), k=experience_sample_size, weights = weights)
173 |     sample = [experience[i] for i in sample_inds]
174 | 
175 |     targets = np.zeros((experience_sample_size, 9))
176 | 
177 |     for i in range(experience_sample_size):
178 |         s, a, r, sp, done = sample[i]
179 |         target = r
180 | 
181 |         if not done:
182 |             target = compute_target(r, sp, target_model)
183 |         targets[i, :] = get_q(s, main_model)
184 |         targets[i][a] = target
185 | 
186 |     x = np.concatenate([s.feature for (s, a, r, sp, d) in sample])
187 |     fit(main_model, x, targets)
188 | 
189 | 
190 | def compute_target(r, sp, target_model):
191 |     return r + gamma * np.amax(get_q(sp, target_model))
192 | 
193 | 
194 | def copy_main_to_target_model_weights(main_model, target_model):
195 |     weights = main_model.get_weights()
196 |     target_model.set_weights(weights)
197 | 
198 | def q_learning_train(x, y, labels, epochs, main_model, target_model):
199 | 
200 |     epsilon = epsilon_max
201 |     experience = collections.deque(maxlen=experience_buffer_size)
202 |     experience_errors = collections.deque(maxlen=experience_buffer_size)
203 |     total_steps = 0
204 | 
205 |     for epoch in range(epochs):
206 | 
207 |         print("epoch %i" % epoch)
208 | 
209 |         for xi, yi, l, data_index in zip(x, y, labels, range(len(x))):
210 | 
211 |             (width, height, d) = xi.shape
212 |             initial_history = [-1] * history_length
213 |             initial_bb = (0, 0, height, width)
214 |             s = State(initial_history, initial_bb, xi)
215 |             done = False
216 |             total_reward = 0
217 |             step = 0
218 | 
219 |             while not done:
220 | 
221 |                 action_values = get_q(s, main_model)
222 |                 a = select_action(s, yi, step, epsilon, action_values)
223 |                 sp, r, done = take_action(s, yi, a, xi)
224 |                 step_experience = (s, a, r, sp, done)
225 | 
226 |                 #add the experience and td-error to our buffer
227 |                 experience.append(step_experience)
228 |                 experience_errors.append(abs(action_values[a]-compute_target(r,sp,target_model)))
229 | 
230 |                 #apply the experience
231 |                 apply_experience(main_model, target_model, experience, experience_errors)
232 |                 s = sp
233 |                 total_reward += r
234 |                 step += 1
235 |                 total_steps += 1
236 | 
237 |                 #update the target Q-network
238 |                 if total_steps % target_update_interval == 0:
239 |                     copy_main_to_target_model_weights(main_model,target_model)
240 | 
241 |                 # try:
242 |                 #     start_point = (s.bb[0], s.bb[2])
243 |                 #     print("start point {}".format(start_point))
244 |                 #     end_point = (s.bb[1], s.bb[3])
245 |                 #     print("end point {}".format(end_point))
246 | 
247 |                 #     color = (255, 0, 0)
248 |                 #     thickness = 2
249 |                     
250 |                 #     image = cv2.rectangle(xi, start_point, end_point, color, thickness)
251 |                 #     cv2.imshow('img', image)
252 |                 #     cv2.waitKey(10)
253 |                 # except:
254 |                 #     pass
255 | 
256 |             print("data_index %s" % data_index)
257 |             print("reward %i" % total_reward)
258 |             print("iou %f" % iou(s.bb, yi))
259 | 
260 |         if epoch < epsilon_dec_steps:
261 |             epsilon -= epsilon_dec
262 |             print("epsilon changed to %f" % epsilon)
263 | 
264 |     return main_model
265 | 
266 | 
267 | def q_learning_predict(x,model):
268 | 
269 |     y = []
270 |     count = 0
271 |     for xi in x:
272 | 
273 |         (width, height, d) = xi.shape
274 |         initial_history = [-1] * history_length
275 |         initial_bb = (0, 0, height, width)
276 |         s = State(initial_history, initial_bb, xi)
277 | 
278 |         # (width, height, d) = xi.shape
279 |         # s = (0, 0, height, width)
280 |         # history = [-1] * history_length
281 |         done = False
282 | 
283 |         for i in range(sys.maxsize):
284 | 
285 |             action_values = get_q(s, model)
286 |             if i == max_steps - 1:
287 |                 a = 8
288 | 
289 |             else:
290 |                 a = np.argmax(action_values)
291 |             if a == 8:
292 |                 sp = s
293 |                 done = True
294 | 
295 |             else:
296 |                 bbp = transform(s.bb, a)
297 |                 historyp = s.history[1:]
298 |                 historyp.append(a)
299 |                 assert len(historyp) == history_length
300 |                 sp = State(historyp, bbp, xi)
301 |                 s = sp
302 |             if done:
303 |                 break
304 |         count+=1
305 |         print("image ",count," predicted")
306 |         
307 |         # try:
308 |         #     s.bb = [int(math.floor(b)) for b in s.bb]
309 |         #     img = xi[s.bb[1]:s.bb[3], s.bb[0]:s.bb[2]]
310 |         #     cv2.imshow('img', img)
311 |         #     if cv2.waitKey(25) & 0xFF == ord('q'):
312 |         #         break
313 |         #     print(s.bb)
314 |         # except:
315 |         #     pass
316 | 
317 |         y.append(s.bb)
318 | 
319 |     return y
320 | 
321 | def main():
322 | 
323 |     training_ratio = 1
324 | 
325 |     bbs_train, bbs_test, labels_train, labels_test, images_train, images_test, indices_to_load = load_data(training_ratio)
326 | 
327 |     print('images loaded')
328 | 
329 | 
330 |     # features_csv, labels_csv = get_features(images_train, bbs_train, labels_train)
331 |     # features_csv = pd.DataFrame(features_csv)
332 |     # labels_csv = pd.DataFrame(labels_csv)
333 |     # features_csv.to_csv('features.csv', index = False)
334 |     # labels_csv.to_csv('lables.csv', index = False)
335 | 
336 |     if training:
337 | 
338 |         main_model = initialize_model()
339 |         weights = main_model.get_weights()
340 |         target_model = initialize_model()
341 |         target_model.set_weights(weights)
342 |         model = q_learning_train(images_train, bbs_train, labels_train, 15, main_model, target_model)
343 |         model.save("dqn.h5")
344 | 
345 |     else:
346 |         
347 |         model = load_model("dqn.h5")
348 |         y = q_learning_predict(images_test, model)
349 |         inds = range(int(len(images_test) * training_ratio), len(images_test))
350 | 
351 |         np.savetxt("predicted_bounding_boxes.csv", y, delimiter=',', newline='\n')
352 |         np.savetxt("predicted_image_indices.csv", inds, delimiter=',', newline='\n')
353 |         np.savetxt("predicted_image_labels.csv", labels_test, delimiter=',', newline='\n')
354 | #%%
355 | main()
356 | #%%
357 | 


--------------------------------------------------------------------------------