├── DQN_sekiro_testing_gpu.py
├── DQN_sekiro_training_gpu.py
├── DQN_tensorflow_gpu.py
├── LICENSE
├── README.md
├── directkeys.py
├── find_blood_location.py
├── getkeys.py
├── grabscreen.py
└── restart.py

/DQN_sekiro_testing_gpu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Jan 27 21:10:06 2021
4 | 
5 | @author: pang
6 | """
7 | 
8 | import numpy as np
9 | from grabscreen import grab_screen
10 | import cv2
11 | import time
12 | import directkeys
13 | from getkeys import key_check
14 | import random
15 | from DQN_tensorflow_gpu import DQN
16 | import os
17 | import pandas as pd
18 | from restart import restart
19 | import random
20 | import tensorflow.compat.v1 as tf
21 | 
22 | def pause_game(paused):
23 |     keys = key_check()
24 |     if 'T' in keys:
25 |         if paused:
26 |             paused = False
27 |             print('start game')
28 |             time.sleep(1)
29 |         else:
30 |             paused = True
31 |             print('pause game')
32 |             time.sleep(1)
33 |     if paused:
34 |         print('paused')
35 |         while True:
36 |             keys = key_check()
37 |             # pauses game and can get annoying.
38 |             if 'T' in keys:
39 |                 if paused:
40 |                     paused = False
41 |                     print('start game')
42 |                     time.sleep(1)
43 |                     break
44 |                 else:
45 |                     paused = True
46 |                     time.sleep(1)
47 |     return paused
48 | 
49 | def self_blood_count(self_gray):
50 |     self_blood = 0
51 |     for self_bd_num in self_gray[469]:
52 |         # self blood gray pixel 80~98
53 |         # 血量灰度值80~98
54 |         if self_bd_num > 90 and self_bd_num < 98:
55 |             self_blood += 1
56 |     return self_blood
57 | 
58 | def boss_blood_count(boss_gray):
59 |     boss_blood = 0
60 |     for boss_bd_num in boss_gray[0]:
61 |         # boss blood gray pixel 65~75
62 |         # 血量灰度值65~75
63 |         if boss_bd_num > 65 and boss_bd_num < 75:
64 |             boss_blood += 1
65 |     return boss_blood
66 | 
67 | def take_action(action):
68 |     if action == 0:    # n_choose
69 |         pass
70 |     elif action == 1:  # j
71 |         directkeys.attack()
72 |     elif action == 2:  # k
73 |         directkeys.jump()
74 |     elif action == 3:  # m
75 |         directkeys.defense()
76 |     elif action == 4:  # r
77 |         directkeys.dodge()
78 | 
79 | def action_judge(boss_blood, next_boss_blood, self_blood, next_self_blood, stop, emergence_break):
80 |     # get action reward
81 |     # emergence_break is used to stop training in an emergency
82 |     # 用于防止出现意外紧急停止训练防止错误训练数据扰乱神经网络
83 |     if next_self_blood < 3:     # self dead
84 |         if emergence_break < 2:
85 |             reward = -10
86 |             done = 1
87 |             stop = 0
88 |             emergence_break += 1
89 |             return reward, done, stop, emergence_break
90 |         else:
91 |             reward = -10
92 |             done = 1
93 |             stop = 0
94 |             emergence_break = 100
95 |             return reward, done, stop, emergence_break
96 |     elif next_boss_blood - boss_blood > 15:     # boss dead (its blood bar refills)
97 |         if emergence_break < 2:
98 |             reward = 20
99 |             done = 0
100 |             stop = 0
101 |             emergence_break += 1
102 |             return reward, done, stop, emergence_break
103 |         else:
104 |             reward = 20
105 |             done = 0
106 |             stop = 0
107 |             emergence_break = 100
108 |             return reward, done, stop, emergence_break
109 |     else:
110 |         self_blood_reward = 0
111 |         boss_blood_reward = 0
112 |         # print(next_self_blood - self_blood)
113 |         # print(next_boss_blood - boss_blood)
114 |         if next_self_blood - self_blood < -7:
115 |             if stop == 0:
116 |                 self_blood_reward = -6
117 |                 stop = 1
118 |                 # 防止连续取帧时一直计算掉血
119 |         else:
120 |             stop = 0
121 |         if next_boss_blood - boss_blood <= -3:
122 |             boss_blood_reward = 4
123 |         # print("self_blood_reward: ",self_blood_reward)
124 |         # print("boss_blood_reward: ",boss_blood_reward)
125 |         reward = self_blood_reward + boss_blood_reward
126 |         done = 0
127 |         emergence_break = 0
128 | return reward, done, stop, emergence_break 129 | 130 | 131 | DQN_model_path = "model_gpu_5" 132 | DQN_log_path = "logs_gpu/" 133 | WIDTH = 96 134 | HEIGHT = 88 135 | window_size = (320,100,704,452)#384,352 192,176 96,88 48,44 24,22 136 | # station window_size 137 | 138 | blood_window = (60,91,280,562) 139 | # used to get boss and self blood 140 | 141 | action_size = 5 142 | # action[n_choose,j,k,m,r] 143 | # j-attack, k-jump, m-defense, r-dodge, n_choose-do nothing 144 | 145 | paused = True 146 | # used to stop training 147 | 148 | if __name__ == '__main__': 149 | agent = DQN(WIDTH, HEIGHT, action_size, DQN_model_path, DQN_log_path) 150 | # DQN init 151 | paused = pause_game(paused) 152 | # paused at the begin 153 | screen_gray = cv2.cvtColor(grab_screen(window_size),cv2.COLOR_BGR2GRAY) 154 | blood_window_gray = cv2.cvtColor(grab_screen(blood_window),cv2.COLOR_BGR2GRAY) 155 | # collect station gray graph 156 | station = cv2.resize(screen_gray,(WIDTH,HEIGHT)) 157 | # change graph to WIDTH * HEIGHT for station input 158 | boss_blood = boss_blood_count(blood_window_gray) 159 | self_blood = self_blood_count(blood_window_gray) 160 | # count init blood 161 | target_step = 0 162 | # used to update target Q network 163 | done = 0 164 | total_reward = 0 165 | stop = 0 166 | # 用于防止连续帧重复计算reward 167 | last_time = time.time() 168 | emergence_break = 0 169 | while True: 170 | station = np.array(station).reshape(-1,HEIGHT,WIDTH,1)[0] 171 | # reshape station for tf input placeholder 172 | print('loop took {} seconds'.format(time.time()-last_time)) 173 | last_time = time.time() 174 | # get the action by state 175 | action = agent.Choose_Action(station) 176 | take_action(action) 177 | # take station then the station change 178 | screen_gray = cv2.cvtColor(grab_screen(window_size),cv2.COLOR_BGR2GRAY) 179 | # collect station gray graph 180 | blood_window_gray = cv2.cvtColor(grab_screen(blood_window),cv2.COLOR_BGR2GRAY) 181 | # collect blood gray graph for count self and boss blood 182 | next_station = cv2.resize(screen_gray,(WIDTH,HEIGHT)) 183 | next_station = np.array(next_station).reshape(-1,HEIGHT,WIDTH,1)[0] 184 | station = next_station 185 | next_boss_blood = boss_blood_count(blood_window_gray) 186 | next_self_blood = self_blood_count(blood_window_gray) 187 | reward, done, stop, emergence_break = action_judge(boss_blood, next_boss_blood, 188 | self_blood, next_self_blood, 189 | stop, emergence_break) 190 | # get action reward 191 | if emergence_break == 100: 192 | # emergence break , save model and paused 193 | # 遇到紧急情况,保存数据,并且暂停 194 | print("emergence_break") 195 | agent.save_model() 196 | paused = True 197 | keys = key_check() 198 | paused = pause_game(paused) 199 | if 'G' in keys: 200 | print('stop testing DQN') 201 | break 202 | if done == 1: 203 | restart() 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | -------------------------------------------------------------------------------- /DQN_sekiro_training_gpu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jan 27 21:10:06 2021 4 | 5 | @author: pang 6 | """ 7 | 8 | import numpy as np 9 | from grabscreen import grab_screen 10 | import cv2 11 | import time 12 | import directkeys 13 | from getkeys import key_check 14 | import random 15 | from DQN_tensorflow_gpu import DQN 16 | import os 17 | import pandas as pd 18 | from restart import restart 19 | import random 20 | import tensorflow.compat.v1 as tf 21 | 22 | def pause_game(paused): 23 | 
keys = key_check() 24 | if 'T' in keys: 25 | if paused: 26 | paused = False 27 | print('start game') 28 | time.sleep(1) 29 | else: 30 | paused = True 31 | print('pause game') 32 | time.sleep(1) 33 | if paused: 34 | print('paused') 35 | while True: 36 | keys = key_check() 37 | # pauses game and can get annoying. 38 | if 'T' in keys: 39 | if paused: 40 | paused = False 41 | print('start game') 42 | time.sleep(1) 43 | break 44 | else: 45 | paused = True 46 | time.sleep(1) 47 | return paused 48 | 49 | def self_blood_count(self_gray): 50 | self_blood = 0 51 | for self_bd_num in self_gray[469]: 52 | # self blood gray pixel 80~98 53 | # 血量灰度值80~98 54 | if self_bd_num > 90 and self_bd_num < 98: 55 | self_blood += 1 56 | return self_blood 57 | 58 | def boss_blood_count(boss_gray): 59 | boss_blood = 0 60 | for boss_bd_num in boss_gray[0]: 61 | # boss blood gray pixel 65~75 62 | # 血量灰度值65~75 63 | if boss_bd_num > 65 and boss_bd_num < 75: 64 | boss_blood += 1 65 | return boss_blood 66 | 67 | def take_action(action): 68 | if action == 0: # n_choose 69 | pass 70 | elif action == 1: # j 71 | directkeys.attack() 72 | elif action == 2: # k 73 | directkeys.jump() 74 | elif action == 3: # m 75 | directkeys.defense() 76 | elif action == 4: # r 77 | directkeys.dodge() 78 | 79 | 80 | def action_judge(boss_blood, next_boss_blood, self_blood, next_self_blood, stop, emergence_break): 81 | # get action reward 82 | # emergence_break is used to break down training 83 | # 用于防止出现意外紧急停止训练防止错误训练数据扰乱神经网络 84 | if next_self_blood < 3: # self dead 85 | if emergence_break < 2: 86 | reward = -10 87 | done = 1 88 | stop = 0 89 | emergence_break += 1 90 | return reward, done, stop, emergence_break 91 | else: 92 | reward = -10 93 | done = 1 94 | stop = 0 95 | emergence_break = 100 96 | return reward, done, stop, emergence_break 97 | elif next_boss_blood - boss_blood > 15: #boss dead 98 | if emergence_break < 2: 99 | reward = 20 100 | done = 0 101 | stop = 0 102 | emergence_break += 1 103 | return reward, done, stop, emergence_break 104 | else: 105 | reward = 20 106 | done = 0 107 | stop = 0 108 | emergence_break = 100 109 | return reward, done, stop, emergence_break 110 | else: 111 | self_blood_reward = 0 112 | boss_blood_reward = 0 113 | # print(next_self_blood - self_blood) 114 | # print(next_boss_blood - boss_blood) 115 | if next_self_blood - self_blood < -7: 116 | if stop == 0: 117 | self_blood_reward = -6 118 | stop = 1 119 | # 防止连续取帧时一直计算掉血 120 | else: 121 | stop = 0 122 | if next_boss_blood - boss_blood <= -3: 123 | boss_blood_reward = 4 124 | # print("self_blood_reward: ",self_blood_reward) 125 | # print("boss_blood_reward: ",boss_blood_reward) 126 | reward = self_blood_reward + boss_blood_reward 127 | done = 0 128 | emergence_break = 0 129 | return reward, done, stop, emergence_break 130 | 131 | 132 | DQN_model_path = "model_gpu" 133 | DQN_log_path = "logs_gpu/" 134 | WIDTH = 96 135 | HEIGHT = 88 136 | window_size = (320,100,704,452)#384,352 192,176 96,88 48,44 24,22 137 | # station window_size 138 | 139 | blood_window = (60,91,280,562) 140 | # used to get boss and self blood 141 | 142 | action_size = 5 143 | # action[n_choose,j,k,m,r] 144 | # j-attack, k-jump, m-defense, r-dodge, n_choose-do nothing 145 | 146 | EPISODES = 3000 147 | big_BATCH_SIZE = 16 148 | UPDATE_STEP = 50 149 | # times that evaluate the network 150 | num_step = 0 151 | # used to save log graph 152 | target_step = 0 153 | # used to update target Q network 154 | paused = True 155 | # used to stop training 156 | 157 | if __name__ == '__main__': 158 | 
agent = DQN(WIDTH, HEIGHT, action_size, DQN_model_path, DQN_log_path) 159 | # DQN init 160 | paused = pause_game(paused) 161 | # paused at the begin 162 | emergence_break = 0 163 | # emergence_break is used to break down training 164 | # 用于防止出现意外紧急停止训练防止错误训练数据扰乱神经网络 165 | for episode in range(EPISODES): 166 | screen_gray = cv2.cvtColor(grab_screen(window_size),cv2.COLOR_BGR2GRAY) 167 | # collect station gray graph 168 | blood_window_gray = cv2.cvtColor(grab_screen(blood_window),cv2.COLOR_BGR2GRAY) 169 | # collect blood gray graph for count self and boss blood 170 | station = cv2.resize(screen_gray,(WIDTH,HEIGHT)) 171 | # change graph to WIDTH * HEIGHT for station input 172 | boss_blood = boss_blood_count(blood_window_gray) 173 | self_blood = self_blood_count(blood_window_gray) 174 | # count init blood 175 | target_step = 0 176 | # used to update target Q network 177 | done = 0 178 | total_reward = 0 179 | stop = 0 180 | # 用于防止连续帧重复计算reward 181 | last_time = time.time() 182 | while True: 183 | station = np.array(station).reshape(-1,HEIGHT,WIDTH,1)[0] 184 | # reshape station for tf input placeholder 185 | print('loop took {} seconds'.format(time.time()-last_time)) 186 | last_time = time.time() 187 | target_step += 1 188 | # get the action by state 189 | action = agent.Choose_Action(station) 190 | take_action(action) 191 | # take station then the station change 192 | screen_gray = cv2.cvtColor(grab_screen(window_size),cv2.COLOR_BGR2GRAY) 193 | # collect station gray graph 194 | blood_window_gray = cv2.cvtColor(grab_screen(blood_window),cv2.COLOR_BGR2GRAY) 195 | # collect blood gray graph for count self and boss blood 196 | next_station = cv2.resize(screen_gray,(WIDTH,HEIGHT)) 197 | next_station = np.array(next_station).reshape(-1,HEIGHT,WIDTH,1)[0] 198 | next_boss_blood = boss_blood_count(blood_window_gray) 199 | next_self_blood = self_blood_count(blood_window_gray) 200 | reward, done, stop, emergence_break = action_judge(boss_blood, next_boss_blood, 201 | self_blood, next_self_blood, 202 | stop, emergence_break) 203 | # get action reward 204 | if emergence_break == 100: 205 | # emergence break , save model and paused 206 | # 遇到紧急情况,保存数据,并且暂停 207 | print("emergence_break") 208 | agent.save_model() 209 | paused = True 210 | agent.Store_Data(station, action, reward, next_station, done) 211 | if len(agent.replay_buffer) > big_BATCH_SIZE: 212 | num_step += 1 213 | # save loss graph 214 | # print('train') 215 | agent.Train_Network(big_BATCH_SIZE, num_step) 216 | if target_step % UPDATE_STEP == 0: 217 | agent.Update_Target_Network() 218 | # update target Q network 219 | station = next_station 220 | self_blood = next_self_blood 221 | boss_blood = next_boss_blood 222 | total_reward += reward 223 | paused = pause_game(paused) 224 | if done == 1: 225 | break 226 | if episode % 10 == 0: 227 | agent.save_model() 228 | # save model 229 | print('episode: ', episode, 'Evaluation Average Reward:', total_reward/target_step) 230 | restart() 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | -------------------------------------------------------------------------------- /DQN_tensorflow_gpu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 18 19:56:38 2021 4 | 5 | @author: pang 6 | """ 7 | 8 | import gym 9 | import tensorflow.compat.v1 as tf 10 | tf.disable_v2_behavior() 11 | import random 12 | from collections import deque 13 | import numpy as np 14 | import os 15 | 16 | 17 | # experiences replay buffer size 
18 | REPLAY_SIZE = 2000 19 | # memory size 1000 20 | # size of minibatch 21 | small_BATCH_SIZE = 16 22 | big_BATCH_SIZE = 128 23 | BATCH_SIZE_door = 1000 24 | 25 | # these are the hyper Parameters for DQN 26 | # discount factor for target Q to caculate the TD aim value 27 | GAMMA = 0.9 28 | # the start value of epsilon E 29 | INITIAL_EPSILON = 0.5 30 | # the final value of epsilon 31 | FINAL_EPSILON = 0.01 32 | 33 | class DQN(): 34 | def __init__(self, observation_width, observation_height, action_space, model_file, log_file): 35 | # the state is the input vector of network, in this env, it has four dimensions 36 | self.state_dim = observation_width * observation_height 37 | self.state_w = observation_width 38 | self.state_h = observation_height 39 | # the action is the output vector and it has two dimensions 40 | self.action_dim = action_space 41 | # init experience replay, the deque is a list that first-in & first-out 42 | self.replay_buffer = deque() 43 | # you can create the network by the two parameters 44 | self.create_Q_network() 45 | # after create the network, we can define the training methods 46 | self.create_updating_method() 47 | # set the value in choose_action 48 | self.epsilon = INITIAL_EPSILON 49 | self.model_path = model_file + "/save_model.ckpt" 50 | self.model_file = model_file 51 | self.log_file = log_file 52 | # 因为保存的模型名字不太一样,只能检查路径是否存在 53 | # Init session 54 | self.session = tf.InteractiveSession() 55 | if os.path.exists(self.model_file): 56 | print("model exists , load model\n") 57 | self.saver = tf.train.Saver() 58 | self.saver.restore(self.session, self.model_path) 59 | else: 60 | print("model don't exists , create new one\n") 61 | self.session.run(tf.global_variables_initializer()) 62 | self.saver = tf.train.Saver() 63 | # init 64 | # 只有把框图保存到文件中,才能加载到浏览器中观看 65 | self.writer = tf.summary.FileWriter(self.log_file, self.session.graph) 66 | ####### 路径中不要有中文字符,否则加载不进来 ########### 67 | # tensorboard --logdir=logs_gpu --host=127.0.0.1 68 | self.merged = tf.summary.merge_all() 69 | # 把所有summary合并在一起,就是把所有loss,w,b这些的数据打包 70 | # 注意merged也需要被sess.run才能发挥作用 71 | 72 | 73 | 74 | # the function that give the weight initial value 75 | def weight_variable(self, shape): 76 | initial = tf.truncated_normal(shape, stddev=0.1) 77 | return tf.Variable(initial) 78 | 79 | # the function that give the bias initial value 80 | def bias_variable(self, shape): 81 | initial = tf.constant(0.01, shape=shape) 82 | return tf.Variable(initial) 83 | 84 | def conv2d(self, x, W): 85 | return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') 86 | # stride第一个和最后一个元素一定要为1,中间两个分别是x和y轴的跨度,此处设为1 87 | # SAME 抽取时外面有填充,抽取大小是一样的 88 | 89 | def max_pool_2x2(self, x): 90 | return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') 91 | 92 | # the function to create the network 93 | # there are two networks, the one is action_value and the other is target_action_value 94 | # these two networks has same architecture 95 | def create_Q_network(self): 96 | with tf.name_scope('inputs'): 97 | # first, set the input of networks 98 | self.state_input = tf.placeholder("float", [None, self.state_h, self.state_w, 1]) 99 | # second, create the current_net 100 | with tf.variable_scope('current_net'): 101 | # first, set the network's weights 102 | W_conv1 = self.weight_variable([5,5,1,32]) 103 | b_conv1 = self.bias_variable([32]) 104 | W_conv2 = self.weight_variable([5,5,32,64]) 105 | b_conv2 = self.bias_variable([64]) 106 | # W_conv3 = self.weight_variable([5,5,64,128]) 107 | # b_conv3 = 
self.bias_variable([128]) 108 | W1 = self.weight_variable([int((self.state_w/4) * (self.state_h/4) * 64), 512]) 109 | b1 = self.bias_variable([512]) 110 | W2 = self.weight_variable([512, 256]) 111 | b2 = self.bias_variable([256]) 112 | W3 = self.weight_variable([256, self.action_dim]) 113 | b3 = self.bias_variable([self.action_dim]) 114 | # second, set the layers 115 | # conv layer one 116 | h_conv1 = tf.nn.relu(self.conv2d(self.state_input, W_conv1) + b_conv1) 117 | # self.state_w * self.state_h * 32 118 | # pooling layer one 119 | h_pool1 = self.max_pool_2x2(h_conv1) 120 | # self.state_w/2 * self.state_h/2 * 32 121 | # conv layer two 122 | h_conv2 = tf.nn.relu(self.conv2d(h_pool1, W_conv2) + b_conv2) 123 | # pooling layer two 124 | h_pool2 = self.max_pool_2x2(h_conv2) 125 | # self.state_w/4 * self.state_h/4 * 64 126 | # conv layer three 127 | # h_conv3 = tf.nn.relu(self.conv2d(h_pool2, W_conv3) + b_conv3) 128 | # self.state_w/4 * self.state_h/4 * 128 129 | h_conv2_flat = tf.reshape(h_pool2, [-1,int((self.state_w/4) * (self.state_h/4) * 64)]) 130 | # hidden layer one 131 | h_layer_one = tf.nn.relu(tf.matmul(h_conv2_flat, W1) + b1) 132 | # dropout 133 | h_layer_one = tf.nn.dropout(h_layer_one, 1) 134 | # hidden layer two 135 | h_layer_two = tf.nn.relu(tf.matmul(h_layer_one, W2) + b2) 136 | # dropout 137 | h_layer_two = tf.nn.dropout(h_layer_two, 1) 138 | # the output of current_net 139 | Q_value = tf.matmul(h_layer_two, W3) + b3 140 | # dropout 141 | self.Q_value = tf.nn.dropout(Q_value, 1) 142 | # third, create the current_net 143 | with tf.variable_scope('target_net'): 144 | # first, set the network's weights 145 | t_W_conv1 = self.weight_variable([5,5,1,32]) 146 | t_b_conv1 = self.bias_variable([32]) 147 | t_W_conv2 = self.weight_variable([5,5,32,64]) 148 | t_b_conv2 = self.bias_variable([64]) 149 | # t_W_conv3 = self.weight_variable([5,5,64,128]) 150 | # t_b_conv3 = self.bias_variable([128]) 151 | t_W1 = self.weight_variable([int((self.state_w/4) * (self.state_h/4) * 64), 512]) 152 | t_b1 = self.bias_variable([512]) 153 | t_W2 = self.weight_variable([512, 256]) 154 | t_b2 = self.bias_variable([256]) 155 | t_W3 = self.weight_variable([256, self.action_dim]) 156 | t_b3 = self.bias_variable([self.action_dim]) 157 | # second, set the layers 158 | # conv layer one 159 | t_h_conv1 = tf.nn.relu(self.conv2d(self.state_input, t_W_conv1) + t_b_conv1) 160 | # self.state_w * self.state_h * 32 161 | # pooling layer one 162 | t_h_pool1 = self.max_pool_2x2(t_h_conv1) 163 | # self.state_w/2 * self.state_h/2 * 32 164 | # conv layer two 165 | t_h_conv2 = tf.nn.relu(self.conv2d(t_h_pool1, t_W_conv2) + t_b_conv2) 166 | # pooling layer one 167 | t_h_pool2 = self.max_pool_2x2(t_h_conv2) 168 | # self.state_w/4 * self.state_h/4 * 64 169 | # conv layer three 170 | # t_h_conv3 = tf.nn.relu(self.conv2d(t_h_pool2, t_W_conv3) + t_b_conv3) 171 | # self.state_w/4 * self.state_h/4 * 128 172 | t_h_conv2_flat = tf.reshape(t_h_pool2, [-1,int((self.state_w/4) * (self.state_h/4) * 64)]) 173 | # hidden layer one 174 | t_h_layer_one = tf.nn.relu(tf.matmul(t_h_conv2_flat, t_W1) + t_b1) 175 | # dropout 176 | t_h_layer_one = tf.nn.dropout(t_h_layer_one, 1) 177 | # 防止过拟合 178 | # hidden layer two 179 | t_h_layer_two = tf.nn.relu(tf.matmul(t_h_layer_one, t_W2) + t_b2) 180 | # dropout 181 | t_h_layer_two = tf.nn.dropout(t_h_layer_two, 1) 182 | # the output of current_net 183 | target_Q_value = tf.matmul(t_h_layer_two, t_W3) + t_b3 184 | # dropout 185 | self.target_Q_value = tf.nn.dropout(target_Q_value, 1) 186 | # at last, solve 
the parameters replace problem 187 | # the parameters of current_net 188 | e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='current_net') 189 | # the parameters of target_net 190 | t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net') 191 | # define the operation that replace the target_net's parameters by current_net's parameters 192 | with tf.variable_scope('soft_replacement'): 193 | self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)] 194 | 195 | # this the function that define the method to update the current_net's parameters 196 | def create_updating_method(self): 197 | # this the input action, use one hot presentation 198 | self.action_input = tf.placeholder("float", [None, self.action_dim]) 199 | # this the TD aim value 200 | self.y_input = tf.placeholder("float", [None]) 201 | # this the action's Q_value 202 | Q_action = tf.reduce_sum(tf.multiply(self.Q_value, self.action_input), reduction_indices=1) 203 | # 生成的Q_value实际上是一个action大小的list,action_input是一个one-hot向量, 204 | # 两者相乘实际上是取出了执行操作的Q值进行单独更新 205 | # this is the lost 206 | self.cost = tf.reduce_mean(tf.square(self.y_input - Q_action)) 207 | # 均方差损失函数 208 | # drawing loss graph 209 | tf.summary.scalar('loss',self.cost) 210 | # loss graph save 211 | with tf.name_scope('train_loss'): 212 | # use the loss to optimize the network 213 | self.optimizer = tf.train.AdamOptimizer(0.001).minimize(self.cost) 214 | # learning_rate=0.0001 215 | 216 | # this is the function that use the network output the action 217 | def Choose_Action(self, state): 218 | # the output is a tensor, so the [0] is to get the output as a list 219 | Q_value = self.Q_value.eval(feed_dict={ 220 | self.state_input: [state] 221 | })[0] 222 | # use epsilon greedy to get the action 223 | if random.random() <= self.epsilon: 224 | # if lower than epsilon, give a random value 225 | self.epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / 10000 226 | return random.randint(0, self.action_dim - 1) 227 | else: 228 | # if bigger than epsilon, give the argmax value 229 | self.epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / 10000 230 | return np.argmax(Q_value) 231 | 232 | # this the function that store the data in replay memory 233 | def Store_Data(self, state, action, reward, next_state, done): 234 | # generate a list with all 0,and set the action is 1 235 | one_hot_action = np.zeros(self.action_dim) 236 | one_hot_action[action] = 1 237 | # store all the elements 238 | self.replay_buffer.append((state, one_hot_action, reward, next_state, done)) 239 | # if the length of replay_buffer is bigger than REPLAY_SIZE 240 | # delete the left value, make the len is stable 241 | if len(self.replay_buffer) > REPLAY_SIZE: 242 | self.replay_buffer.popleft() 243 | # update replay_buffer 244 | 245 | # train the network, update the parameters of Q_value 246 | def Train_Network(self, BATCH_SIZE, num_step): 247 | # Step 1: obtain random minibatch from replay memory 248 | minibatch = random.sample(self.replay_buffer, BATCH_SIZE) 249 | # 从记忆库中采样BATCH_SIZE 250 | state_batch = [data[0] for data in minibatch] 251 | action_batch = [data[1] for data in minibatch] 252 | reward_batch = [data[2] for data in minibatch] 253 | next_state_batch = [data[3] for data in minibatch] 254 | 255 | # Step 2: calculate TD aim value 256 | y_batch = [] 257 | # give the next_state_batch flow to target_Q_value and caculate the next state's Q_value 258 | Q_value_batch = self.target_Q_value.eval(feed_dict={self.state_input: next_state_batch}) 259 | # caculate the 
TD aim value by the formula
260 |         for i in range(0, BATCH_SIZE):
261 |             done = minibatch[i][4]
262 |             # see if this state is the final state of the episode
263 |             if done:
264 |                 y_batch.append(reward_batch[i])
265 |             else:
266 |                 # otherwise bootstrap with the max target Q value
267 |                 y_batch.append(reward_batch[i] + GAMMA * np.max(Q_value_batch[i]))
268 | 
269 |         # step 3: update the network
270 |         self.optimizer.run(feed_dict={
271 |             self.y_input: y_batch,
272 |             # y即为更新后的Q值,与Q_action构成损失函数更新网络
273 |             self.action_input: action_batch,
274 |             self.state_input: state_batch
275 |             })
276 |         if num_step % 100 == 0:
277 |             # save loss graph
278 |             result = self.session.run(self.merged,feed_dict={
279 |                 self.y_input: y_batch,
280 |                 self.action_input: action_batch,
281 |                 self.state_input: state_batch
282 |                 })
283 |             # 把merged的数据放进writer中才能画图
284 |             self.writer.add_summary(result, num_step)
285 | 
286 |     def Update_Target_Network(self):
287 |         # update target Q network
288 |         self.session.run(self.target_replace_op)
289 | 
290 |     # use for test
291 |     def action(self, state):
292 |         return np.argmax(self.Q_value.eval(feed_dict={
293 |             self.state_input: [state]
294 |             })[0])
295 | 
296 |     def save_model(self):
297 |         self.save_path = self.saver.save(self.session, self.model_path)
298 |         print("Save to path:", self.save_path)
299 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 analoganddigital
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Update for using DQN to play Sekiro 2021.2.2 (English Version)
2 | This is the code for using DQN to play Sekiro.
3 | 
4 | I am very glad to say that I have written the code for playing Sekiro with DQN.
5 | As we all know, supervised learning can only learn skills from the data we provide for it. This time, however, by using reinforcement learning we can see a much smarter agent playing Sekiro.
6 | 
7 | Reinforcement learning updates its network by itself using the reward feedback, which means we no longer need to collect our own data sets. All the training data comes from the real-time interaction between the DQN network and the game.
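Written out, that feedback drives the standard DQN update implemented in `Train_Network` of `DQN_tensorflow_gpu.py` (discount factor γ = `GAMMA` = 0.9):

$$
y_i =
\begin{cases}
r_i, & \text{if the episode ends at step } i \\
r_i + \gamma \max_{a'} Q_{\text{target}}(s_{i+1}, a'), & \text{otherwise}
\end{cases}
\qquad
L = \frac{1}{N}\sum_i \bigl(y_i - Q(s_i, a_i)\bigr)^2
$$

The current network is trained to minimise L on minibatches sampled from the replay buffer, and its weights are periodically copied into the target network by `Update_Target_Network`.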
8 | By using this DQN network you can fight any boss you want in the game.
9 | There are still a few things you need to know:
10 | * In order to shorten the time needed to restart the game, we use a game modifier (trainer).
11 | * Download link for the game modifier: https://patch.ali213.net/showpatch/118405.html
12 | * See the tutorial video for details on how to use the code: https://www.bilibili.com/video/BV1by4y1n7pe/
13 | 
14 | Have fun!
15 | 
16 | # Old version: sekiro_tensorflow
17 | Code for playing Sekiro with supervised learning: https://github.com/analoganddigital/sekiro_tensorflow
18 | 
19 | Hello everyone, this is analoganddigital. I used this code to complete an interesting project that plays Sekiro with machine learning.
20 | You can see the final result at https://www.bilibili.com/video/BV1wC4y1s7oa/ .
21 | I am a junior university student, which means I can't spend too much time on this project. What a shame! On the other hand, many viewers asked me to share the code, so I eventually put it on GitHub.
22 | This is a fun project and I hope everyone enjoys it. I also warmly welcome you to improve it and make the AI smarter!
23 | There are still a few things you need to know:
24 | * The window size I set is 96*86; you can change it yourself.
25 | * In the end I collected only 300 MB of training data; if you want better results, you may need to collect more.
26 | * I use AlexNet for training. This program is based on supervised learning.
27 | * I had no idea how to apply reinforcement learning yet, so I would really appreciate it if someone could help me overcome this difficulty. (already finished)
28 | * See the tutorial video for details on how to use the code: https://www.bilibili.com/video/BV1bz4y1R7kB
29 | 
30 | Reference:
31 | https://github.com/Sentdex/pygta5/blob/master/LICENSE
32 | 
33 | 
34 | # 更新——强化学习DQN打只狼 2021.2.2(中文说明)
35 | 我非常高兴地告诉大家,我最近又开发出了用DQN强化学习打只狼的代码。
36 | 众所周知,监督学习只能学习到我们所提供的数据集的相关技能,但是利用强化学习,我们将看到一个完全不一样的只狼。
37 | 
38 | 强化学习会根据reward奖励进行判断并且自己学习一种打斗方法。更重要的是,我们这次不再需要自己收集数据集了,所有更新数据均来自于DQN网络与游戏的实时交互。
39 | 利用这个DQN代码(链接见下方),你可以挑战只狼中任何一个boss,只要boss的血条位置不变即可(因为我采用的是图像抓取的方式获取只狼的血量与boss的血量进行reward判断)。
40 | 然后还有一些注意事项:
41 | * 为了缩短只狼复活周期,在这个项目训练中,我们需要采用只狼的24项修改器,让只狼能够原地复活继续训练。
42 | * 修改器下载地址:https://patch.ali213.net/showpatch/118405.html
43 | * 具体代码使用方法请见我在b站上发布的DQN打只狼的教程视频,链接:https://www.bilibili.com/video/BV1by4y1n7pe/
44 | 
45 | 祝各位玩得愉快!
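For reference, the reward shaping implemented by `action_judge` in both the training and testing scripts boils down to the sketch below. This is a simplified, illustrative restatement (the hypothetical `sketch_reward` helper is not part of the repo, and the real function also tracks a `stop` flag and an `emergence_break` counter):

```python
# Simplified restatement of action_judge() from DQN_sekiro_training_gpu.py /
# DQN_sekiro_testing_gpu.py.  The stop-flag and emergence_break bookkeeping
# of the real function are omitted here.
def sketch_reward(boss_blood, next_boss_blood, self_blood, next_self_blood):
    if next_self_blood < 3:                   # the player died
        return -10, True                      # (reward, episode done)
    if next_boss_blood - boss_blood > 15:     # boss bar refilled -> deathblow landed
        return 20, False
    reward = 0
    if next_self_blood - self_blood < -7:     # we lost a big chunk of health
        reward -= 6
    if next_boss_blood - boss_blood <= -3:    # the boss lost health
        reward += 4
    return reward, False
```

Both blood values are obtained by counting pixels in a fixed gray-value range along one row of the captured `blood_window` region (`self_blood_count` / `boss_blood_count`), which is why the boss health bar has to stay in the same place on screen.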
46 | 47 | # 旧版本用机器学习打只狼 48 | 旧版本的利用监督学习打只狼的代码链接: https://github.com/analoganddigital/sekiro_tensorflow 。 49 | 50 | 各位观众大家好,我GitHub用户名是analoganddigital。我用这个程序完成了机器学习打只狼这个项目。 51 | 最终效果视频可以看b站https://www.bilibili.com/video/BV1wC4y1s7oa/ 。 52 | 我是一个大三学生,真的非常抱歉没能长时间更新这个项目,所以我把它放到了GitHub上面,之前很多观众也是私信我想要代码。 53 | 总之我还是希望大家能喜欢这个小项目吧。当然,我非常希望大家能帮忙完善这个程序,万分感激,大家共同讨论我们会获益更多,这其实就是开源的意义。现在由于代码比较基础,所以训练效果不太好。我相信大家会有更多的点子,如果能更新一点算法,我们将会看到一个更机智的AI。我很感谢大家对之前视频的支持(受宠若惊),也十分期待大家有趣的优化,就算没有优化直接用也可以。 54 | 还有一些细节我这声明一下: 55 | * 我截取的图像大小是96*86的,各位可以根据自身情况选择。 56 | * 我最终只收集了300M的数据,如果你想训练效果更好的话,可能要收集更多。 57 | * 我用的神经网络是Alexnet,基于监督学习完成的。 58 | * 由于我能力有限,我还没想好如何用强化学习优化算法,所以如果有大佬能分享一下自己的才华,那将十分感谢。(目前已经实现) 59 | * 具体代码使用方法请见我在b站上发布的机器学习打只狼的教程视频,链接: https://www.bilibili.com/video/BV1bz4y1R7kB 60 | 61 | 部分参考代码: 62 | https://github.com/Sentdex/pygta5/blob/master/LICENSE 63 | -------------------------------------------------------------------------------- /directkeys.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 10:37:50 2020 4 | 5 | @author: pang 6 | """ 7 | 8 | import ctypes 9 | import time 10 | 11 | SendInput = ctypes.windll.user32.SendInput 12 | 13 | 14 | W = 0x11 15 | A = 0x1E 16 | S = 0x1F 17 | D = 0x20 18 | 19 | M = 0x32 20 | J = 0x24 21 | K = 0x25 22 | LSHIFT = 0x2A 23 | R = 0x13#用R代替识破 24 | V = 0x2F 25 | 26 | Q = 0x10 27 | I = 0x17 28 | O = 0x18 29 | P = 0x19 30 | C = 0x2E 31 | F = 0x21 32 | 33 | up = 0xC8 34 | down = 0xD0 35 | left = 0xCB 36 | right = 0xCD 37 | 38 | esc = 0x01 39 | 40 | # C struct redefinitions 41 | PUL = ctypes.POINTER(ctypes.c_ulong) 42 | class KeyBdInput(ctypes.Structure): 43 | _fields_ = [("wVk", ctypes.c_ushort), 44 | ("wScan", ctypes.c_ushort), 45 | ("dwFlags", ctypes.c_ulong), 46 | ("time", ctypes.c_ulong), 47 | ("dwExtraInfo", PUL)] 48 | 49 | class HardwareInput(ctypes.Structure): 50 | _fields_ = [("uMsg", ctypes.c_ulong), 51 | ("wParamL", ctypes.c_short), 52 | ("wParamH", ctypes.c_ushort)] 53 | 54 | class MouseInput(ctypes.Structure): 55 | _fields_ = [("dx", ctypes.c_long), 56 | ("dy", ctypes.c_long), 57 | ("mouseData", ctypes.c_ulong), 58 | ("dwFlags", ctypes.c_ulong), 59 | ("time",ctypes.c_ulong), 60 | ("dwExtraInfo", PUL)] 61 | 62 | class Input_I(ctypes.Union): 63 | _fields_ = [("ki", KeyBdInput), 64 | ("mi", MouseInput), 65 | ("hi", HardwareInput)] 66 | 67 | class Input(ctypes.Structure): 68 | _fields_ = [("type", ctypes.c_ulong), 69 | ("ii", Input_I)] 70 | 71 | # Actuals Functions 72 | 73 | def PressKey(hexKeyCode): 74 | extra = ctypes.c_ulong(0) 75 | ii_ = Input_I() 76 | ii_.ki = KeyBdInput( 0, hexKeyCode, 0x0008, 0, ctypes.pointer(extra) ) 77 | x = Input( ctypes.c_ulong(1), ii_ ) 78 | ctypes.windll.user32.SendInput(1, ctypes.pointer(x), ctypes.sizeof(x)) 79 | 80 | def ReleaseKey(hexKeyCode): 81 | extra = ctypes.c_ulong(0) 82 | ii_ = Input_I() 83 | ii_.ki = KeyBdInput( 0, hexKeyCode, 0x0008 | 0x0002, 0, ctypes.pointer(extra) ) 84 | x = Input( ctypes.c_ulong(1), ii_ ) 85 | ctypes.windll.user32.SendInput(1, ctypes.pointer(x), ctypes.sizeof(x)) 86 | 87 | 88 | def defense(): 89 | PressKey(M) 90 | time.sleep(0.05) 91 | ReleaseKey(M) 92 | #time.sleep(0.1) 93 | 94 | def attack(): 95 | PressKey(J) 96 | time.sleep(0.05) 97 | ReleaseKey(J) 98 | #time.sleep(0.1) 99 | 100 | def go_forward(): 101 | PressKey(W) 102 | time.sleep(0.4) 103 | ReleaseKey(W) 104 | 105 | def go_back(): 106 | PressKey(S) 107 | time.sleep(0.4) 108 | ReleaseKey(S) 109 | 110 | def go_left(): 111 | PressKey(A) 112 | 
time.sleep(0.4) 113 | ReleaseKey(A) 114 | 115 | def go_right(): 116 | PressKey(D) 117 | time.sleep(0.4) 118 | ReleaseKey(D) 119 | 120 | def jump(): 121 | PressKey(K) 122 | time.sleep(0.1) 123 | ReleaseKey(K) 124 | #time.sleep(0.1) 125 | 126 | def dodge():#闪避 127 | PressKey(R) 128 | time.sleep(0.1) 129 | ReleaseKey(R) 130 | #time.sleep(0.1) 131 | 132 | def lock_vision(): 133 | PressKey(V) 134 | time.sleep(0.3) 135 | ReleaseKey(V) 136 | time.sleep(0.1) 137 | 138 | def go_forward_QL(t): 139 | PressKey(W) 140 | time.sleep(t) 141 | ReleaseKey(W) 142 | 143 | def turn_left(t): 144 | PressKey(left) 145 | time.sleep(t) 146 | ReleaseKey(left) 147 | 148 | def turn_up(t): 149 | PressKey(up) 150 | time.sleep(t) 151 | ReleaseKey(up) 152 | 153 | def turn_right(t): 154 | PressKey(right) 155 | time.sleep(t) 156 | ReleaseKey(right) 157 | 158 | def F_go(): 159 | PressKey(F) 160 | time.sleep(0.5) 161 | ReleaseKey(F) 162 | 163 | def forward_jump(t): 164 | PressKey(W) 165 | time.sleep(t) 166 | PressKey(K) 167 | ReleaseKey(W) 168 | ReleaseKey(K) 169 | 170 | def press_esc(): 171 | PressKey(esc) 172 | time.sleep(0.3) 173 | ReleaseKey(esc) 174 | 175 | def dead(): 176 | PressKey(M) 177 | time.sleep(0.5) 178 | ReleaseKey(M) 179 | 180 | if __name__ == '__main__': 181 | time.sleep(5) 182 | time1 = time.time() 183 | while(True): 184 | if abs(time.time()-time1) > 5: 185 | break 186 | else: 187 | PressKey(M) 188 | time.sleep(0.1) 189 | ReleaseKey(M) 190 | time.sleep(0.2) 191 | 192 | 193 | PressKey(W) 194 | time.sleep(0.4) 195 | ReleaseKey(W) 196 | time.sleep(1) 197 | 198 | PressKey(J) 199 | time.sleep(0.1) 200 | ReleaseKey(J) 201 | time.sleep(1) -------------------------------------------------------------------------------- /find_blood_location.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 09:45:04 2020 4 | 5 | @author: pang 6 | """ 7 | 8 | import numpy as np 9 | from PIL import ImageGrab 10 | import cv2 11 | import time 12 | import grabscreen 13 | import os 14 | 15 | def self_blood_count(self_gray): 16 | self_blood = 0 17 | for self_bd_num in self_gray[469]: 18 | # self blood gray pixel 80~98 19 | # 血量灰度值80~98 20 | print(self_bd_num) 21 | if self_bd_num > 90 and self_bd_num < 98: 22 | self_blood += 1 23 | return self_blood 24 | 25 | def boss_blood_count(boss_gray): 26 | boss_blood = 0 27 | for boss_bd_num in boss_gray[0]: 28 | # boss blood gray pixel 65~75 29 | # 血量灰度值65~75 30 | # print(boss_bd_num) 31 | if boss_bd_num > 65 and boss_bd_num < 75: 32 | boss_blood += 1 33 | return boss_blood 34 | 35 | wait_time = 5 36 | L_t = 3 37 | 38 | window_size = (320,104,704,448)#384,344 192,172 96,86 39 | blood_window = (60,91,280,562) 40 | 41 | 42 | 43 | for i in list(range(wait_time))[::-1]: 44 | print(i+1) 45 | time.sleep(1) 46 | 47 | last_time = time.time() 48 | while(True): 49 | 50 | #printscreen = np.array(ImageGrab.grab(bbox=(window_size))) 51 | #printscreen_numpy = np.array(printscreen_pil.getdata(),dtype='uint8')\ 52 | #.reshape((printscreen_pil.size[1],printscreen_pil.size[0],3)) 53 | #pil格式耗时太长 54 | 55 | screen_gray = cv2.cvtColor(grabscreen.grab_screen(blood_window),cv2.COLOR_BGR2GRAY)#灰度图像收集 56 | # screen_reshape = cv2.resize(screen_gray,(96,86)) 57 | self_blood = self_blood_count(screen_gray) 58 | boss_blood = boss_blood_count(screen_gray) 59 | 60 | cv2.imshow('window1',screen_gray) 61 | #cv2.imshow('window3',printscreen) 62 | #cv2.imshow('window2',screen_reshape) 63 | 64 | #测试时间用 65 | print('loop took {} 
seconds'.format(time.time()-last_time)) 66 | last_time = time.time() 67 | 68 | 69 | if cv2.waitKey(5) & 0xFF == ord('q'): 70 | break 71 | cv2.waitKey()# 视频结束后,按任意键退出 72 | cv2.destroyAllWindows() 73 | -------------------------------------------------------------------------------- /getkeys.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 12:03:44 2020 4 | 5 | @author: pang 6 | """ 7 | 8 | import win32api as wapi 9 | import time 10 | 11 | keyList = ["\b"] 12 | for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ 123456789,.'£$/\\": 13 | keyList.append(char) 14 | 15 | def key_check(): 16 | keys = [] 17 | for key in keyList: 18 | if wapi.GetAsyncKeyState(ord(key)): 19 | keys.append(key) 20 | return keys 21 | -------------------------------------------------------------------------------- /grabscreen.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Apr 8 12:14:29 2020 4 | 5 | @author: pang 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | import win32gui, win32ui, win32con, win32api 11 | 12 | def grab_screen(region=None): 13 | 14 | hwin = win32gui.GetDesktopWindow() 15 | 16 | if region: 17 | left,top,x2,y2 = region 18 | width = x2 - left + 1 19 | height = y2 - top + 1 20 | else: 21 | width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN) 22 | height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN) 23 | left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN) 24 | top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN) 25 | 26 | hwindc = win32gui.GetWindowDC(hwin) 27 | srcdc = win32ui.CreateDCFromHandle(hwindc) 28 | memdc = srcdc.CreateCompatibleDC() 29 | bmp = win32ui.CreateBitmap() 30 | bmp.CreateCompatibleBitmap(srcdc, width, height) 31 | memdc.SelectObject(bmp) 32 | memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY) 33 | 34 | signedIntsArray = bmp.GetBitmapBits(True) 35 | img = np.fromstring(signedIntsArray, dtype='uint8') 36 | img.shape = (height,width,4) 37 | 38 | srcdc.DeleteDC() 39 | memdc.DeleteDC() 40 | win32gui.ReleaseDC(hwin, hwindc) 41 | win32gui.DeleteObject(bmp.GetHandle()) 42 | 43 | return img -------------------------------------------------------------------------------- /restart.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jul 4 18:31:36 2020 4 | 5 | @author: pang 6 | """ 7 | 8 | import directkeys 9 | import time 10 | 11 | def restart(): 12 | print("死,restart") 13 | time.sleep(8) 14 | directkeys.lock_vision() 15 | time.sleep(0.2) 16 | directkeys.attack() 17 | print("开始新一轮") 18 | 19 | if __name__ == "__main__": 20 | restart() --------------------------------------------------------------------------------
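Before training, it can be worth checking that the capture coordinates match your own screen setup. The snippet below is a minimal sanity check that is not part of the original repo; it only uses `grab_screen` from `grabscreen.py` (so it needs the same pywin32 and OpenCV packages the project already imports) and repeats the pixel-counting idea of `self_blood_count` / `boss_blood_count` inline. The coordinates assume the game window is positioned as in the scripts above.

```python
# Minimal capture sanity check (not part of the original repo).  It grabs the same
# blood_window region the DQN scripts use, converts it to grayscale, and prints the
# pixel counts the reward function relies on.  Run it on Windows with the game visible.
import cv2
from grabscreen import grab_screen

BLOOD_WINDOW = (60, 91, 280, 562)        # same region as DQN_sekiro_training_gpu.py

gray = cv2.cvtColor(grab_screen(BLOOD_WINDOW), cv2.COLOR_BGR2GRAY)
print('captured region:', gray.shape)

# same row-wise counting as self_blood_count / boss_blood_count
self_blood = sum(1 for px in gray[469] if 90 < px < 98)
boss_blood = sum(1 for px in gray[0] if 65 < px < 75)
print('self blood pixels:', self_blood, ' boss blood pixels:', boss_blood)
```

If both counts stay at zero while the health bars are clearly visible, the window coordinates or gray-value thresholds need to be recalibrated for your setup, which is what `find_blood_location.py` is for.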