├── .gitignore
├── DirectInputMac.py
├── DirectInputWindows.py
├── GDBot.py
├── Helpers.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
.idea/

--------------------------------------------------------------------------------
/DirectInputMac.py:
--------------------------------------------------------------------------------
# Simulates keypresses on Mac
from Quartz.CoreGraphics import CGEventCreateKeyboardEvent
from Quartz.CoreGraphics import CGEventPost
from Quartz.CoreGraphics import kCGHIDEventTap
import time

# Press the up arrow key to bounce the cube
def bounce():
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x7E, True))
    time.sleep(0.05)
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x7E, False))

# Press the space bar to restart the level
def restart():
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x31, True))
    time.sleep(0.05)
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x31, False))

--------------------------------------------------------------------------------
/DirectInputWindows.py:
--------------------------------------------------------------------------------
# Simulates mouse clicks on Windows
from pynput.mouse import Button, Controller

mouse = Controller()

# Bounce the cube by clicking on the screen
def bounce():
    mouse.position = (180, 350)
    mouse.click(Button.left, 1)

# Restart the level by clicking on the restart button
def restart():
    mouse.position = (180, 425)
    mouse.click(Button.left, 1)

--------------------------------------------------------------------------------
/GDBot.py:
--------------------------------------------------------------------------------
# Main script to run the bot from
import random
import math
import numpy as np
import keyboard
import platform
from PIL import Image
from collections import namedtuple
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from Helpers import get_screen, isalive

# Import restart and bounce according to the OS
if platform.system() == "Windows":
    from DirectInputWindows import bounce, restart
else:
    from DirectInputMac import bounce, restart

# Use the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2 possible actions (bounce, don't bounce)
n_actions = 2

# Hyperparameters for model training
BATCH_SIZE = 28
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 300
TARGET_UPDATE = 10
num_episodes = 1500


# Transition namedtuple that consists of a state (the difference of two frames),
# the action the model took, and the resulting state and reward
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


resize = T.Compose([T.ToPILImage(),
                    T.Resize(40, interpolation=Image.BICUBIC),
                    T.ToTensor()])

# Helper class that stores transitions and feeds batches of them to the network
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

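    # Illustration of the circular-buffer behaviour of push below: with a
    # capacity of 3, pushing transitions 1, 2, 3, 4 leaves memory == [4, 2, 3],
    # because the fourth push wraps around and overwrites position 0.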
    def push(self, *args):
        # Saves a transition
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)


# Deep Q Network
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
class DQN(nn.Module):

    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on the output of the conv2d
        # layers and therefore on the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1
        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine the next action, or a batch
    # during optimization. Returns a tensor of Q-values with one row per input
    # state and one column per action.
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))

# Initialize the policy and target networks
policy_net = DQN(45, 40, n_actions).to(device)
target_net = DQN(45, 40, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Initialize the optimizer and the replay memory (which caps the number of
# stored transitions), and set steps_done to 0
optimizer = optim.RMSprop(policy_net.parameters())
memory = ReplayMemory(10000)
steps_done = 0

# Select an action either according to the network or by random choice, where
# the likelihood of a random choice decays toward EPS_END as steps_done grows.
# This allows for more exploration while the model is still learning and more
# exploitation once the model has improved.
def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    # Pick an action according to the network
    if sample > eps_threshold:
        with torch.no_grad():
            # policy_net(state).max(1) returns the largest Q-value of each row
            # together with its index; the index ([1]) is the action with the
            # largest expected reward.
            return policy_net(state).max(1)[1].view(1, 1)

    else:
        # Randomly pick an action (returned as a 1x1 long tensor so it can be
        # concatenated with network-chosen actions during optimization)
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)

# Function for model optimization
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for a
    # detailed explanation). This converts a batch-array of Transitions
    # to a Transition of batch-arrays.
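    # For example, zipping [Transition(s1, a1, n1, r1), Transition(s2, a2, n2, r2)]
    # yields Transition(state=(s1, s2), action=(a1, a2),
    # next_state=(n1, n2), reward=(r1, r2)).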
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which the simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                       if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of the actions taken. These are the actions which would've been
    # taken for each batch state according to policy_net
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1)[0].
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

# Convert an image to a tensor to feed into the model
def convert_to_n(screen):
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
    screen = torch.from_numpy(screen)
    # Resize, and add a batch dimension (BCHW)
    return resize(screen).unsqueeze(0).to(device)


# Main training loop
for i_episode in range(num_episodes):
    # Initialize the training variables
    alive = True
    i = 1
    reward = None
    last_screen = convert_to_n(get_screen())
    current_screen = convert_to_n(get_screen())
    state = current_screen - last_screen

    while alive:

        # Exit condition for Windows: press q to quit training
        if keyboard.is_pressed('q'):
            exit()

        # Get two consecutive frames
        last_screen = get_screen()
        current_screen = get_screen()
        # Get the alive boolean
        alive = isalive(last_screen, current_screen)

        # Define the next state (as we can't assign the reward straight away)
        if alive:
            next_state = convert_to_n(current_screen) - convert_to_n(last_screen)
        else:
            # End the episode if the cube is dead
            next_state = None
            restart()
            break

        # Select and perform an action and
        # give a reward no matter the action,
        # as the length of survival is what matters
        action = select_action(state)
        if action.item() == 0:
            reward = i * 0.01
        else:
            bounce()
            reward = i * 0.01

        reward = torch.tensor([reward], device=device)

        # Increase the reward multiplier each frame to reward the network
        # for surviving longer
        i = i + 1

        # Add the state and result to memory
        memory.push(state, action, next_state, reward)

        state = next_state

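        # Each stored transition thus holds the frame-difference state, the
        # action as a 1x1 long tensor, the next frame-difference, and the
        # reward tensor; since reward = i * 0.01, a cube that is still alive
        # on its 100th frame stores a reward of 1.0 for that frame.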
        # Perform one step of the optimization (on the policy network)
        optimize_model()

    # Update the target network every TARGET_UPDATE (10) episodes
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())
    print("Episode " + str(i_episode) + " done")
    # Reset the reward multiplier each episode, as the cube only gets
    # rewarded for surviving long on the same run
    i = 1

--------------------------------------------------------------------------------
/Helpers.py:
--------------------------------------------------------------------------------
# Helper functions
import numpy as np
import pyscreenshot
import cv2
# compare_ssim moved to skimage.metrics.structural_similarity in newer scikit-image releases
from skimage.metrics import structural_similarity as compare_ssim

# Gets one frame
def get_screen():
    # Grab a 470x410 region (the size of GD without recording the area
    # behind the cube and with the top cut off)
    screen = np.array(pyscreenshot.grab(bbox=(150, 40, 620, 450)))
    # Simplify the image
    gray_screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
    gray_screen = cv2.Canny(gray_screen, threshold1=200, threshold2=300)
    return gray_screen

# Compares two consecutive frames and returns a boolean for alive. If both frames show the
# static "Restart?" screen, the structural similarity index will be 0.99+, which means the
# cube is dead. Otherwise it's alive.
def isalive(screen1, screen2):
    (score, diff) = compare_ssim(screen1, screen2, full=True)
    if score < 0.995:
        return True
    else:
        return False

# Records and displays the screen
def screen_record():
    while True:
        gray_printscreen = get_screen()
        cv2.imshow('window', gray_printscreen)
        # Press q to exit screen recording
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reinforcement Learning - Playing Geometry Dash using PyTorch

A deep Q-network that plays Geometry Dash

# Dependencies
Python 3.6+
Run `pip3 install -r requirements.txt` to install the remaining dependencies (or `pip install -r requirements.txt`, depending on your pip installation)

# How to run

Run the Geometry Dash game in the top-left corner of your screen at the smallest resolution and run `GDBot.py`, making sure that no window is covering the network's "view" of the game (an example command sequence is shown at the end of this listing)

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
opencv-python
torch
torchvision
pyobjc-framework-Quartz
pynput
pyscreenshot
scikit-image
keyboard
pillow
--------------------------------------------------------------------------------
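
Usage example (not one of the repository files above): assuming `python3` and `pip3` are on your PATH and Geometry Dash is already positioned as described in the README, a run might look like:

    pip3 install -r requirements.txt
    python3 GDBot.py

Press q while the bot is running to stop training.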