├── .gitignore
├── DirectInputMac.py
├── DirectInputWindows.py
├── GDBot.py
├── Helpers.py
├── README.md
└── requirements.txt

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
.idea/

--------------------------------------------------------------------------------
/DirectInputMac.py:
--------------------------------------------------------------------------------
# Simulates keypresses on Mac
from Quartz.CoreGraphics import CGEventCreateKeyboardEvent
from Quartz.CoreGraphics import CGEventPost
from Quartz.CoreGraphics import kCGHIDEventTap
import time

# Press the up arrow key to bounce the cube
def bounce():
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x7E, True))
    time.sleep(0.05)
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x7E, False))

# Press the space bar to restart the level
def restart():
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x31, True))
    time.sleep(0.05)
    CGEventPost(kCGHIDEventTap, CGEventCreateKeyboardEvent(None, 0x31, False))

--------------------------------------------------------------------------------
/DirectInputWindows.py:
--------------------------------------------------------------------------------
# Simulates mouse clicks on Windows
from pynput.mouse import Button, Controller

mouse = Controller()

# Bounce the cube by clicking on the screen
def bounce():
    mouse.position = (180, 350)
    mouse.click(Button.left, 1)

# Restart the level by clicking on the restart button
def restart():
    mouse.position = (180, 425)
    mouse.click(Button.left, 1)

--------------------------------------------------------------------------------
/GDBot.py:
--------------------------------------------------------------------------------
# Main script to run the bot from
import random
import math
import numpy as np
import keyboard
import platform
from PIL import Image
from collections import namedtuple
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from Helpers import get_screen, isalive

# Import restart and bounce according to the OS
if platform.system() == "Windows":
    from DirectInputWindows import bounce, restart
else:
    from DirectInputMac import bounce, restart

# Use the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2 possible actions (bounce, don't bounce)
n_actions = 2

# Hyperparameters for model training
BATCH_SIZE = 28
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.05
EPS_DECAY = 300
TARGET_UPDATE = 10
num_episodes = 1500


# Transition namedtuple that consists of a state (the difference of two frames),
# the action the model took, and the resulting state and reward
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


resize = T.Compose([T.ToPILImage(),
                    T.Resize(40, interpolation=Image.BICUBIC),
                    T.ToTensor()])

# Helper class that stores transitions and feeds batches of them to the network
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

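    # Illustration of the circular-buffer behaviour of push below: with a
    # capacity of 3, pushing transitions 1, 2, 3, 4 leaves memory == [4, 2, 3],
    # because the fourth push wraps around and overwrites position 0.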
    def push(self, *args):
        # Saves a transition
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)


# Deep Q Network
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
class DQN(nn.Module):

    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on the output of the conv2d
        # layers and therefore on the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1
        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine the next action, or a batch
    # during optimization. Returns a tensor of Q-values with one row per input
    # state and one column per action.
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))

# Initialize the policy and target networks
policy_net = DQN(45, 40, n_actions).to(device)
target_net = DQN(45, 40, n_actions).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

# Initialize the optimizer and the replay memory (which caps the number of
# stored transitions), and set steps_done to 0
optimizer = optim.RMSprop(policy_net.parameters())
memory = ReplayMemory(10000)
steps_done = 0

# Select an action either according to the network or by random choice, where
# the likelihood of a random choice decays toward EPS_END as steps_done grows.
# This allows for more exploration while the model is still learning and more
# exploitation once the model has improved.
def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    # Pick an action according to the network
    if sample > eps_threshold:
        with torch.no_grad():
            # policy_net(state).max(1) returns the largest Q-value of each row
            # together with its index; the index ([1]) is the action with the
            # largest expected reward.
            return policy_net(state).max(1)[1].view(1, 1)

    else:
        # Randomly pick an action (returned as a 1x1 long tensor so it can be
        # concatenated with network-chosen actions during optimization)
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)

# Function for model optimization
# (based on https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html)
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for a
    # detailed explanation). This converts a batch-array of Transitions
    # to a Transition of batch-arrays.
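    # For example, zipping [Transition(s1, a1, n1, r1), Transition(s2, a2, n2, r2)]
    # yields Transition(state=(s1, s2), action=(a1, a2),
    # next_state=(n1, n2), reward=(r1, r2)).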
    batch = Transition(*zip(*transitions))

    # Compute a mask of non-final states and concatenate the batch elements
    # (a final state would've been the one after which the simulation ended)
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                            batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state
                                       if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
    # columns of the actions taken. These are the actions which would've been
    # taken for each batch state according to policy_net
    state_action_values = policy_net(state_batch).gather(1, action_batch)

    # Compute V(s_{t+1}) for all next states.
    # Expected values of actions for non_final_next_states are computed based
    # on the "older" target_net; selecting their best reward with max(1)[0].
    # This is merged based on the mask, such that we'll have either the expected
    # state value or 0 in case the state was final.
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
    # Compute the expected Q values
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    # Compute Huber loss
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    for param in policy_net.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer.step()

# Convert an image to a tensor to feed into the model
def convert_to_n(screen):
    screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
    screen = torch.from_numpy(screen)
    # Resize, and add a batch dimension (BCHW)
    return resize(screen).unsqueeze(0).to(device)


# Main training loop
for i_episode in range(num_episodes):
    # Initialize the training variables
    alive = True
    i = 1
    reward = None
    last_screen = convert_to_n(get_screen())
    current_screen = convert_to_n(get_screen())
    state = current_screen - last_screen

    while alive:

        # Exit condition for Windows: press q to quit training
        if keyboard.is_pressed('q'):
            exit()

        # Get two consecutive frames
        last_screen = get_screen()
        current_screen = get_screen()
        # Get the alive boolean
        alive = isalive(last_screen, current_screen)

        # Define the next state (as we can't assign the reward straight away)
        if alive:
            next_state = convert_to_n(current_screen) - convert_to_n(last_screen)
        else:
            # End the episode if the cube is dead
            next_state = None
            restart()
            break

        # Select and perform an action and
        # give a reward no matter the action,
        # as the length of survival is what matters
        action = select_action(state)
        if action.item() == 0:
            reward = i * 0.01
        else:
            bounce()
            reward = i * 0.01

        reward = torch.tensor([reward], device=device)

        # Increase the reward multiplier each frame to reward the network
        # for surviving longer
        i = i + 1

        # Add the state and result to memory
        memory.push(state, action, next_state, reward)

        state = next_state

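        # Each stored transition thus holds the frame-difference state, the
        # action as a 1x1 long tensor, the next frame-difference, and the
        # reward tensor; since reward = i * 0.01, a cube that is still alive
        # on its 100th frame stores a reward of 1.0 for that frame.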
        # Perform one step of the optimization (on the policy network)
        optimize_model()

    # Update the target network every TARGET_UPDATE (10) episodes
    if i_episode % TARGET_UPDATE == 0:
        target_net.load_state_dict(policy_net.state_dict())
    print("Episode " + str(i_episode) + " done")
    # Reset the reward multiplier each episode, as the cube only gets
    # rewarded for surviving long on the same run
    i = 1

--------------------------------------------------------------------------------
/Helpers.py:
--------------------------------------------------------------------------------
# Helper functions
import numpy as np
import pyscreenshot
import cv2
# compare_ssim moved to skimage.metrics.structural_similarity in newer scikit-image releases
from skimage.metrics import structural_similarity as compare_ssim

# Gets one frame
def get_screen():
    # Grab a 470x410 region (the size of GD without recording the area
    # behind the cube and with the top cut off)
    screen = np.array(pyscreenshot.grab(bbox=(150, 40, 620, 450)))
    # Simplify the image
    gray_screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
    gray_screen = cv2.Canny(gray_screen, threshold1=200, threshold2=300)
    return gray_screen

# Compares two consecutive frames and returns a boolean for alive. If both frames show the
# static "Restart?" screen, the structural similarity index will be 0.99+, which means the
# cube is dead. Otherwise it's alive.
def isalive(screen1, screen2):
    (score, diff) = compare_ssim(screen1, screen2, full=True)
    if score < 0.995:
        return True
    else:
        return False

# Records and displays the screen
def screen_record():
    while True:
        gray_printscreen = get_screen()
        cv2.imshow('window', gray_printscreen)
        # Press q to exit screen recording
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reinforcement Learning - Playing Geometry Dash using PyTorch

A deep Q-network that plays Geometry Dash

# Dependencies
Python 3.6+
Run `pip3 install -r requirements.txt` to install the remaining dependencies (or `pip install -r requirements.txt`, depending on your pip installation)

# How to run

Run the Geometry Dash game in the top-left corner of your screen at the smallest resolution and run `GDBot.py`, making sure that no window is covering the network's "view" of the game (an example command sequence is shown at the end of this listing)

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
opencv-python
torch
torchvision
pyobjc-framework-Quartz
pynput
pyscreenshot
scikit-image
keyboard
pillow
--------------------------------------------------------------------------------
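
Usage example (not one of the repository files above): assuming `python3` and `pip3` are on your PATH and Geometry Dash is already positioned as described in the README, a run might look like:

    pip3 install -r requirements.txt
    python3 GDBot.py

Press q while the bot is running to stop training.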