├── tests
│   ├── __init__.py
│   ├── main.py
│   ├── rl
│   │   ├── settings.py
│   │   ├── Memory.py
│   │   └── Main.py
│   └── state_machine
│       └── Multitask.py
├── FlashRL
│   ├── lib
│   │   ├── __init__.py
│   │   ├── util
│   │   │   ├── __init__.py
│   │   │   ├── preprocess.py
│   │   │   ├── state_trainer.py
│   │   │   └── model.py
│   │   ├── GameEnvironment.py
│   │   ├── Game.py
│   │   └── Environment.py
│   ├── contrib
│   │   ├── __init__.py
│   │   └── environments
│   │       ├── __init__.py
│   │       └── multitask
│   │           ├── multitaskgame.swf
│   │           └── __init__.py
│   └── __init__.py
├── examples
│   ├── __init__.py
│   ├── mujaffa
│   │   ├── __init__.py
│   │   ├── contrib
│   │   │   └── environments
│   │   │       └── mujaffa-v1.6
│   │   │           ├── mujaffa.swf
│   │   │           └── __init__.py
│   │   └── main.py
│   └── multitask
│       └── main.py
├── __init__.py
├── .gitignore
├── requirements.txt
├── setup.py
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/contrib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/lib/util/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/mujaffa/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from lib.Game import Game
--------------------------------------------------------------------------------
/FlashRL/contrib/environments/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/__init__.py:
--------------------------------------------------------------------------------
1 | from .lib.Game import Game
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .vscode
3 | test.png
4 | .idea
--------------------------------------------------------------------------------
/tests/main.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | if __name__ == "__main__":
4 |     pass
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium
2 | easyprocess
3 | git+https://github.com/UIA-CAIR/pyVNC
4 | pyvirtualdisplay
5 | tensorflow
6 | tensorflow-gpu
7 | h5py
8 | keras
--------------------------------------------------------------------------------
/FlashRL/contrib/environments/multitask/multitaskgame.swf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cair/FlashRL/HEAD/FlashRL/contrib/environments/multitask/multitaskgame.swf -------------------------------------------------------------------------------- /examples/mujaffa/contrib/environments/mujaffa-v1.6/mujaffa.swf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cair/FlashRL/HEAD/examples/mujaffa/contrib/environments/mujaffa-v1.6/mujaffa.swf -------------------------------------------------------------------------------- /examples/mujaffa/main.py: -------------------------------------------------------------------------------- 1 | from FlashRL.lib.Game import Game 2 | 3 | 4 | def on_frame(state, frame, type, vnc): 5 | print(state.shape, type) 6 | 7 | Game("mujaffa-v1.6", fps=1, frame_callback=on_frame, grayscale=True, normalized=True) 8 | -------------------------------------------------------------------------------- /examples/multitask/main.py: -------------------------------------------------------------------------------- 1 | from FlashRL.lib.Game import Game 2 | 3 | 4 | def on_frame(state, frame, type, vnc): 5 | pass 6 | 7 | 8 | 9 | 10 | Game("multitask", fps=10, frame_callback=on_frame, grayscale=True, normalized=True) 11 | -------------------------------------------------------------------------------- /FlashRL/contrib/environments/multitask/__init__.py: -------------------------------------------------------------------------------- 1 | define = { 2 | "swf": "multitaskgame.swf", 3 | "model": "model.h5", 4 | "dataset": "dataset.p", 5 | "action_space": 9, 6 | "action_names": ["terminal", "menu", "score", "lll", "terminal_1", "stage", "score", "stage", "transition"], 7 | "state_space": (84, 84, 3) 8 | } -------------------------------------------------------------------------------- /examples/mujaffa/contrib/environments/mujaffa-v1.6/__init__.py: -------------------------------------------------------------------------------- 1 | define = { 2 | "swf": "mujaffa.swf", 3 | "model": "model.h5", 4 | "dataset": "dataset.p", 5 | "action_space": 4, 6 | "action_names": ["terminal", "menu", "score", "lll", "terminal_1", "stage", "score", "stage", "transition"], 7 | "state_space": (84, 84, 3) 8 | } -------------------------------------------------------------------------------- /tests/rl/settings.py: -------------------------------------------------------------------------------- 1 | settings = { 2 | "learning_rate": 1e-6, 3 | "memory_size": 1000000, # 1 Million frames in memory 4 | "epsilon_start": 0.5, # Start of epsilon decent 5 | "epsilon_end": 0.0, # End of epsilon decent 6 | "epsilon_steps": 100000, # Epsilon steps 7 | "exploration_wins": 0, # Number of victories using random moves before starting epsilon phase 8 | "batch_size": 16, 9 | "discount_factor": 0.99, 10 | "grayscale": False, 11 | "load_latest_checkpoint": False, 12 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | 5 | 6 | setup( 7 | name='FlashRL', # This is the name of your PyPI-package. 
8 | version='1.0', # Update the version number for new releases 9 | install_requires=['numpy', 'pygame', "pyVNC", "pillow", "scipy", "easyprocess", "pyvirtualdisplay", "keras", "h5py"], 10 | packages=["FlashRL.lib", "FlashRL.lib.util", "FlashRL.contrib", "FlashRL.contrib.environments", "FlashRL.contrib.environments.multitask"], 11 | package_data={ 12 | '': ['*.swf'], 13 | }, 14 | scripts=[], 15 | dependency_links=[ 16 | "git+ssh://git@github.com:UIA-CAIR/pyVNC.git" 17 | ] 18 | ) 19 | -------------------------------------------------------------------------------- /FlashRL/lib/GameEnvironment.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from easyprocess import EasyProcess 3 | from pyvirtualdisplay import Display 4 | import os 5 | 6 | class GameEnvironment(threading.Thread): 7 | def __init__(self, display, env): 8 | threading.Thread.__init__(self) 9 | #super(self) 10 | self.display = display 11 | self.env = env 12 | 13 | def run(self): 14 | self.vnc(self.display) 15 | 16 | def vnc(self, vnc_display): 17 | os.environ["DISPLAY"] = vnc_display 18 | with Display(backend='xvnc', rfbport=5902, size=(223, 150)) as disp: 19 | with EasyProcess(' '.join(['gnash', os.path.join(self.env.path, self.env.swf), "--width", "150", "--height", "150","--render-mode", "1", "--hide-menubar"])) as proc: 20 | proc.wait() -------------------------------------------------------------------------------- /tests/rl/Memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class Memory: 5 | 6 | def __init__(self, memory_size): 7 | self.buffer = [] 8 | self.count = 0 9 | self.max_memory_size = memory_size 10 | 11 | def _recalibrate(self): 12 | self.count = len(self.buffer) 13 | 14 | def remove_n(self, n): 15 | self.buffer = self.buffer[n-1:-1] 16 | self._recalibrate() 17 | 18 | def add(self, memory): 19 | self.buffer.append(memory) 20 | self.count += 1 21 | 22 | if self.count > self.max_memory_size: 23 | self.buffer.pop(0) 24 | self.count -= 1 25 | 26 | def get(self, batch_size=1): 27 | if self.count <= batch_size: 28 | return np.array(self.buffer) 29 | 30 | return np.array(random.sample(self.buffer, batch_size)) 31 | -------------------------------------------------------------------------------- /FlashRL/lib/Game.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from pyVNC.Client import Client 4 | from .GameEnvironment import GameEnvironment 5 | from .Environment import Environment 6 | 7 | class Game: 8 | def __init__(self, environment_name, fps=10, frame_callback=None, grayscale=False, normalized=False): 9 | original_display = os.environ["DISPLAY"] 10 | vnc_display = ":98" 11 | print("Initialize xVNC") 12 | print("---------------") 13 | print("Display: %s" % original_display) 14 | print("VNC Display: %s" % vnc_display) 15 | 16 | env = Environment(environment_name, fps=fps, frame_callback=frame_callback, grayscale=grayscale, normalized=normalized) 17 | 18 | x_vnc = GameEnvironment(vnc_display, env) 19 | x_vnc.start() 20 | 21 | time.sleep(1) 22 | 23 | os.environ["DISPLAY"] = original_display 24 | py_vnc = Client(host="127.0.0.1", port=5902, gui=True, array=True) 25 | py_vnc.start() 26 | 27 | time.sleep(1) 28 | 29 | env.setup(py_vnc) 30 | -------------------------------------------------------------------------------- /FlashRL/lib/util/preprocess.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import pickle 4 | from state.multitask_one.model import StateModel 5 | import os 6 | if __name__ == "__main__": 7 | model = StateModel() 8 | 9 | image_class_path = os.path.join(os.getcwd(), "images", "training") 10 | image_classes = [x for x in os.listdir(image_class_path)] 11 | image_classes_path = [os.path.join(image_class_path, x) for x in image_classes] 12 | n_classes = len(image_classes) 13 | print(n_classes) 14 | X = [] 15 | Y = [] 16 | for i, class_path in enumerate(image_classes_path): 17 | files = [os.path.join(class_path, x) for x in os.listdir(class_path)] 18 | 19 | for image_path in files: 20 | img = Image.open(image_path) 21 | 22 | # Preprocess image 23 | np_img = model.preprocess(img) 24 | 25 | # Create class label 26 | y = np.zeros(shape=(n_classes, )) 27 | y[i] = 1 28 | 29 | # Add to dataset 30 | X.append(np_img) 31 | Y.append(y) 32 | 33 | pickle.dump((np.array(X), np.array(Y)), open(os.path.join(os.getcwd(), "dataset.p"), "wb")) 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /FlashRL/lib/util/state_trainer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import importlib.util 4 | import pickle 5 | from model import Model 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class Trainer: 9 | 10 | def __init__(self, environment_path): 11 | self.env_path = environment_path 12 | self.module_path = os.path.join(self.env_path, "__init__.py") 13 | 14 | self.env_config = None 15 | self.env_training_data = None 16 | 17 | self.load_environment_config() 18 | self.load_training_data() 19 | 20 | self.env_model_path = os.path.join(self.env_path, self.env_config["model"]) 21 | 22 | def load_environment_config(self): 23 | spec = importlib.util.spec_from_file_location("module.define", self.module_path) 24 | mod = importlib.util.module_from_spec(spec) 25 | spec.loader.exec_module(mod) 26 | self.env_config = mod.define 27 | 28 | if self.env_config is None: 29 | raise RuntimeError("Environment configuration is empty!") 30 | 31 | def load_training_data(self): 32 | self.env_training_data = pickle.load(open(os.path.join(self.env_path, self.env_config["dataset"]), "rb")) 33 | 34 | def train(self): 35 | m = Model(self.env_training_data, self.env_model_path) 36 | m.train() 37 | 38 | 39 | if __name__ == "__main__": 40 | 41 | for env in glob.glob(os.path.join(dir_path, "..", "environments/*")): 42 | trainer = Trainer(env) 43 | trainer.train() 44 | -------------------------------------------------------------------------------- /FlashRL/lib/util/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL.Image import ANTIALIAS 5 | from keras.callbacks import ModelCheckpoint 6 | from keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout 7 | from keras.models import Sequential 8 | from keras.optimizers import RMSprop, Adam 9 | 10 | class Model: 11 | 12 | def __init__(self, training_data, model_path): 13 | self.training_data = training_data 14 | self.input_shape = self.training_data[0].shape[1:] 15 | self.classes = self.training_data[1].shape[1] 16 | self.checkpoint_cb = ModelCheckpoint(model_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=5) 17 | self.X = 
self.training_data[0] 18 | self.Y = self.training_data[1] 19 | self.model = self.build_model() 20 | 21 | def build_model(self): 22 | 23 | model = Sequential() 24 | model.add(Conv2D(32, (3, 3), data_format="channels_last", input_shape=self.input_shape)) 25 | model.add(Activation('relu')) 26 | model.add(MaxPooling2D(pool_size=(2, 2))) 27 | 28 | model.add(Conv2D(32, (3, 3), data_format="channels_last")) 29 | model.add(Activation('relu')) 30 | model.add(MaxPooling2D(pool_size=(2, 2))) 31 | 32 | model.add(Conv2D(64, (3, 3), data_format="channels_last")) 33 | model.add(Activation('relu')) 34 | model.add(MaxPooling2D(pool_size=(2, 2))) 35 | 36 | model.add(Conv2D(64, (3, 3), data_format="channels_last")) 37 | model.add(Activation('relu')) 38 | model.add(MaxPooling2D(pool_size=(2, 2))) 39 | 40 | 41 | model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors 42 | model.add(Dense(512)) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | 46 | model.add(Dense(self.classes)) 47 | model.add(Activation('softmax')) 48 | 49 | optimizer = Adam(lr=0.00001, decay=8e-08) 50 | model.compile(loss='categorical_crossentropy', 51 | optimizer=optimizer, 52 | metrics=['accuracy']) 53 | 54 | return model 55 | 56 | def train(self, shuffle=True): 57 | 58 | if shuffle: 59 | idxes = [x for x in range(len(self.X))] 60 | random.shuffle(idxes) 61 | X_NEW = [] 62 | Y_NEW = [] 63 | 64 | for i in idxes: 65 | X_NEW.append(self.X[i]) 66 | Y_NEW.append(self.Y[i]) 67 | 68 | X = np.array(X_NEW) 69 | Y = np.array(Y_NEW) 70 | 71 | acc = self.model.fit( 72 | X, 73 | Y, 74 | batch_size=8, 75 | epochs=300, 76 | verbose=1, 77 | callbacks=[self.checkpoint_cb], 78 | validation_split=0.4 79 | ) 80 | 81 | return acc 82 | 83 | def predict(self, X): 84 | answer = self.model.predict(np.array([X])) 85 | idx = np.argmax(answer) 86 | return self.classes[idx] 87 | -------------------------------------------------------------------------------- /FlashRL/lib/Environment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | import importlib 5 | #from keras.models import load_model 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class Environment: 9 | def __init__(self, env_name, fps=10, frame_callback=None, grayscale=False, normalized=False): 10 | self.fps = fps 11 | self.frame_count = 0 12 | self.grayscale = grayscale 13 | self.normalized = normalized 14 | self.frame_callback = frame_callback 15 | self.env_name = env_name 16 | self.path = os.path.join(dir_path, "..", "contrib", "environments", self.env_name) 17 | 18 | if not os.path.isdir(self.path): 19 | self.path = os.path.join("contrib", "environments", self.env_name) 20 | 21 | if not os.path.isdir(self.path): 22 | raise FileExistsError("The specified environment \"%s\" could not be found." 
% self.env_name) 23 | 24 | self.env_config = self.load_config() 25 | self.swf = self.env_config["swf"] 26 | self.model_path = os.path.join(self.path ,self.env_config["model"]) 27 | self.dataset = self.env_config["dataset"] 28 | self.action_space = self.env_config["action_space"] 29 | self.action_names = self.env_config["action_names"] 30 | self.state_space = self.env_config["state_space"] 31 | 32 | try: 33 | #self.model = load_model(self.model_path) 34 | print("Self.model initialize would occur here") 35 | self.model = None 36 | except OSError as e: 37 | print("No state prediction!") 38 | self.model = None 39 | """# Missing model, prompt for collecting training data 40 | ynq = None 41 | while ynq not in ["y", "n", "q"]: 42 | ynq = input("State prediction model is missing. Collect training data? (Q for quit): ").lower() 43 | 44 | if ynq == "n" or ynq == "q": 45 | print("Exiting!") 46 | exit(0) 47 | else: 48 | print("Starting Game Mode, Collecting unlabeled images") 49 | """ 50 | 51 | 52 | def load_config(self): 53 | spec = importlib.util.spec_from_file_location("module.define", os.path.join(self.path, "__init__.py")) 54 | mod = importlib.util.module_from_spec(spec) 55 | spec.loader.exec_module(mod) 56 | return mod.define 57 | 58 | def setup(self, vnc): 59 | self.vnc = vnc 60 | self.vnc.send_mouse("Left", (self.vnc.screen.size[0], 0)) 61 | self.vnc.add_callback(1 / self.fps, self.on_frame) 62 | 63 | def preprocess(self, pil_image): 64 | img = pil_image.resize((self.state_space[0], self.state_space[1]), Image.ANTIALIAS) 65 | if self.grayscale: 66 | img = img.convert("L") 67 | else: 68 | img = img.convert('RGB') 69 | data = np.array(img) 70 | 71 | if self.normalized: 72 | data = data / 255 73 | 74 | return data 75 | 76 | # NN-Tr XD lets go 77 | 78 | def render(self): 79 | img = self.vnc.screen.get_array() 80 | img = Image.fromarray(img) 81 | arr_img = self.preprocess(img) 82 | return np.array([arr_img]) 83 | 84 | def on_frame(self): 85 | state = self.render() 86 | state_type = None 87 | if self.model: 88 | state_type = self.action_names[np.argmax(self.model.predict(state))] 89 | 90 | if self.frame_callback: 91 | self.frame_callback(state, self.frame_count, state_type, self.vnc) 92 | 93 | self.frame_count += 1 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FlashRL - Flash Platform for Reinforcement Learning 2 | 3 | For the updated version of FlashRL, go to [this link](https://github.com/cair/rl). 4 | 5 | # TODO List 6 | * Fix pyVNC issue. Currently pyVNC fails to start a VNC server for the game to run on. We need to solve this issue in order to run our games in headless mode. 7 | * Begin developing custom environments. 8 | * Begin developing Docker containers for our code to run in. Preferably, create a Dockerfile that can be used to run custom environments without the need for the local machine to have all the dependencies installed. 9 | 10 | # Prerequisites 11 | * Ubuntu 18.04 (Our most recent testing of 20.04 proves that it does not work.) 12 | * Python 3.x.x (Python 3.6.8 is tested) 13 | * gnash 14 | * xvfb 15 | 16 | # Installation 17 | For our testing, we have been working in a python virtual environment. 18 | ```bash 19 | sudo apt-get install xvfb 20 | sudo apt-get install gnash 21 | sudo apt-get install vnc4server 22 | # I would reccomend doing the next steps inside a virtual environment. 
23 | pip install git+https://github.com/cair/pyVNC 24 | pip install git+https://github.com/JDaniel41/FlashRL 25 | ``` 26 | 27 | # Deploy new environment 28 | Developers are able to import custom environments through ```project/contrib/environments/``` 29 | 30 | A typical custom implementation looks like this: 31 | ```python 32 | - project 33 | - __init__.py 34 | - main.py 35 | - contrib 36 | - environments 37 | - env_name 38 | - __init__.py 39 | - dataset.p 40 | - model.h5 41 | - env.swf 42 | 43 | ``` 44 | in the following section, we demonstrate how to implement the flash game Mujaffa as an environment for FlashRL. 45 | 46 | ## Mujaffa-1.6 47 | ### Prerequisites 48 | * SWF Game File 49 | * Python 3x 50 | * Keras 51 | 52 | ### 53 | * Create directory structure ```mkdir -p contrib/environments/mujaffa-v1.6``` 54 | * Create Configuration file: 55 | ```python 56 | echo "define = { 57 | "swf": "mujaffa.swf", 58 | "model": "model.h5", 59 | "dataset": "dataset.p", 60 | "scenes": [], 61 | "state_space": (84, 84, 3) 62 | }" > contrib/environments/mujaffa-v1.6/__init__.py 63 | ``` 64 | 65 | * Add swf "mujaffa.swf" to ```contrib/environments/mujaffa-v1.6/``` 66 | * Create file ```main.py in project root``` with following template 67 | 68 | ``` 69 | from FlashRL import Game 70 | 71 | def on_frame(state, type, vnc): 72 | # vnc.send_key("a") # Sends the key "a" 73 | # vnc.send_mouse("Left", (200, 200)) # Left Clicks at x=200, y=200 74 | # vnc.send_mouse("Right", (200, 200)) # Right Clicks at x=200, y=200 75 | pass 76 | 77 | g = Game("mujaffa-v1.6", fps=10, frame_callback=on_frame, grayscale=True, normalized=True) 78 | ``` 79 | 80 | 81 | # Licence 82 | Copyright 2017/2018 Per-Arne Andersen 83 | 84 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 85 | 86 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 87 | 88 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
89 | -------------------------------------------------------------------------------- /tests/state_machine/Multitask.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import numpy as np 3 | import time 4 | 5 | import pygame 6 | from pyVNC.constants import K_LEFT, K_RIGHT 7 | from PIL import Image 8 | from rl.Main import DQN 9 | from state.multitask_one.model import StateModel 10 | import os 11 | 12 | dir_path = os.path.dirname(os.path.realpath(__file__)) 13 | 14 | 15 | class Multitask: 16 | 17 | def state_debugger(self, s, s1, r): 18 | du = np.sum((s - s1)**2) 19 | label = self.pygame_font.render(str(r), 1, (255, 0, 0)) 20 | label2 = self.pygame_font.render(str(du), 1, (255, 0, 0)) 21 | label_surface = pygame.Surface((84, 84)) 22 | label_surface.blit(label, (0, 0)) 23 | label_surface.blit(label2, (0, 48)) 24 | 25 | score = pygame.surfarray.array3d(label_surface).swapaxes(0, 1) 26 | concat_it = np.concatenate((s, score, s1), axis=1) 27 | 28 | im = Image.fromarray(concat_it.astype('uint8')) 29 | im.save("test.png") 30 | 31 | 32 | def create_training_data(self, clazz, s): 33 | pass 34 | 35 | 36 | def handle_menu(self): 37 | 38 | if not self.has_trained: 39 | print(self.episode_reward, self.action_distribution, self.q_model.memory.count) 40 | self.episode_reward = 0 41 | self.action_distribution = [0 for x in self.action_space] 42 | 43 | # Train for 10 epochs 44 | for i in range(100): 45 | self.q_model.train() 46 | 47 | # Set trained flag to true 48 | self.has_trained = True 49 | 50 | if not self.has_pressed_menu: 51 | self.py_vnc.send_key("q") 52 | self.stage_time_dy = None 53 | self.has_pressed_menu = True 54 | 55 | def on_frame(self): 56 | pass 57 | 58 | def __init__(self, py_vnc): 59 | self.py_vnc = py_vnc 60 | self.py_vnc.send_mouse("Left", (self.py_vnc.screen.size[0], 0)) 61 | self.py_vnc.add_callback(1 / 10, self.on_frame) # 10 FPS 62 | self.model = StateModel(True) 63 | self.action_space = [K_LEFT, K_RIGHT, None] 64 | self.q_model = DQN((84, 84, 3), self.action_space) 65 | 66 | self.EPSILON_DECAY = (self.q_model.EPSILON_END - self.q_model.EPSILON_START) / 10000 67 | self.image_save_path = os.path.join(dir_path, "..", "state", "unlabeled") 68 | 69 | self.has_trained = False 70 | self.is_clean = True 71 | self.has_pressed_menu = False 72 | self.has_pressed_score = False 73 | self.been_terminal = False 74 | 75 | self.stage_time_counter = 0 76 | self.stage_time_dy = None 77 | self.stage_time_max = 120 78 | 79 | self.action_distribution = None 80 | self.episode_reward = 0 81 | 82 | # DEbugging stuff 83 | self.pygame_font = pygame.font.SysFont("monospace", 18) 84 | 85 | while True: 86 | raw_img = self.render() 87 | s = self.model.preprocess(raw_img) 88 | 89 | predicted = self.model.predict(s) 90 | print(predicted) 91 | 92 | if predicted == "menu": 93 | self.been_terminal = False 94 | self.handle_menu() 95 | 96 | elif predicted == "prompt": 97 | self.py_vnc.send_key("x") 98 | 99 | elif predicted in ["terminal_1", "terminal_2"]: 100 | self.been_terminal = True 101 | if not self.is_clean: 102 | self.q_model.memory.buffer[self.q_model.memory.count - 1][2] = -1 103 | #print("setting last in pair to negative") 104 | time.sleep(3) 105 | self.py_vnc.send_key("q") 106 | 107 | self.is_clean = True 108 | self.has_pressed_menu = False 109 | 110 | elif predicted == "stage" or True: 111 | if not self.been_terminal: 112 | self.is_clean = False 113 | self.has_trained = False 114 | # 0. Observe (s) 115 | # 1. Do action 116 | # 2. Observe 117 | # 3. 
Train 118 | # 4. set state+1 to state 119 | 120 | a_idx = self.q_model.act(np.array([s])) 121 | a = self.action_space[a_idx] 122 | self.action_distribution[a_idx] += 1 123 | if a is not None: 124 | self.py_vnc.send_key(a, duration=.1) 125 | 126 | time.sleep(.5) 127 | 128 | raw_img = self.render() 129 | s1 = self.model.preprocess(raw_img) 130 | r = 0.01 131 | self.episode_reward += r 132 | 133 | self.q_model.memory.add([s, a_idx, r, s1, False]) 134 | 135 | 136 | #Debug stuff 137 | self.state_debugger(s, s1, r) 138 | 139 | time.sleep(.1) 140 | 141 | def render(self): 142 | img = self.py_vnc.screen.get_array() 143 | img = Image.fromarray(img) 144 | return img 145 | -------------------------------------------------------------------------------- /tests/rl/Main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import numpy as np 5 | from keras import backend as K 6 | from keras.models import load_model 7 | from keras.optimizers import RMSprop 8 | from keras.engine import Input, Model 9 | from keras.utils.vis_utils import plot_model 10 | from keras.layers.convolutional import Conv2D 11 | from keras.layers.core import Flatten, Dense, Lambda, Reshape 12 | 13 | from rl.Memory import Memory 14 | from rl.settings import settings 15 | 16 | 17 | class DQN: 18 | def __init__(self, state_size, action_space): 19 | self.memory = Memory(settings["memory_size"]) 20 | 21 | # Parameters 22 | self.LEARNING_RATE = settings["learning_rate"] 23 | self.BATCH_SIZE = settings["batch_size"] 24 | self.GAMMA = settings["discount_factor"] 25 | 26 | # Epsilon decent 27 | self.EPSILON_START = settings["epsilon_start"] 28 | self.EPSILON_END = settings["epsilon_end"] 29 | self.EPSILON_DECAY = (self.EPSILON_END - self.EPSILON_START) / settings["epsilon_steps"] 30 | self.epsilon = self.EPSILON_START 31 | 32 | # Exploration parameters (fully random play) 33 | self.EXPLORATION_WINS = settings["exploration_wins"] 34 | self.EXPLORATION_WINS_COUNTER = 0 35 | 36 | # Episode data 37 | self.episode = 0 # Episode Count 38 | self.episode_loss = 0 # Loss sum of a episode 39 | self.episode_reward = 0 # Reward sum of a episode 40 | self.frame = 0 # Frame counter 41 | self.loss_list = [] 42 | 43 | # State data 44 | self.state = None 45 | self.state_size = state_size 46 | 47 | self.is_clean = True 48 | 49 | # Action data 50 | self.action_size = len(action_space) 51 | 52 | #self.model = load_model(last_checkpoint) 53 | #self.target_model = load_model(last_checkpoint) 54 | 55 | self.model = self.build_model() 56 | self.target_model = self.build_model() 57 | 58 | self.model.summary() 59 | print("DQNRUnner inited!") 60 | print("State size is: %s,%s,%s" % self.state_size) 61 | print("Action size is: %s" % self.action_size) 62 | print("Batch size is: %s " % self.BATCH_SIZE) 63 | 64 | def reset(self, state): 65 | 66 | # Get new initial state 67 | self.state = state 68 | 69 | # Update target model 70 | if self.target_model: 71 | self.update_target_model() 72 | 73 | # Save target model 74 | model_name = "./save/dqn_p%s_%s.h5" % (self.player.id, int(time.time())) 75 | self.save(model_name) 76 | else: 77 | pass 78 | # Lost the round, delete memories 79 | # self.memory.remove_n(self.iteration) 80 | 81 | # Print output 82 | print("Episode: %s, Epsilon: %s, Reward: %s, Loss: %s, Memory: %s" % ( 83 | self.episode, self.epsilon, self.episode_reward, self.episode_loss / (self.frame + 1), self.memory.count)) 84 | 85 | self.frame = 0 86 | 87 | # Reset loss sum 88 | 
self.episode_loss = 0 89 | 90 | # Reset episode reward 91 | self.episode_reward = 0 92 | 93 | 94 | # Increase episode 95 | self.episode += 1 96 | 97 | def update_target_model(self): 98 | # copy weights from model to target_model 99 | self.target_model.set_weights(self.model.get_weights()) 100 | 101 | def build_model(self): 102 | # Neural Net for Deep-Q learning Model 103 | 104 | # Image input 105 | input_layer = Input(shape=self.state_size, name='image_input') 106 | x = Conv2D(16, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(input_layer) 107 | x = Conv2D(32, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(x) 108 | #x = Conv2D(64, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(x) 109 | #x = Conv2D(128, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform')(x) 110 | #x = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform')(x) 111 | x = Reshape((int(x.shape[1]), int(x.shape[2]), int(x.shape[3])))(x) 112 | x = Flatten()(x) 113 | 114 | # Value Stream 115 | vs = Dense(512, activation="relu", kernel_initializer='uniform')(x) 116 | vs = Dense(1, kernel_initializer='uniform')(vs) 117 | 118 | # Advantage Stream 119 | ad = Dense(512, activation="relu", kernel_initializer='uniform')(x) 120 | ad = Dense(self.action_size, kernel_initializer='uniform')(ad) 121 | 122 | policy = Lambda(lambda w: w[0] - K.mean(w[0]) + w[1])([vs, ad]) 123 | #policy = keras.layers.merge([vs, ad], mode=lambda x: x[0] - K.mean(x[0]) + x[1], output_shape=(self.action_size,)) 124 | 125 | model = Model(inputs=[input_layer], outputs=[policy]) 126 | optimizer = RMSprop(lr=self.LEARNING_RATE) 127 | model.compile(optimizer=optimizer, loss="mse") 128 | #plot_model(model, to_file='./output/model.png', show_shapes=True, show_layer_names=True) 129 | 130 | return model 131 | 132 | @staticmethod 133 | def huber(y_true, y_pred): 134 | cosh = lambda x: (K.exp(x) + K.exp(-x)) / 2 135 | return K.mean(K.log(cosh(y_pred - y_true)), axis=-1) 136 | 137 | def load(self, name): 138 | self.model = load_model(name) 139 | self.target_model = load_model(name) 140 | 141 | def save(self, name): 142 | self.target_model.save(name) 143 | 144 | def train(self): 145 | 146 | if self.memory.count < self.BATCH_SIZE: 147 | return 148 | 149 | batch_loss = 0 150 | memories = self.memory.get(self.BATCH_SIZE) 151 | for s, a, r, s1, terminal in memories: 152 | # Model = We train on 153 | # Target = Draw actions from 154 | 155 | target = r 156 | 157 | tar_s = self.target_model.predict(np.array([s])) 158 | if not terminal: 159 | tar_s1 = self.target_model.predict(np.array([s1])) 160 | target = r + self.GAMMA * np.amax(tar_s1[0]) 161 | 162 | tar_s[0][a] = target 163 | loss = (r + (self.GAMMA * np.amax(tar_s1[0]) - np.amax(tar_s[0]))) ** 2 164 | 165 | history = self.model.fit(np.array([s]), tar_s, epochs=1, batch_size=1, callbacks=[], verbose=0) 166 | batch_loss += loss 167 | self.episode_loss += loss 168 | 169 | def act(self, state): 170 | if np.random.uniform() <= self.epsilon: 171 | return random.randrange(self.action_size) 172 | 173 | # Exploit Q-Knowledge 174 | act_values = self.target_model.predict(state) 175 | 176 | return np.argmax(act_values[0]) # returns action 177 | 178 | def reward_fn(self): 179 | score = 0 180 | return score 181 | 182 | 183 | def update(self): 184 | self.is_clean = False 185 | 186 | # 1. Do action 187 | # 2. Observe 188 | # 3. Train 189 | # 4. 
set state+1 to state 190 | action = self.act() 191 | self.action_distribution[action] += 1 192 | s, a, s1, r, terminal, _ = self.state, action, *self.game.step(self.player, action, settings["grayscale"]) 193 | 194 | reward = self.reward_fn() 195 | 196 | self.memory.add([s, a, reward, s1, terminal]) 197 | 198 | self.frame += 1 199 | self.state = s1 200 | self.episode_reward += reward 201 | 202 | self.epsilon = max(0, self.epsilon + self.EPSILON_DECAY) 203 | 204 | --------------------------------------------------------------------------------