├── tests
│   ├── __init__.py
│   ├── main.py
│   ├── rl
│   │   ├── settings.py
│   │   ├── Memory.py
│   │   └── Main.py
│   └── state_machine
│       └── Multitask.py
├── FlashRL
│   ├── lib
│   │   ├── __init__.py
│   │   ├── util
│   │   │   ├── __init__.py
│   │   │   ├── preprocess.py
│   │   │   ├── state_trainer.py
│   │   │   └── model.py
│   │   ├── GameEnvironment.py
│   │   ├── Game.py
│   │   └── Environment.py
│   ├── contrib
│   │   ├── __init__.py
│   │   └── environments
│   │       ├── __init__.py
│   │       └── multitask
│   │           ├── multitaskgame.swf
│   │           └── __init__.py
│   └── __init__.py
├── examples
│   ├── __init__.py
│   ├── mujaffa
│   │   ├── __init__.py
│   │   ├── contrib
│   │   │   └── environments
│   │   │       └── mujaffa-v1.6
│   │   │           ├── mujaffa.swf
│   │   │           └── __init__.py
│   │   └── main.py
│   └── multitask
│       └── main.py
├── __init__.py
├── .gitignore
├── requirements.txt
├── setup.py
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/contrib/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/lib/util/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/mujaffa/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from lib.Game import Game
--------------------------------------------------------------------------------
/FlashRL/contrib/environments/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/FlashRL/__init__.py:
--------------------------------------------------------------------------------
1 | from .lib.Game import Game
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .vscode
3 | test.png
4 | .idea
--------------------------------------------------------------------------------
/tests/main.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | if __name__ == "__main__":
4 |     pass
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium
2 | easyprocess
3 | git+https://github.com/UIA-CAIR/pyVNC
4 | pyvirtualdisplay
5 | tensorflow
6 | tensorflow-gpu
7 | h5py
8 | keras
--------------------------------------------------------------------------------
/FlashRL/contrib/environments/multitask/multitaskgame.swf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cair/FlashRL/HEAD/FlashRL/contrib/environments/multitask/multitaskgame.swf -------------------------------------------------------------------------------- /examples/mujaffa/contrib/environments/mujaffa-v1.6/mujaffa.swf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cair/FlashRL/HEAD/examples/mujaffa/contrib/environments/mujaffa-v1.6/mujaffa.swf -------------------------------------------------------------------------------- /examples/mujaffa/main.py: -------------------------------------------------------------------------------- 1 | from FlashRL.lib.Game import Game 2 | 3 | 4 | def on_frame(state, frame, type, vnc): 5 | print(state.shape, type) 6 | 7 | Game("mujaffa-v1.6", fps=1, frame_callback=on_frame, grayscale=True, normalized=True) 8 | -------------------------------------------------------------------------------- /examples/multitask/main.py: -------------------------------------------------------------------------------- 1 | from FlashRL.lib.Game import Game 2 | 3 | 4 | def on_frame(state, frame, type, vnc): 5 | pass 6 | 7 | 8 | 9 | 10 | Game("multitask", fps=10, frame_callback=on_frame, grayscale=True, normalized=True) 11 | -------------------------------------------------------------------------------- /FlashRL/contrib/environments/multitask/__init__.py: -------------------------------------------------------------------------------- 1 | define = { 2 | "swf": "multitaskgame.swf", 3 | "model": "model.h5", 4 | "dataset": "dataset.p", 5 | "action_space": 9, 6 | "action_names": ["terminal", "menu", "score", "lll", "terminal_1", "stage", "score", "stage", "transition"], 7 | "state_space": (84, 84, 3) 8 | } -------------------------------------------------------------------------------- /examples/mujaffa/contrib/environments/mujaffa-v1.6/__init__.py: -------------------------------------------------------------------------------- 1 | define = { 2 | "swf": "mujaffa.swf", 3 | "model": "model.h5", 4 | "dataset": "dataset.p", 5 | "action_space": 4, 6 | "action_names": ["terminal", "menu", "score", "lll", "terminal_1", "stage", "score", "stage", "transition"], 7 | "state_space": (84, 84, 3) 8 | } -------------------------------------------------------------------------------- /tests/rl/settings.py: -------------------------------------------------------------------------------- 1 | settings = { 2 | "learning_rate": 1e-6, 3 | "memory_size": 1000000, # 1 Million frames in memory 4 | "epsilon_start": 0.5, # Start of epsilon decent 5 | "epsilon_end": 0.0, # End of epsilon decent 6 | "epsilon_steps": 100000, # Epsilon steps 7 | "exploration_wins": 0, # Number of victories using random moves before starting epsilon phase 8 | "batch_size": 16, 9 | "discount_factor": 0.99, 10 | "grayscale": False, 11 | "load_latest_checkpoint": False, 12 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | 5 | 6 | setup( 7 | name='FlashRL', # This is the name of your PyPI-package. 
8 | version='1.0', # Update the version number for new releases 9 | install_requires=['numpy', 'pygame', "pyVNC", "pillow", "scipy", "easyprocess", "pyvirtualdisplay", "keras", "h5py"], 10 | packages=["FlashRL.lib", "FlashRL.lib.util", "FlashRL.contrib", "FlashRL.contrib.environments", "FlashRL.contrib.environments.multitask"], 11 | package_data={ 12 | '': ['*.swf'], 13 | }, 14 | scripts=[], 15 | dependency_links=[ 16 | "git+ssh://git@github.com:UIA-CAIR/pyVNC.git" 17 | ] 18 | ) 19 | -------------------------------------------------------------------------------- /FlashRL/lib/GameEnvironment.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from easyprocess import EasyProcess 3 | from pyvirtualdisplay import Display 4 | import os 5 | 6 | class GameEnvironment(threading.Thread): 7 | def __init__(self, display, env): 8 | threading.Thread.__init__(self) 9 | #super(self) 10 | self.display = display 11 | self.env = env 12 | 13 | def run(self): 14 | self.vnc(self.display) 15 | 16 | def vnc(self, vnc_display): 17 | os.environ["DISPLAY"] = vnc_display 18 | with Display(backend='xvnc', rfbport=5902, size=(223, 150)) as disp: 19 | with EasyProcess(' '.join(['gnash', os.path.join(self.env.path, self.env.swf), "--width", "150", "--height", "150","--render-mode", "1", "--hide-menubar"])) as proc: 20 | proc.wait() -------------------------------------------------------------------------------- /tests/rl/Memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class Memory: 5 | 6 | def __init__(self, memory_size): 7 | self.buffer = [] 8 | self.count = 0 9 | self.max_memory_size = memory_size 10 | 11 | def _recalibrate(self): 12 | self.count = len(self.buffer) 13 | 14 | def remove_n(self, n): 15 | self.buffer = self.buffer[n-1:-1] 16 | self._recalibrate() 17 | 18 | def add(self, memory): 19 | self.buffer.append(memory) 20 | self.count += 1 21 | 22 | if self.count > self.max_memory_size: 23 | self.buffer.pop(0) 24 | self.count -= 1 25 | 26 | def get(self, batch_size=1): 27 | if self.count <= batch_size: 28 | return np.array(self.buffer) 29 | 30 | return np.array(random.sample(self.buffer, batch_size)) 31 | -------------------------------------------------------------------------------- /FlashRL/lib/Game.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from pyVNC.Client import Client 4 | from .GameEnvironment import GameEnvironment 5 | from .Environment import Environment 6 | 7 | class Game: 8 | def __init__(self, environment_name, fps=10, frame_callback=None, grayscale=False, normalized=False): 9 | original_display = os.environ["DISPLAY"] 10 | vnc_display = ":98" 11 | print("Initialize xVNC") 12 | print("---------------") 13 | print("Display: %s" % original_display) 14 | print("VNC Display: %s" % vnc_display) 15 | 16 | env = Environment(environment_name, fps=fps, frame_callback=frame_callback, grayscale=grayscale, normalized=normalized) 17 | 18 | x_vnc = GameEnvironment(vnc_display, env) 19 | x_vnc.start() 20 | 21 | time.sleep(1) 22 | 23 | os.environ["DISPLAY"] = original_display 24 | py_vnc = Client(host="127.0.0.1", port=5902, gui=True, array=True) 25 | py_vnc.start() 26 | 27 | time.sleep(1) 28 | 29 | env.setup(py_vnc) 30 | -------------------------------------------------------------------------------- /FlashRL/lib/util/preprocess.py: 
-------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import pickle 4 | from state.multitask_one.model import StateModel 5 | import os 6 | if __name__ == "__main__": 7 | model = StateModel() 8 | 9 | image_class_path = os.path.join(os.getcwd(), "images", "training") 10 | image_classes = [x for x in os.listdir(image_class_path)] 11 | image_classes_path = [os.path.join(image_class_path, x) for x in image_classes] 12 | n_classes = len(image_classes) 13 | print(n_classes) 14 | X = [] 15 | Y = [] 16 | for i, class_path in enumerate(image_classes_path): 17 | files = [os.path.join(class_path, x) for x in os.listdir(class_path)] 18 | 19 | for image_path in files: 20 | img = Image.open(image_path) 21 | 22 | # Preprocess image 23 | np_img = model.preprocess(img) 24 | 25 | # Create class label 26 | y = np.zeros(shape=(n_classes, )) 27 | y[i] = 1 28 | 29 | # Add to dataset 30 | X.append(np_img) 31 | Y.append(y) 32 | 33 | pickle.dump((np.array(X), np.array(Y)), open(os.path.join(os.getcwd(), "dataset.p"), "wb")) 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /FlashRL/lib/util/state_trainer.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import importlib.util 4 | import pickle 5 | from model import Model 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class Trainer: 9 | 10 | def __init__(self, environment_path): 11 | self.env_path = environment_path 12 | self.module_path = os.path.join(self.env_path, "__init__.py") 13 | 14 | self.env_config = None 15 | self.env_training_data = None 16 | 17 | self.load_environment_config() 18 | self.load_training_data() 19 | 20 | self.env_model_path = os.path.join(self.env_path, self.env_config["model"]) 21 | 22 | def load_environment_config(self): 23 | spec = importlib.util.spec_from_file_location("module.define", self.module_path) 24 | mod = importlib.util.module_from_spec(spec) 25 | spec.loader.exec_module(mod) 26 | self.env_config = mod.define 27 | 28 | if self.env_config is None: 29 | raise RuntimeError("Environment configuration is empty!") 30 | 31 | def load_training_data(self): 32 | self.env_training_data = pickle.load(open(os.path.join(self.env_path, self.env_config["dataset"]), "rb")) 33 | 34 | def train(self): 35 | m = Model(self.env_training_data, self.env_model_path) 36 | m.train() 37 | 38 | 39 | if __name__ == "__main__": 40 | 41 | for env in glob.glob(os.path.join(dir_path, "..", "environments/*")): 42 | trainer = Trainer(env) 43 | trainer.train() 44 | -------------------------------------------------------------------------------- /FlashRL/lib/util/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL.Image import ANTIALIAS 5 | from keras.callbacks import ModelCheckpoint 6 | from keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout 7 | from keras.models import Sequential 8 | from keras.optimizers import RMSprop, Adam 9 | 10 | class Model: 11 | 12 | def __init__(self, training_data, model_path): 13 | self.training_data = training_data 14 | self.input_shape = self.training_data[0].shape[1:] 15 | self.classes = self.training_data[1].shape[1] 16 | self.checkpoint_cb = ModelCheckpoint(model_path, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=5) 17 | self.X = 
self.training_data[0] 18 | self.Y = self.training_data[1] 19 | self.model = self.build_model() 20 | 21 | def build_model(self): 22 | 23 | model = Sequential() 24 | model.add(Conv2D(32, (3, 3), data_format="channels_last", input_shape=self.input_shape)) 25 | model.add(Activation('relu')) 26 | model.add(MaxPooling2D(pool_size=(2, 2))) 27 | 28 | model.add(Conv2D(32, (3, 3), data_format="channels_last")) 29 | model.add(Activation('relu')) 30 | model.add(MaxPooling2D(pool_size=(2, 2))) 31 | 32 | model.add(Conv2D(64, (3, 3), data_format="channels_last")) 33 | model.add(Activation('relu')) 34 | model.add(MaxPooling2D(pool_size=(2, 2))) 35 | 36 | model.add(Conv2D(64, (3, 3), data_format="channels_last")) 37 | model.add(Activation('relu')) 38 | model.add(MaxPooling2D(pool_size=(2, 2))) 39 | 40 | 41 | model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors 42 | model.add(Dense(512)) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | 46 | model.add(Dense(self.classes)) 47 | model.add(Activation('softmax')) 48 | 49 | optimizer = Adam(lr=0.00001, decay=8e-08) 50 | model.compile(loss='categorical_crossentropy', 51 | optimizer=optimizer, 52 | metrics=['accuracy']) 53 | 54 | return model 55 | 56 | def train(self, shuffle=True): 57 | 58 | if shuffle: 59 | idxes = [x for x in range(len(self.X))] 60 | random.shuffle(idxes) 61 | X_NEW = [] 62 | Y_NEW = [] 63 | 64 | for i in idxes: 65 | X_NEW.append(self.X[i]) 66 | Y_NEW.append(self.Y[i]) 67 | 68 | X = np.array(X_NEW) 69 | Y = np.array(Y_NEW) 70 | 71 | acc = self.model.fit( 72 | X, 73 | Y, 74 | batch_size=8, 75 | epochs=300, 76 | verbose=1, 77 | callbacks=[self.checkpoint_cb], 78 | validation_split=0.4 79 | ) 80 | 81 | return acc 82 | 83 | def predict(self, X): 84 | answer = self.model.predict(np.array([X])) 85 | idx = np.argmax(answer) 86 | return self.classes[idx] 87 | -------------------------------------------------------------------------------- /FlashRL/lib/Environment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | import importlib 5 | #from keras.models import load_model 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class Environment: 9 | def __init__(self, env_name, fps=10, frame_callback=None, grayscale=False, normalized=False): 10 | self.fps = fps 11 | self.frame_count = 0 12 | self.grayscale = grayscale 13 | self.normalized = normalized 14 | self.frame_callback = frame_callback 15 | self.env_name = env_name 16 | self.path = os.path.join(dir_path, "..", "contrib", "environments", self.env_name) 17 | 18 | if not os.path.isdir(self.path): 19 | self.path = os.path.join("contrib", "environments", self.env_name) 20 | 21 | if not os.path.isdir(self.path): 22 | raise FileExistsError("The specified environment \"%s\" could not be found." 
% self.env_name) 23 | 24 | self.env_config = self.load_config() 25 | self.swf = self.env_config["swf"] 26 | self.model_path = os.path.join(self.path ,self.env_config["model"]) 27 | self.dataset = self.env_config["dataset"] 28 | self.action_space = self.env_config["action_space"] 29 | self.action_names = self.env_config["action_names"] 30 | self.state_space = self.env_config["state_space"] 31 | 32 | try: 33 | #self.model = load_model(self.model_path) 34 | print("Self.model initialize would occur here") 35 | self.model = None 36 | except OSError as e: 37 | print("No state prediction!") 38 | self.model = None 39 | """# Missing model, prompt for collecting training data 40 | ynq = None 41 | while ynq not in ["y", "n", "q"]: 42 | ynq = input("State prediction model is missing. Collect training data? (Q for quit): ").lower() 43 | 44 | if ynq == "n" or ynq == "q": 45 | print("Exiting!") 46 | exit(0) 47 | else: 48 | print("Starting Game Mode, Collecting unlabeled images") 49 | """ 50 | 51 | 52 | def load_config(self): 53 | spec = importlib.util.spec_from_file_location("module.define", os.path.join(self.path, "__init__.py")) 54 | mod = importlib.util.module_from_spec(spec) 55 | spec.loader.exec_module(mod) 56 | return mod.define 57 | 58 | def setup(self, vnc): 59 | self.vnc = vnc 60 | self.vnc.send_mouse("Left", (self.vnc.screen.size[0], 0)) 61 | self.vnc.add_callback(1 / self.fps, self.on_frame) 62 | 63 | def preprocess(self, pil_image): 64 | img = pil_image.resize((self.state_space[0], self.state_space[1]), Image.ANTIALIAS) 65 | if self.grayscale: 66 | img = img.convert("L") 67 | else: 68 | img = img.convert('RGB') 69 | data = np.array(img) 70 | 71 | if self.normalized: 72 | data = data / 255 73 | 74 | return data 75 | 76 | # NN-Tr XD lets go 77 | 78 | def render(self): 79 | img = self.vnc.screen.get_array() 80 | img = Image.fromarray(img) 81 | arr_img = self.preprocess(img) 82 | return np.array([arr_img]) 83 | 84 | def on_frame(self): 85 | state = self.render() 86 | state_type = None 87 | if self.model: 88 | state_type = self.action_names[np.argmax(self.model.predict(state))] 89 | 90 | if self.frame_callback: 91 | self.frame_callback(state, self.frame_count, state_type, self.vnc) 92 | 93 | self.frame_count += 1 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FlashRL - Flash Platform for Reinforcement Learning 2 | 3 | For the updated version of FlashRL, go to [this link](https://github.com/cair/rl). 4 | 5 | # TODO List 6 | * Fix pyVNC issue. Currently pyVNC fails to start a VNC server for the game to run on. We need to solve this issue in order to run our games in headless mode. 7 | * Begin developing custom environments. 8 | * Begin developing Docker containers for our code to run in. Preferably, create a Dockerfile that can be used to run custom environments without the need for the local machine to have all the dependencies installed. 9 | 10 | # Prerequisites 11 | * Ubuntu 18.04 (Our most recent testing of 20.04 proves that it does not work.) 12 | * Python 3.x.x (Python 3.6.8 is tested) 13 | * gnash 14 | * xvfb 15 | 16 | # Installation 17 | For our testing, we have been working in a python virtual environment. 18 | ```bash 19 | sudo apt-get install xvfb 20 | sudo apt-get install gnash 21 | sudo apt-get install vnc4server 22 | # I would reccomend doing the next steps inside a virtual environment. 
23 | pip install git+https://github.com/cair/pyVNC 24 | pip install git+https://github.com/JDaniel41/FlashRL 25 | ``` 26 | 27 | # Deploy new environment 28 | Developers are able to import custom environments through ```project/contrib/environments/``` 29 | 30 | A typical custom implementation looks like this: 31 | ```python 32 | - project 33 | - __init__.py 34 | - main.py 35 | - contrib 36 | - environments 37 | - env_name 38 | - __init__.py 39 | - dataset.p 40 | - model.h5 41 | - env.swf 42 | 43 | ``` 44 | in the following section, we demonstrate how to implement the flash game Mujaffa as an environment for FlashRL. 45 | 46 | ## Mujaffa-1.6 47 | ### Prerequisites 48 | * SWF Game File 49 | * Python 3x 50 | * Keras 51 | 52 | ### 53 | * Create directory structure ```mkdir -p contrib/environments/mujaffa-v1.6``` 54 | * Create Configuration file: 55 | ```python 56 | echo "define = { 57 | "swf": "mujaffa.swf", 58 | "model": "model.h5", 59 | "dataset": "dataset.p", 60 | "scenes": [], 61 | "state_space": (84, 84, 3) 62 | }" > contrib/environments/mujaffa-v1.6/__init__.py 63 | ``` 64 | 65 | * Add swf "mujaffa.swf" to ```contrib/environments/mujaffa-v1.6/``` 66 | * Create file ```main.py in project root``` with following template 67 | 68 | ``` 69 | from FlashRL import Game 70 | 71 | def on_frame(state, type, vnc): 72 | # vnc.send_key("a") # Sends the key "a" 73 | # vnc.send_mouse("Left", (200, 200)) # Left Clicks at x=200, y=200 74 | # vnc.send_mouse("Right", (200, 200)) # Right Clicks at x=200, y=200 75 | pass 76 | 77 | g = Game("mujaffa-v1.6", fps=10, frame_callback=on_frame, grayscale=True, normalized=True) 78 | ``` 79 | 80 | 81 | # Licence 82 | Copyright 2017/2018 Per-Arne Andersen 83 | 84 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 85 | 86 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 87 | 88 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
89 | -------------------------------------------------------------------------------- /tests/state_machine/Multitask.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import numpy as np 3 | import time 4 | 5 | import pygame 6 | from pyVNC.constants import K_LEFT, K_RIGHT 7 | from PIL import Image 8 | from rl.Main import DQN 9 | from state.multitask_one.model import StateModel 10 | import os 11 | 12 | dir_path = os.path.dirname(os.path.realpath(__file__)) 13 | 14 | 15 | class Multitask: 16 | 17 | def state_debugger(self, s, s1, r): 18 | du = np.sum((s - s1)**2) 19 | label = self.pygame_font.render(str(r), 1, (255, 0, 0)) 20 | label2 = self.pygame_font.render(str(du), 1, (255, 0, 0)) 21 | label_surface = pygame.Surface((84, 84)) 22 | label_surface.blit(label, (0, 0)) 23 | label_surface.blit(label2, (0, 48)) 24 | 25 | score = pygame.surfarray.array3d(label_surface).swapaxes(0, 1) 26 | concat_it = np.concatenate((s, score, s1), axis=1) 27 | 28 | im = Image.fromarray(concat_it.astype('uint8')) 29 | im.save("test.png") 30 | 31 | 32 | def create_training_data(self, clazz, s): 33 | pass 34 | 35 | 36 | def handle_menu(self): 37 | 38 | if not self.has_trained: 39 | print(self.episode_reward, self.action_distribution, self.q_model.memory.count) 40 | self.episode_reward = 0 41 | self.action_distribution = [0 for x in self.action_space] 42 | 43 | # Train for 10 epochs 44 | for i in range(100): 45 | self.q_model.train() 46 | 47 | # Set trained flag to true 48 | self.has_trained = True 49 | 50 | if not self.has_pressed_menu: 51 | self.py_vnc.send_key("q") 52 | self.stage_time_dy = None 53 | self.has_pressed_menu = True 54 | 55 | def on_frame(self): 56 | pass 57 | 58 | def __init__(self, py_vnc): 59 | self.py_vnc = py_vnc 60 | self.py_vnc.send_mouse("Left", (self.py_vnc.screen.size[0], 0)) 61 | self.py_vnc.add_callback(1 / 10, self.on_frame) # 10 FPS 62 | self.model = StateModel(True) 63 | self.action_space = [K_LEFT, K_RIGHT, None] 64 | self.q_model = DQN((84, 84, 3), self.action_space) 65 | 66 | self.EPSILON_DECAY = (self.q_model.EPSILON_END - self.q_model.EPSILON_START) / 10000 67 | self.image_save_path = os.path.join(dir_path, "..", "state", "unlabeled") 68 | 69 | self.has_trained = False 70 | self.is_clean = True 71 | self.has_pressed_menu = False 72 | self.has_pressed_score = False 73 | self.been_terminal = False 74 | 75 | self.stage_time_counter = 0 76 | self.stage_time_dy = None 77 | self.stage_time_max = 120 78 | 79 | self.action_distribution = None 80 | self.episode_reward = 0 81 | 82 | # DEbugging stuff 83 | self.pygame_font = pygame.font.SysFont("monospace", 18) 84 | 85 | while True: 86 | raw_img = self.render() 87 | s = self.model.preprocess(raw_img) 88 | 89 | predicted = self.model.predict(s) 90 | print(predicted) 91 | 92 | if predicted == "menu": 93 | self.been_terminal = False 94 | self.handle_menu() 95 | 96 | elif predicted == "prompt": 97 | self.py_vnc.send_key("x") 98 | 99 | elif predicted in ["terminal_1", "terminal_2"]: 100 | self.been_terminal = True 101 | if not self.is_clean: 102 | self.q_model.memory.buffer[self.q_model.memory.count - 1][2] = -1 103 | #print("setting last in pair to negative") 104 | time.sleep(3) 105 | self.py_vnc.send_key("q") 106 | 107 | self.is_clean = True 108 | self.has_pressed_menu = False 109 | 110 | elif predicted == "stage" or True: 111 | if not self.been_terminal: 112 | self.is_clean = False 113 | self.has_trained = False 114 | # 0. Observe (s) 115 | # 1. Do action 116 | # 2. Observe 117 | # 3. 
Train 118 | # 4. set state+1 to state 119 | 120 | a_idx = self.q_model.act(np.array([s])) 121 | a = self.action_space[a_idx] 122 | self.action_distribution[a_idx] += 1 123 | if a is not None: 124 | self.py_vnc.send_key(a, duration=.1) 125 | 126 | time.sleep(.5) 127 | 128 | raw_img = self.render() 129 | s1 = self.model.preprocess(raw_img) 130 | r = 0.01 131 | self.episode_reward += r 132 | 133 | self.q_model.memory.add([s, a_idx, r, s1, False]) 134 | 135 | 136 | #Debug stuff 137 | self.state_debugger(s, s1, r) 138 | 139 | time.sleep(.1) 140 | 141 | def render(self): 142 | img = self.py_vnc.screen.get_array() 143 | img = Image.fromarray(img) 144 | return img 145 | -------------------------------------------------------------------------------- /tests/rl/Main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import numpy as np 5 | from keras import backend as K 6 | from keras.models import load_model 7 | from keras.optimizers import RMSprop 8 | from keras.engine import Input, Model 9 | from keras.utils.vis_utils import plot_model 10 | from keras.layers.convolutional import Conv2D 11 | from keras.layers.core import Flatten, Dense, Lambda, Reshape 12 | 13 | from rl.Memory import Memory 14 | from rl.settings import settings 15 | 16 | 17 | class DQN: 18 | def __init__(self, state_size, action_space): 19 | self.memory = Memory(settings["memory_size"]) 20 | 21 | # Parameters 22 | self.LEARNING_RATE = settings["learning_rate"] 23 | self.BATCH_SIZE = settings["batch_size"] 24 | self.GAMMA = settings["discount_factor"] 25 | 26 | # Epsilon decent 27 | self.EPSILON_START = settings["epsilon_start"] 28 | self.EPSILON_END = settings["epsilon_end"] 29 | self.EPSILON_DECAY = (self.EPSILON_END - self.EPSILON_START) / settings["epsilon_steps"] 30 | self.epsilon = self.EPSILON_START 31 | 32 | # Exploration parameters (fully random play) 33 | self.EXPLORATION_WINS = settings["exploration_wins"] 34 | self.EXPLORATION_WINS_COUNTER = 0 35 | 36 | # Episode data 37 | self.episode = 0 # Episode Count 38 | self.episode_loss = 0 # Loss sum of a episode 39 | self.episode_reward = 0 # Reward sum of a episode 40 | self.frame = 0 # Frame counter 41 | self.loss_list = [] 42 | 43 | # State data 44 | self.state = None 45 | self.state_size = state_size 46 | 47 | self.is_clean = True 48 | 49 | # Action data 50 | self.action_size = len(action_space) 51 | 52 | #self.model = load_model(last_checkpoint) 53 | #self.target_model = load_model(last_checkpoint) 54 | 55 | self.model = self.build_model() 56 | self.target_model = self.build_model() 57 | 58 | self.model.summary() 59 | print("DQNRUnner inited!") 60 | print("State size is: %s,%s,%s" % self.state_size) 61 | print("Action size is: %s" % self.action_size) 62 | print("Batch size is: %s " % self.BATCH_SIZE) 63 | 64 | def reset(self, state): 65 | 66 | # Get new initial state 67 | self.state = state 68 | 69 | # Update target model 70 | if self.target_model: 71 | self.update_target_model() 72 | 73 | # Save target model 74 | model_name = "./save/dqn_p%s_%s.h5" % (self.player.id, int(time.time())) 75 | self.save(model_name) 76 | else: 77 | pass 78 | # Lost the round, delete memories 79 | # self.memory.remove_n(self.iteration) 80 | 81 | # Print output 82 | print("Episode: %s, Epsilon: %s, Reward: %s, Loss: %s, Memory: %s" % ( 83 | self.episode, self.epsilon, self.episode_reward, self.episode_loss / (self.frame + 1), self.memory.count)) 84 | 85 | self.frame = 0 86 | 87 | # Reset loss sum 88 | 
self.episode_loss = 0 89 | 90 | # Reset episode reward 91 | self.episode_reward = 0 92 | 93 | 94 | # Increase episode 95 | self.episode += 1 96 | 97 | def update_target_model(self): 98 | # copy weights from model to target_model 99 | self.target_model.set_weights(self.model.get_weights()) 100 | 101 | def build_model(self): 102 | # Neural Net for Deep-Q learning Model 103 | 104 | # Image input 105 | input_layer = Input(shape=self.state_size, name='image_input') 106 | x = Conv2D(16, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(input_layer) 107 | x = Conv2D(32, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(x) 108 | #x = Conv2D(64, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform', trainable=True)(x) 109 | #x = Conv2D(128, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform')(x) 110 | #x = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', kernel_initializer='uniform')(x) 111 | x = Reshape((int(x.shape[1]), int(x.shape[2]), int(x.shape[3])))(x) 112 | x = Flatten()(x) 113 | 114 | # Value Stream 115 | vs = Dense(512, activation="relu", kernel_initializer='uniform')(x) 116 | vs = Dense(1, kernel_initializer='uniform')(vs) 117 | 118 | # Advantage Stream 119 | ad = Dense(512, activation="relu", kernel_initializer='uniform')(x) 120 | ad = Dense(self.action_size, kernel_initializer='uniform')(ad) 121 | 122 | policy = Lambda(lambda w: w[0] - K.mean(w[0]) + w[1])([vs, ad]) 123 | #policy = keras.layers.merge([vs, ad], mode=lambda x: x[0] - K.mean(x[0]) + x[1], output_shape=(self.action_size,)) 124 | 125 | model = Model(inputs=[input_layer], outputs=[policy]) 126 | optimizer = RMSprop(lr=self.LEARNING_RATE) 127 | model.compile(optimizer=optimizer, loss="mse") 128 | #plot_model(model, to_file='./output/model.png', show_shapes=True, show_layer_names=True) 129 | 130 | return model 131 | 132 | @staticmethod 133 | def huber(y_true, y_pred): 134 | cosh = lambda x: (K.exp(x) + K.exp(-x)) / 2 135 | return K.mean(K.log(cosh(y_pred - y_true)), axis=-1) 136 | 137 | def load(self, name): 138 | self.model = load_model(name) 139 | self.target_model = load_model(name) 140 | 141 | def save(self, name): 142 | self.target_model.save(name) 143 | 144 | def train(self): 145 | 146 | if self.memory.count < self.BATCH_SIZE: 147 | return 148 | 149 | batch_loss = 0 150 | memories = self.memory.get(self.BATCH_SIZE) 151 | for s, a, r, s1, terminal in memories: 152 | # Model = We train on 153 | # Target = Draw actions from 154 | 155 | target = r 156 | 157 | tar_s = self.target_model.predict(np.array([s])) 158 | if not terminal: 159 | tar_s1 = self.target_model.predict(np.array([s1])) 160 | target = r + self.GAMMA * np.amax(tar_s1[0]) 161 | 162 | tar_s[0][a] = target 163 | loss = (r + (self.GAMMA * np.amax(tar_s1[0]) - np.amax(tar_s[0]))) ** 2 164 | 165 | history = self.model.fit(np.array([s]), tar_s, epochs=1, batch_size=1, callbacks=[], verbose=0) 166 | batch_loss += loss 167 | self.episode_loss += loss 168 | 169 | def act(self, state): 170 | if np.random.uniform() <= self.epsilon: 171 | return random.randrange(self.action_size) 172 | 173 | # Exploit Q-Knowledge 174 | act_values = self.target_model.predict(state) 175 | 176 | return np.argmax(act_values[0]) # returns action 177 | 178 | def reward_fn(self): 179 | score = 0 180 | return score 181 | 182 | 183 | def update(self): 184 | self.is_clean = False 185 | 186 | # 1. Do action 187 | # 2. Observe 188 | # 3. Train 189 | # 4. 
set state+1 to state 190 | action = self.act() 191 | self.action_distribution[action] += 1 192 | s, a, s1, r, terminal, _ = self.state, action, *self.game.step(self.player, action, settings["grayscale"]) 193 | 194 | reward = self.reward_fn() 195 | 196 | self.memory.add([s, a, reward, s1, terminal]) 197 | 198 | self.frame += 1 199 | self.state = s1 200 | self.episode_reward += reward 201 | 202 | self.epsilon = max(0, self.epsilon + self.EPSILON_DECAY) 203 | 204 | --------------------------------------------------------------------------------