├── .gitattributes ├── .gitignore ├── Makefile ├── README.md ├── minecraft_deep_learning-source ├── __init__.coco ├── __main__.coco ├── callbacks.coco ├── constants.coco ├── display.coco ├── environment.coco ├── main.coco ├── memory.coco ├── model.coco ├── policy.coco └── processor.coco ├── saved_weights └── weights_final.h5f ├── setup.cfg └── setup.coco /.gitattributes: -------------------------------------------------------------------------------- 1 | *.json filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS files 2 | .DS_Store 3 | Thumbs.db 4 | *~ 5 | .#* 6 | ._* 7 | \#*\# 8 | 9 | # Python files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | *.so 14 | 15 | # Packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | bin/ 30 | *.iml 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django 59 | *.log 60 | local_settings.py 61 | 62 | # Flask 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy 67 | .scrapy 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter 73 | .ipynb_checkpoints/ 74 | 75 | # Celery 76 | celerybeat-schedule 77 | 78 | # SageMath 79 | *.sage.py 80 | 81 | # Environments 82 | .python-version 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope 94 | .ropeproject 95 | 96 | # MyPy 97 | .mypy_cache/ 98 | 99 | # mkdocs 100 | /site 101 | 102 | # Vim 103 | *.sw[nop] 104 | .ropeproject 105 | 106 | # Sublime 107 | *.sublime-* 108 | 109 | # Compiled Coconut 110 | *.py 111 | 112 | # Saved data 113 | saved_data/* 114 | !saved_data/play_memory.json 115 | 116 | # Saved images 117 | saved_images/ 118 | tmp/ 119 | -p/ 120 | 121 | # Saved weights 122 | saved_weights/* 123 | !saved_weights/weights_final.h5f 124 | 125 | # Saved video 126 | ffmpeg.out 127 | mission_records/ 128 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install 2 | install: build 3 | pip install -e . 4 | 5 | .PHONY: build 6 | build: setup 7 | coconut setup.coco --no-tco --strict 8 | coconut "minecraft_deep_learning-source" minecraft_deep_learning --no-tco --strict --jobs sys 9 | 10 | .PHONY: setup 11 | setup: 12 | pip install coconut-develop 13 | 14 | .PHONY: play 15 | play: install 16 | python minecraft_deep_learning --mode play 17 | 18 | .PHONY: train 19 | train: install 20 | python minecraft_deep_learning --mode train 21 | 22 | .PHONY: test 23 | test: install 24 | python minecraft_deep_learning --mode test 25 | 26 | .PHONY: clean 27 | clean: 28 | rm -rf ./dist ./build ./minecraft_deep_learning 29 | find . -name '*.pyc' -delete 30 | find . -name '__pycache__' -delete 31 | find . 
-name '*.py' -delete 32 | 33 | .PHONY: wipe 34 | wipe: clean 35 | rm -rf ./saved_weights ./saved_data ./saved_images 36 | 37 | .PHONY: watch 38 | watch: 39 | coconut "minecraft_deep_learning-source" minecraft_deep_learning --watch --no-tco --strict 40 | 41 | .PHONY: quiver 42 | quiver: install 43 | python minecraft_deep_learning --mode quiver 44 | 45 | .PHONY: tensorboard 46 | tensorboard: 47 | open http://localhost:6006 48 | tensorboard --logdir=./saved_data 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Minecraft Deep Learning 2 | 3 | Deep reinforcement learning in Minecraft using [gym-minecraft](https://github.com/tambetm/gym-minecraft) and [keras-rl](https://github.com/matthiasplappert/keras-rl). See [this presentation](https://docs.google.com/presentation/d/1tNZANcEBsSqMPDr-pnXCosjJO5U1OBET-ZElCtKPc7A/edit?usp=sharing) for the (very positive) results. 4 | 5 | ## Usage 6 | 7 | First, you will need to install [gym-minecraft](https://github.com/tambetm/gym-minecraft) manually. Then: 8 | 9 | 1. to play the environment yourself, run 10 | ``` 11 | make play 12 | ``` 13 | 2. to train your model, run 14 | ``` 15 | make train 16 | ``` 17 | 3. and to test your model, run 18 | ``` 19 | make test 20 | ``` 21 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/__init__.coco: -------------------------------------------------------------------------------- 1 | """ 2 | Deep reinforcement learning in Minecraft using gym-minecraft and keras-rl. 3 | 4 | Portions of this code are based on example code from keras-rl, used under its MIT license. 5 | """ 6 | 7 | # Imports: 8 | try: 9 | import gym_minecraft # necessary to register the environment 10 | except ImportError: 11 | raise RuntimeError("minecraft-deep-learning requires gym-minecraft (see https://github.com/tambetm/gym-minecraft for installation instructions)") 12 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/__main__.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | from minecraft_deep_learning.main import main 3 | 4 | # Main: 5 | if __name__ == "__main__": 6 | main() 7 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/callbacks.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | 4 | from rl.callbacks import ( 5 | Callback, 6 | FileLogger, 7 | ModelIntervalCheckpoint, 8 | ) 9 | from keras.callbacks import TensorBoard 10 | 11 | from minecraft_deep_learning.constants import ( 12 | DEBUG_LOG_INTERVAL, 13 | LOG_INTERVAL, 14 | DEBUG, 15 | USE_ACTIONS, 16 | DATA_DIR, 17 | WEIGHTS_DIR, 18 | ) 19 | 20 | # Callback classes: 21 | class TensorBoardRL(TensorBoard): 22 | def on_epoch_end(self, epoch, logs=None) = 23 | super(TensorBoardRL, self).on_epoch_end(epoch) 24 | 25 | class LogActions(Callback): 26 | def __init__(self, interval): 27 | super(LogActions, self).__init__() 28 | self.interval = interval 29 | self.counter = 0 30 | 31 | def on_action_begin(self, action, logs=None): 32 | if self.counter % self.interval == 0: 33 | print("\naction: {}".format(USE_ACTIONS[action])) 34 | self.counter += 1 35 | 36 | # Callbacks: 37 | use_callbacks = [ 38 | ModelIntervalCheckpoint( 39 | os.path.join(WEIGHTS_DIR, 
"weights_{step}.h5f"), 40 | interval=LOG_INTERVAL, 41 | ), 42 | FileLogger( 43 | os.path.join(DATA_DIR, "log.json"), 44 | interval=DEBUG_LOG_INTERVAL, 45 | ), 46 | ] + ([ 47 | LogActions( 48 | interval=DEBUG_LOG_INTERVAL, 49 | ), 50 | TensorBoardRL( 51 | log_dir=DATA_DIR, 52 | write_images=True, 53 | ), 54 | ] if DEBUG else []) 55 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/constants.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | import pygame 7 | 8 | # Global constants: 9 | DEBUG = False 10 | SCALE = 100 11 | 12 | # Metrics: 13 | def reward_farther(x, y, scale=1, L=None) = 14 | """Compute the distance between x and y using the given norm.""" 15 | scale * np.linalg.norm(x - y, ord=L) 16 | 17 | def reward_closer(x, y, scale=1, L=None) = 18 | """Compute negative the distance between x and y using the given norm.""" 19 | reward_farther(x, y, scale=-scale, L=L) 20 | 21 | def gaussian_reward_closer(x, y, scale=1, sigma=1, L=None) = 22 | """Compute 1 - the distance between x and y using the given Gaussian.""" 23 | scale * np.exp(-reward_closer(x, y, L=L)**2/(2*sigma**2)) 24 | 25 | def gaussian_reward_farther(x, y, scale=1, sigma=1, L=None) = 26 | """Compute the distance between x and y using the given Gaussian.""" 27 | scale - gaussian_reward_closer(x, y, scale=scale, sigma=sigma, L=L) 28 | 29 | def angle_reward_farther(x, y, scale=1) = 30 | """Compute the distance between two angles in degrees.""" 31 | scale * angle_dist where: 32 | raw_dist = abs(x - y) 33 | angle_dist = min(raw_dist, 360 - raw_dist)/180 34 | 35 | def angle_reward_closer(x, y, scale=1) = 36 | """Compute 1 - the distance between two angles in degrees.""" 37 | scale - angle_reward_farther(x, y, scale=scale) 38 | 39 | # Environment constants: 40 | ENV_NAME = "MinecraftDefaultFlat1-v0" 41 | RESET_WORLD = False 42 | IMAGE_SIZE = (288, 384) # (height, width) 43 | OBSERVE_DEPTH = False 44 | CONTINUOUS_ACTIONS = True 45 | DISCRETE_ACTIONS = False 46 | KEYMAP = { 47 | pygame.K_w: "move 1", 48 | pygame.K_a: "turn -0.75", 49 | pygame.K_d: "turn 0.75", 50 | } 51 | USE_ACTIONS = KEYMAP.values() |> sorted |> list 52 | USE_REWARDS = { 53 | 100: 10, 54 | } 55 | # 0 for just reward shaping, positive for actual reward 56 | REWARD_POTENTIAL = 1 57 | POSITION_POTENTIAL = ( 58 | {"x": 19.5, "z": 19.5}, 59 | gaussian_reward_closer$(scale=0.9, sigma=9), 60 | ) 61 | ANGLE_POTENTIAL = ( 62 | 19.5, 19.5, # x, z 63 | angle_reward_closer$(scale=0.1), 64 | ) 65 | 66 | # Model constants: 67 | DOWNSAMPLE = 4 68 | GRAYSCALE = True 69 | TRIM_HEIGHT = False 70 | WINDOW_SIZE = 4 71 | CONV_LAYERS = ( 72 | # (filters, size, strides) 73 | (32, (8, 8), (4, 4)), 74 | (64, (4, 4), (2, 2)), 75 | (64, (3, 3), (1, 1)), 76 | ) 77 | CONV_ACTIVATION = "relu" 78 | DENSE_LAYERS = ( 79 | 512, 80 | ) 81 | DENSE_ACTIVATION = "relu" 82 | MEMORY_SIZE = 400*SCALE 83 | DISCOUNT_GAMMA = 0.99 84 | DOUBLE_DQN = True 85 | DUELING = True 86 | 87 | CHANNELS = 1 if GRAYSCALE else 3 + OBSERVE_DEPTH 88 | INPUT_SIZE = ( 89 | IMAGE_SIZE 90 | |> map$(-> _//DOWNSAMPLE) 91 | |> tuple 92 | |> ((-> (_[0]//2,) + _[1:]) if TRIM_HEIGHT else ->_) 93 | ) 94 | 95 | # Training constants: 96 | NUM_STEPS = 1000*SCALE 97 | MEMORY_STEPS = 100*SCALE 98 | BOLTZMANN_TAU = 0.25 99 | RANDOM_START_STEPS = 0 100 | WARMUP_STEPS = 0 101 | TARGET_UPDATE_PERIOD = SCALE 102 | SOFT_UPDATE_TARGET = False 103 | 104 | TARGET_UPDATE = ( 105 
| 1/TARGET_UPDATE_PERIOD if SOFT_UPDATE_TARGET 106 | else TARGET_UPDATE_PERIOD 107 | ) 108 | ESTIMATED_TIME = "{} mins".format( 109 | 0.03*MEMORY_STEPS + 0.05*NUM_STEPS 110 | |> -> _/60 111 | |> math.ceil 112 | |> int 113 | ) 114 | 115 | # Main constants: 116 | LOG_INTERVAL = NUM_STEPS//20 117 | DEBUG_LOG_INTERVAL = LOG_INTERVAL//10 118 | TESTING_EPISODES = 1 119 | RECORD_MP4 = ( 120 | 20, # fps 121 | 400000, # bit rate 122 | ) 123 | 124 | BASE_DIR = ( 125 | __file__ 126 | |> os.path.dirname 127 | |> os.path.dirname 128 | ) 129 | WEIGHTS_DIR = os.path.join(BASE_DIR, "saved_weights") 130 | DATA_DIR = os.path.join(BASE_DIR, "saved_data") 131 | IMAGES_DIR = os.path.join(BASE_DIR, "saved_images") 132 | for need_dir in (WEIGHTS_DIR, DATA_DIR, IMAGES_DIR): 133 | if not os.path.exists(need_dir): 134 | os.mkdir(need_dir) 135 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/display.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import numpy as np 3 | import pygame 4 | 5 | from minecraft_deep_learning.constants import KEYMAP 6 | 7 | # Functions: 8 | def create_screen(screen_size): 9 | """Set up pygame and create a screen of the given size.""" 10 | pygame.init() 11 | # pygame uses (width, height) not (height, width) 12 | return pygame.display.set_mode(screen_size |> reversed |> tuple) 13 | 14 | def show_array(screen, image): 15 | """Display the given image to the given screen.""" 16 | # pygame uses (width, height, channels) not (height, width, channels) 17 | swapped_image = np.swapaxes(image, 0, 1) 18 | assert screen.get_size() + (3,) == swapped_image.shape, (screen.get_size() + (3,), swapped_image.shape) 19 | pygame.surfarray.blit_array(screen, swapped_image) 20 | pygame.display.flip() 21 | 22 | def close_display(): 23 | """Quit pygame.""" 24 | try: 25 | pygame.quit() 26 | except Exception: 27 | pass 28 | 29 | def pump_events(): 30 | """Handle all pygame events.""" 31 | for event in pygame.event.get(): 32 | if event.type == pygame.QUIT: 33 | close_display() 34 | raise KeyboardInterrupt() 35 | 36 | def get_pressed_action(): 37 | """Get the action name corresponding to the first pressed key in the queue.""" 38 | for event in pygame.event.get(): 39 | if event.type == pygame.KEYDOWN and event.key in KEYMAP: 40 | return KEYMAP[event.key] 41 | if event.type == pygame.QUIT: 42 | close_display() 43 | raise KeyboardInterrupt() 44 | return None 45 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/environment.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | 4 | import gym 5 | 6 | from minecraft_deep_learning.constants import ( 7 | ENV_NAME, 8 | IMAGE_SIZE, 9 | USE_ACTIONS, 10 | OBSERVE_DEPTH, 11 | DISCRETE_ACTIONS, 12 | CONTINUOUS_ACTIONS, 13 | RESET_WORLD, 14 | DATA_DIR, 15 | ) 16 | 17 | # Environment setup: 18 | def build_environment(recordMP4=None): 19 | """Build the Minecraft gym environment.""" 20 | env = gym.make(ENV_NAME) 21 | record_dest = None 22 | if recordMP4: 23 | i = 0 24 | while record_dest is None or os.path.exists(record_dest): 25 | record_dest = os.path.join(DATA_DIR, "recorded_data_{}.tgz".format(i)) 26 | i += 1 27 | print("Recording video to: {}".format(record_dest)) 28 | env.init( 29 | start_minecraft=True, 30 | forceWorldReset=RESET_WORLD, 31 | allowDiscreteMovement=DISCRETE_ACTIONS, 32 | 
allowContinuousMovement=CONTINUOUS_ACTIONS, 33 | # gym-minecraft uses (width, height) not (height, width) 34 | videoResolution=IMAGE_SIZE |> reversed |> tuple if IMAGE_SIZE is not None else None, 35 | videoWithDepth=OBSERVE_DEPTH, 36 | recordDestination=record_dest, 37 | recordMP4=recordMP4 if recordMP4 else None, 38 | ) 39 | assert isinstance(env.action_space, gym.spaces.Discrete), env.action_space 40 | assert len(env.action_names) == 1 and len(env.action_spaces) == 1, (env.action_names, env.action_spaces) 41 | print("Using actions: {} -> {}".format(env.action_names[0], USE_ACTIONS)) 42 | use_action_space = gym.spaces.Discrete(len(USE_ACTIONS)) 43 | env.action_names = [USE_ACTIONS] 44 | env.action_spaces = [use_action_space] 45 | env.action_space = use_action_space 46 | return env 47 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/main.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | import argparse 4 | import shutil 5 | 6 | from quiver_engine import server 7 | 8 | from minecraft_deep_learning.environment import build_environment 9 | from minecraft_deep_learning.processor import MinecraftProcessor 10 | from minecraft_deep_learning.callbacks import use_callbacks 11 | from minecraft_deep_learning.memory import ( 12 | save_memory, 13 | load_memory, 14 | add_memory, 15 | train_on_memory, 16 | ) 17 | from minecraft_deep_learning.model import ( 18 | build_agent, 19 | build_image_model, 20 | ) 21 | from minecraft_deep_learning.display import ( 22 | close_display, 23 | get_pressed_action, 24 | ) 25 | from minecraft_deep_learning.constants import ( 26 | NUM_STEPS, 27 | LOG_INTERVAL, 28 | TESTING_EPISODES, 29 | WEIGHTS_DIR, 30 | ESTIMATED_TIME, 31 | RANDOM_START_STEPS, 32 | USE_ACTIONS, 33 | IMAGES_DIR, 34 | BASE_DIR, 35 | DATA_DIR, 36 | MEMORY_STEPS, 37 | RECORD_MP4, 38 | ) 39 | 40 | # Argument parser: 41 | final_weights_file = os.path.join(WEIGHTS_DIR, "weights_final.h5f") 42 | memory_file = os.path.join(DATA_DIR, "play_memory.json") 43 | 44 | arguments = argparse.ArgumentParser() 45 | arguments.add_argument( 46 | "--mode", 47 | choices=["train", "test", "play", "quiver"], 48 | default="train", 49 | ) 50 | arguments.add_argument( 51 | "--weights", 52 | type=str, 53 | default=final_weights_file, 54 | ) 55 | arguments.add_argument( 56 | "--memory", 57 | type=str, 58 | default=memory_file, 59 | ) 60 | 61 | # Argument handling: 62 | def run(args): 63 | """Process the given parsed arguments.""" 64 | env = None 65 | try: 66 | if args.mode == "play": 67 | proc = MinecraftProcessor(use_display=True, always_show_rewards=True) 68 | proc.handle_events = -> _ 69 | memory = load_memory(args.memory) 70 | env = build_environment() 71 | last_obs = None 72 | done = True 73 | try: 74 | while True: 75 | if done: 76 | print("observations in memory: {}".format(len(memory))) 77 | env.reset() 78 | action_name = None 79 | while action_name is None: 80 | action_name = get_pressed_action() 81 | action = USE_ACTIONS.index(action_name) 82 | obs, reward, done, info = ( 83 | action 84 | |> env.step 85 | |*> proc.process_step 86 | ) 87 | if last_obs is not None: 88 | memory.append((last_obs, action, reward, done)) 89 | last_obs = obs 90 | finally: 91 | save_memory(memory, args.memory) 92 | 93 | elif args.mode == "train": 94 | agent = build_agent() 95 | if os.path.exists(args.weights): 96 | agent.load_weights(args.weights) 97 | print("Loaded weights from: {}".format(args.weights)) 98 | 
else: 99 | print("\nFailed to load weights from: {}\n".format(args.weights)) 100 | add_memory(agent, load_memory(args.memory)) 101 | print("Estimated training time: {}".format(ESTIMATED_TIME)) 102 | if MEMORY_STEPS: 103 | train_on_memory(agent, MEMORY_STEPS) 104 | env = build_environment() 105 | agent.fit( 106 | env, 107 | callbacks=use_callbacks, 108 | nb_steps=NUM_STEPS, 109 | nb_max_start_steps=RANDOM_START_STEPS, 110 | log_interval=LOG_INTERVAL, 111 | ) 112 | agent.save_weights(args.weights, overwrite=True) 113 | 114 | elif args.mode == "test": 115 | agent = build_agent() 116 | agent.load_weights(args.weights) 117 | env = build_environment(recordMP4=RECORD_MP4) 118 | agent.test( 119 | env, 120 | nb_episodes=TESTING_EPISODES, 121 | nb_max_start_steps=RANDOM_START_STEPS, 122 | visualize=False, 123 | ) 124 | 125 | elif args.mode == "quiver": 126 | agent = build_agent(use_display=False) 127 | agent.load_weights(args.weights) 128 | quiver_model = build_image_model(agent) 129 | temp_dir = os.path.join(BASE_DIR, "tmp") 130 | p_dir = os.path.join(BASE_DIR, "-p") 131 | try: 132 | server.launch( 133 | quiver_model, 134 | USE_ACTIONS, 135 | temp_folder=temp_dir, 136 | input_folder=IMAGES_DIR, 137 | ) 138 | finally: 139 | print("Shutting down...") 140 | if os.path.exists(temp_dir): 141 | shutil.rmtree(temp_dir) 142 | if os.path.exists(p_dir): 143 | shutil.rmtree(p_dir) 144 | 145 | else: 146 | raise ValueError("unknown --mode {!r}".format(args.mode)) 147 | finally: 148 | if env is not None: 149 | env.close() 150 | 151 | def main(raw_args=None): 152 | """Parse arguments and pass them to run.""" 153 | if raw_args is None: 154 | args = arguments.parse_args() 155 | else: 156 | args = arguments.parse_args(raw_args) 157 | try: 158 | run(args) 159 | finally: 160 | close_display() 161 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/memory.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import json 3 | import codecs 4 | import os.path 5 | from io import BytesIO 6 | 7 | import numpy as np 8 | from progressbar import ProgressBar 9 | 10 | # Memory handling: 11 | def save_memory(memory, filename): 12 | """Save the given replay memory to the given filename.""" 13 | serialized_memory = [] 14 | for obs, action, reward, done in memory: 15 | binary_file = BytesIO() 16 | obs |> np.save$(binary_file) 17 | binary_file.seek(0) 18 | serialized_obs = ( 19 | binary_file.read() 20 | |> codecs.encode$(?, "base64") 21 | |> .decode("ascii") 22 | ) 23 | serialized_memory.append(( 24 | serialized_obs, 25 | action, 26 | reward, 27 | done, 28 | )) 29 | with open(filename, "w") as memory_file: 30 | json.dump(serialized_memory, memory_file) 31 | 32 | def load_memory(filename): 33 | """Load replay memory from the given filename.""" 34 | memory = [] 35 | if os.path.exists(filename): 36 | with open(filename, "r") as memory_file: 37 | serialized_memory = json.load(memory_file) 38 | for serialized_obs, action, reward, done in serialized_memory: 39 | binary_file = ( 40 | serialized_obs 41 | |> .encode("ascii") 42 | |> codecs.decode$(?, "base64") 43 | |> BytesIO 44 | ) 45 | obs = np.load(binary_file) 46 | memory.append(( 47 | obs, 48 | action, 49 | reward, 50 | done, 51 | )) 52 | print("Loaded {} observations into memory.".format(len(memory))) 53 | return memory 54 | 55 | def add_memory(agent, memory): 56 | """Add the given replay memory to the given agent.""" 57 | original_memory_size = agent.memory.nb_entries 58 | 
for obs, action, reward, done in memory: 59 | agent.memory.append(obs, action, reward, done, training=True) 60 | assert original_memory_size < agent.memory.nb_entries <= original_memory_size + len(memory), (agent.memory.nb_entries, original_memory_size, len(memory)) 61 | 62 | def train_on_memory(agent, iterations): 63 | """Train the agent for the given iterations on its existing replay memory.""" 64 | print("Training on existing memory for {} iterations...".format(iterations)) 65 | assert agent.memory.nb_entries > 0, "no existing memory found; run make play to generate some" 66 | training, agent.training = agent.training, True 67 | memory_interval, agent.memory_interval = agent.memory_interval, float("inf") 68 | nb_steps_warmup, agent.nb_steps_warmup = agent.nb_steps_warmup, 0 69 | train_interval, agent.train_interval = agent.train_interval, 1 70 | original_step = agent.step 71 | try: 72 | for step in ProgressBar()(range(1, iterations+1)): 73 | agent.step = step 74 | assert agent.step % agent.memory_interval != 0, (agent.step, agent.memory_interval) 75 | assert agent.training, agent.training 76 | assert agent.step > agent.nb_steps_warmup, (agent.step, agent.nb_steps_warmup) 77 | assert agent.step % agent.train_interval == 0, (agent.step, agent.train_interval) 78 | agent.backward(None, None) 79 | finally: 80 | agent.training = training 81 | agent.memory_interval = memory_interval 82 | agent.nb_steps_warmup = nb_steps_warmup 83 | agent.train_interval = train_interval 84 | agent.step = original_step 85 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/model.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import tensorflow as tf 3 | from keras.models import Sequential 4 | from keras.backend import image_data_format 5 | from keras.optimizers import Adam 6 | from keras.layers import ( 7 | Dense, 8 | Flatten, 9 | Conv2D, 10 | Permute, 11 | Lambda, 12 | ) 13 | from rl.memory import SequentialMemory 14 | from rl.agents.dqn import DQNAgent 15 | 16 | from minecraft_deep_learning.processor import MinecraftProcessor 17 | from minecraft_deep_learning.policy import build_policies 18 | from minecraft_deep_learning.constants import ( 19 | INPUT_SIZE, 20 | DENSE_LAYERS, 21 | CONV_LAYERS, 22 | MEMORY_SIZE, 23 | WINDOW_SIZE, 24 | WARMUP_STEPS, 25 | CHANNELS, 26 | DUELING, 27 | DOUBLE_DQN, 28 | TARGET_UPDATE, 29 | USE_ACTIONS, 30 | CONV_ACTIVATION, 31 | DENSE_ACTIVATION, 32 | DISCOUNT_GAMMA, 33 | ) 34 | 35 | # Model setup: 36 | def build_agent(use_display=None): 37 | """Build an agent for the given environment.""" 38 | num_actions = len(USE_ACTIONS) 39 | 40 | model = Sequential([ 41 | Permute( 42 | # convert (channels, width, height) input to proper ordering 43 | (2, 3, 1) if image_data_format() == "channels_last" else (1, 2, 3), 44 | input_shape=(CHANNELS * WINDOW_SIZE,) + INPUT_SIZE, 45 | ), 46 | ] + [ 47 | Conv2D(filters, size, strides=strides, activation=CONV_ACTIVATION) 48 | for filters, size, strides in CONV_LAYERS 49 | ] + [ 50 | Flatten(), 51 | ] + [ 52 | Dense(neurons, activation=DENSE_ACTIVATION) 53 | for neurons in DENSE_LAYERS 54 | ] + [ 55 | Dense(len(USE_ACTIONS)), 56 | ]) 57 | 58 | memory = SequentialMemory( 59 | limit=MEMORY_SIZE, 60 | window_length=WINDOW_SIZE, 61 | ) 62 | 63 | processor = MinecraftProcessor(use_display=use_display) 64 | 65 | policy, test_policy = build_policies() 66 | 67 | agent = DQNAgent( 68 | model=model, 69 | nb_actions=len(USE_ACTIONS), 70 | 
policy=policy, 71 | test_policy=test_policy, 72 | memory=memory, 73 | processor=processor, 74 | nb_steps_warmup=WARMUP_STEPS, 75 | target_model_update=TARGET_UPDATE, 76 | train_interval=WINDOW_SIZE, 77 | gamma=DISCOUNT_GAMMA, 78 | enable_double_dqn=DOUBLE_DQN, 79 | enable_dueling_network=DUELING, 80 | ) 81 | agent.compile( 82 | optimizer=Adam(), 83 | metrics=["mae", "mse"], 84 | ) 85 | return agent 86 | 87 | def build_image_model(agent) = 88 | """Construct a version of the model from agent that accepts raw images.""" 89 | Sequential([ 90 | Lambda(tensor -> 91 | tensor/255.0 92 | |> tensor -> [tensor]*WINDOW_SIZE 93 | |> tf.stack 94 | # convert (window, batch, height, width, channels) to (batch, window, channels, height, width) 95 | |> tf.transpose$(perm=[1, 0, 4, 2, 3]) 96 | |> tf.reshape$(shape=(-1, CHANNELS * WINDOW_SIZE) + INPUT_SIZE), 97 | input_shape=INPUT_SIZE + (CHANNELS,) 98 | ), 99 | ] + [ 100 | (def layer -> 101 | layer.inbound_nodes = []; 102 | layer)(layer) 103 | for layer in agent.model.layers 104 | ]) 105 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/policy.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import numpy as np 3 | from rl.policy import BoltzmannQPolicy 4 | 5 | from minecraft_deep_learning.constants import ( 6 | LOG_INTERVAL, 7 | DEBUG, 8 | BOLTZMANN_TAU, 9 | USE_ACTIONS, 10 | ) 11 | 12 | # Policy setup: 13 | def build_policies(): 14 | """Build the training and test exploration policies.""" 15 | print("Using Boltzmann exploration with tau = {}".format(BOLTZMANN_TAU)) 16 | policy = BoltzmannQPolicy(tau=BOLTZMANN_TAU) 17 | 18 | if DEBUG: 19 | policy._counter = 0 20 | policy._min_counter = 0 21 | old_select_action = policy.select_action 22 | def policy.select_action(q_values): 23 | if policy._counter and policy._counter % LOG_INTERVAL == 0: 24 | print("\neffective epsilon: {} (selected min {}/{} times)".format( 25 | policy._min_counter/policy._counter, 26 | policy._min_counter, 27 | policy._counter, 28 | )) 29 | min_action = np.argmin(q_values) 30 | action = old_select_action(q_values) 31 | if action == min_action: 32 | policy._min_counter += 1 33 | policy._counter += 1 34 | return action 35 | 36 | return policy, policy # train and test policies are identical 37 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/processor.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import math 3 | import os.path 4 | from pprint import pprint 5 | 6 | import numpy as np 7 | from PIL import Image 8 | from rl.core import Processor 9 | 10 | from minecraft_deep_learning.constants import ( 11 | IMAGE_SIZE, 12 | DOWNSAMPLE, 13 | INPUT_SIZE, 14 | GRAYSCALE, 15 | OBSERVE_DEPTH, 16 | CHANNELS, 17 | WINDOW_SIZE, 18 | TRIM_HEIGHT, 19 | DEBUG, 20 | LOG_INTERVAL, 21 | DEBUG_LOG_INTERVAL, 22 | USE_REWARDS, 23 | POSITION_POTENTIAL, 24 | ANGLE_POTENTIAL, 25 | IMAGES_DIR, 26 | DISCOUNT_GAMMA, 27 | REWARD_POTENTIAL, 28 | ) 29 | from minecraft_deep_learning.display import ( 30 | create_screen, 31 | show_array, 32 | pump_events, 33 | ) 34 | 35 | # Processor: 36 | class MinecraftProcessor(Processor): 37 | """Convert gym_minecraft output into a form understood by keras-rl.""" 38 | handle_events = staticmethod(pump_events) 39 | 40 | def __init__(self, use_display=None, always_show_rewards=False): 41 | super(MinecraftProcessor, self).__init__() 42 | self.screen 
= None 43 | self.always_show_rewards = always_show_rewards 44 | self.set_prev_potential() 45 | if use_display ?? DEBUG: 46 | self.counter = -1 47 | self.use_display() 48 | 49 | def use_display(self): 50 | """Enable logging of observations to the screen.""" 51 | if self.screen is None: 52 | self.screen_size = IMAGE_SIZE 53 | self.screen = create_screen(self.screen_size) 54 | 55 | def process_step(self, observation, reward, done, info): 56 | """Process a step from the environment. Called by the agent.""" 57 | if DEBUG: 58 | self.counter += 1 59 | processed_step = ( 60 | self.process_observation(observation), 61 | self.process_reward(reward, info), 62 | done, 63 | self.process_info(info), 64 | ) 65 | if done: 66 | self.set_prev_potential() 67 | return processed_step 68 | 69 | def set_prev_potential(self, reward=None): 70 | """Set the previous reward for computing reward deltas.""" 71 | self.prev_potential = reward 72 | 73 | def process_observation(self, observation): 74 | assert IMAGE_SIZE is not None and observation.shape == IMAGE_SIZE + (3 + OBSERVE_DEPTH,), observation.shape 75 | processed_observation = ( 76 | observation 77 | |> Image.fromarray 78 | # PIL uses (width, height) not (height, width) 79 | |> (.resize(IMAGE_SIZE |> map$(-> _//DOWNSAMPLE) |> reversed |> tuple) 80 | if DOWNSAMPLE and DOWNSAMPLE != 1 else ->_) 81 | |> (.convert("L") if GRAYSCALE else ->_) 82 | |> np.asarray$(dtype="uint8") 83 | ) 84 | if TRIM_HEIGHT: 85 | height = processed_observation.shape[0] 86 | processed_observation = processed_observation[height//2:] 87 | assert processed_observation.shape == INPUT_SIZE + ((CHANNELS,) if CHANNELS > 1 else ()), processed_observation.shape 88 | 89 | if DEBUG and self.counter % LOG_INTERVAL == 0: 90 | image_file = os.path.join(IMAGES_DIR, "obs_{}.png".format(self.counter)) 91 | Image.fromarray(processed_observation).save(image_file) 92 | 93 | if self.screen is not None: 94 | display_image = ( 95 | processed_observation 96 | |> Image.fromarray 97 | |> .convert("RGB") 98 | # PIL uses (width, height) not (height, width) 99 | |> .resize(self.screen_size |> reversed |> tuple) 100 | |> np.asarray$(dtype="uint8") 101 | ) 102 | assert display_image.shape == self.screen_size + (3,), (display_image.shape, self.screen_size + (3,)) 103 | show_array(self.screen, display_image) 104 | self.handle_events() 105 | 106 | return processed_observation 107 | 108 | def process_state_batch(self, batch): 109 | assert batch.shape[1:] == (WINDOW_SIZE,) + INPUT_SIZE + ((CHANNELS,) if CHANNELS > 1 else ()), batch.shape 110 | # We could perform this processing step in `process_observation`. In this case, however, 111 | # we would need to store a `float32` array instead, which is 4x more memory intensive than 112 | # a `uint8` array. This matters if we store 1M observations. 
113 | processed_batch = batch.astype("float32")/255.0 114 | if CHANNELS > 1: 115 | # merge WINDOW_SIZE and CHANNELS together 116 | processed_batch = ( 117 | np.moveaxis(processed_batch, -1, 2) 118 | |> .reshape((batch.shape[0], -1) + INPUT_SIZE) 119 | ) 120 | assert processed_batch.shape == (batch.shape[0], CHANNELS * WINDOW_SIZE) + INPUT_SIZE, processed_batch.shape 121 | return processed_batch 122 | 123 | def process_info(self, info): 124 | # dictionary-valued infos are not supported by keras-rl 125 | info["observation"] = ( 126 | list(info["observation"].items()) 127 | if info["observation"] is not None else [] 128 | ) 129 | if DEBUG and self.counter % LOG_INTERVAL == 0: 130 | pprint(info) 131 | return info 132 | 133 | @property 134 | def show_rewards(self) = 135 | DEBUG or self.always_show_rewards 136 | 137 | def process_reward(self, reward, info): 138 | if self.show_rewards: 139 | reward_history = [(reward, None)] 140 | 141 | reward = USE_REWARDS.get(reward, 0) 142 | if self.show_rewards: 143 | reward_history.append((reward, "using")) 144 | 145 | if reward == 0 and (REWARD_POTENTIAL ?? False) is not False and info["observation"] is not None: 146 | potential = 0 147 | 148 | if POSITION_POTENTIAL: 149 | reward_coords, metric = POSITION_POTENTIAL 150 | x = np.asarray([ 151 | info["observation"][coord.upper() + "Pos"] 152 | for coord in reward_coords |> sorted 153 | ]) 154 | y = np.asarray([ 155 | pos for coord, pos in reward_coords.items() |> sorted 156 | ]) 157 | potential += metric(x, y) 158 | if self.show_rewards: 159 | reward_history.append((potential, "position {}".format({ 160 | coord: x[i] for i, coord in reward_coords |> sorted |> enumerate 161 | }))) 162 | 163 | if ANGLE_POTENTIAL: 164 | desired_x, desired_z, metric = ANGLE_POTENTIAL 165 | x, z, raw_yaw = ("XPos", "ZPos", "Yaw") |> map$(info["observation"][]) 166 | # calculate yaw and desired_yaw to match up 167 | yaw = (raw_yaw + 90) % 360 168 | desired_yaw = ( 169 | math.atan2(z - desired_z, x - desired_x) + math.pi 170 | |> math.degrees 171 | ) 172 | potential += metric(yaw, desired_yaw) 173 | if self.show_rewards: 174 | reward_history.append((potential, "got angle {}; desired angle {}".format(yaw, desired_yaw))) 175 | 176 | if self.prev_potential is not None: 177 | reward += (DISCOUNT_GAMMA + REWARD_POTENTIAL)*potential - self.prev_potential 178 | self.set_prev_potential(potential) 179 | if self.show_rewards: 180 | reward_history.append((reward, "reward shaping {} - {}".format(potential, self.prev_potential))) 181 | 182 | if self.always_show_rewards or self.show_rewards and ( 183 | self.counter % DEBUG_LOG_INTERVAL == 0 184 | or abs(reward) >= 1 185 | ): 186 | prev_reward = reward_history[0][0] 187 | reward_str = "reward: {}".format(prev_reward) 188 | for i in range(1, len(reward_history)): 189 | reward_i, reason = reward_history[i] 190 | reward_diff = reward_i - prev_reward 191 | if reward_diff > 0: 192 | reward_str += "\t+ {}".format(reward_diff) 193 | elif reward_diff < 0: 194 | reward_str += "\t- {}".format(-reward_diff) 195 | reward_str += " ({})".format(reason) 196 | prev_reward = reward_i 197 | print("\n{}\n\t= {}".format(reward_str, reward)) 198 | 199 | return reward 200 | -------------------------------------------------------------------------------- /saved_weights/weights_final.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evhub/minecraft-deep-learning/b80c33be66306debda5c1e3aa3d3f5adb1444af2/saved_weights/weights_final.h5f 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import setuptools 3 | 4 | # Constants: 5 | NAME = "minecraft-deep-learning" 6 | VERSION = "0.0.1" 7 | DESCRIPTION = "Deep reinforcement learning in Minecraft using gym-minecraft and keras-rl." 8 | HOME_URL = "https://github.com/evhub/minecraft-deep-learning" 9 | AUTHOR = "Evan Hubinger" 10 | AUTHOR_EMAIL = "evanjhub@gmail.com" 11 | REQUIREMENTS = [ 12 | "gym", 13 | "keras", 14 | "keras-rl", 15 | "tensorflow-gpu", 16 | "pygame", 17 | "scipy", 18 | "quiver_engine", 19 | "progressbar2", 20 | ] 21 | 22 | # Setup: 23 | setuptools.setup( 24 | name=NAME, 25 | version=VERSION, 26 | description=DESCRIPTION, 27 | url=HOME_URL, 28 | author=AUTHOR, 29 | author_email=AUTHOR_EMAIL, 30 | install_requires=REQUIREMENTS, 31 | packages=setuptools.find_packages(), 32 | ) 33 | --------------------------------------------------------------------------------