├── .gitattributes ├── .gitignore ├── Makefile ├── README.md ├── minecraft_deep_learning-source ├── __init__.coco ├── __main__.coco ├── callbacks.coco ├── constants.coco ├── display.coco ├── environment.coco ├── main.coco ├── memory.coco ├── model.coco ├── policy.coco └── processor.coco ├── saved_weights └── weights_final.h5f ├── setup.cfg └── setup.coco /.gitattributes: -------------------------------------------------------------------------------- 1 | *.json filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS files 2 | .DS_Store 3 | Thumbs.db 4 | *~ 5 | .#* 6 | ._* 7 | \#*\# 8 | 9 | # Python files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | *.so 14 | 15 | # Packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | bin/ 30 | *.iml 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django 59 | *.log 60 | local_settings.py 61 | 62 | # Flask 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy 67 | .scrapy 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter 73 | .ipynb_checkpoints/ 74 | 75 | # Celery 76 | celerybeat-schedule 77 | 78 | # SageMath 79 | *.sage.py 80 | 81 | # Environments 82 | .python-version 83 | .env 84 | .venv 85 | env/ 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope 94 | .ropeproject 95 | 96 | # MyPy 97 | .mypy_cache/ 98 | 99 | # mkdocs 100 | /site 101 | 102 | # Vim 103 | *.sw[nop] 104 | .ropeproject 105 | 106 | # Sublime 107 | *.sublime-* 108 | 109 | # Compiled Coconut 110 | *.py 111 | 112 | # Saved data 113 | saved_data/* 114 | !saved_data/play_memory.json 115 | 116 | # Saved images 117 | saved_images/ 118 | tmp/ 119 | -p/ 120 | 121 | # Saved weights 122 | saved_weights/* 123 | !saved_weights/weights_final.h5f 124 | 125 | # Saved video 126 | ffmpeg.out 127 | mission_records/ 128 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install 2 | install: build 3 | pip install -e . 4 | 5 | .PHONY: build 6 | build: setup 7 | coconut setup.coco --no-tco --strict 8 | coconut "minecraft_deep_learning-source" minecraft_deep_learning --no-tco --strict --jobs sys 9 | 10 | .PHONY: setup 11 | setup: 12 | pip install coconut-develop 13 | 14 | .PHONY: play 15 | play: install 16 | python minecraft_deep_learning --mode play 17 | 18 | .PHONY: train 19 | train: install 20 | python minecraft_deep_learning --mode train 21 | 22 | .PHONY: test 23 | test: install 24 | python minecraft_deep_learning --mode test 25 | 26 | .PHONY: clean 27 | clean: 28 | rm -rf ./dist ./build ./minecraft_deep_learning 29 | find . -name '*.pyc' -delete 30 | find . -name '__pycache__' -delete 31 | find . 
-name '*.py' -delete 32 | 33 | .PHONY: wipe 34 | wipe: clean 35 | rm -rf ./saved_weights ./saved_data ./saved_images 36 | 37 | .PHONY: watch 38 | watch: 39 | coconut "minecraft_deep_learning-source" minecraft_deep_learning --watch --no-tco --strict 40 | 41 | .PHONY: quiver 42 | quiver: install 43 | python minecraft_deep_learning --mode quiver 44 | 45 | .PHONY: tensorboard 46 | tensorboard: 47 | open http://localhost:6006 48 | tensorboard --logdir=./saved_data 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Minecraft Deep Learning 2 | 3 | Deep reinforcement learning in Minecraft using [gym-minecraft](https://github.com/tambetm/gym-minecraft) and [keras-rl](https://github.com/matthiasplappert/keras-rl). See [this presentation](https://docs.google.com/presentation/d/1tNZANcEBsSqMPDr-pnXCosjJO5U1OBET-ZElCtKPc7A/edit?usp=sharing) for the (very positive) results. 4 | 5 | ## Usage 6 | 7 | First, you will need to install [gym-minecraft](https://github.com/tambetm/gym-minecraft) manually. Then: 8 | 9 | 1. to play the environment yourself, run 10 | ``` 11 | make play 12 | ``` 13 | 2. to train your model, run 14 | ``` 15 | make train 16 | ``` 17 | 3. and to test your model, run 18 | ``` 19 | make test 20 | ``` 21 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/__init__.coco: -------------------------------------------------------------------------------- 1 | """ 2 | Deep reinforcement learning in Minecraft using gym-minecraft and keras-rl. 3 | 4 | Portions of this code are based on example code from keras-rl, used under its MIT license. 5 | """ 6 | 7 | # Imports: 8 | try: 9 | import gym_minecraft # necessary to register the environment 10 | except ImportError: 11 | raise RuntimeError("minecraft-deep-learning requires gym-minecraft (see https://github.com/tambetm/gym-minecraft for installation instructions)") 12 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/__main__.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | from minecraft_deep_learning.main import main 3 | 4 | # Main: 5 | if __name__ == "__main__": 6 | main() 7 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/callbacks.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | 4 | from rl.callbacks import ( 5 | Callback, 6 | FileLogger, 7 | ModelIntervalCheckpoint, 8 | ) 9 | from keras.callbacks import TensorBoard 10 | 11 | from minecraft_deep_learning.constants import ( 12 | DEBUG_LOG_INTERVAL, 13 | LOG_INTERVAL, 14 | DEBUG, 15 | USE_ACTIONS, 16 | DATA_DIR, 17 | WEIGHTS_DIR, 18 | ) 19 | 20 | # Callback classes: 21 | class TensorBoardRL(TensorBoard): 22 | def on_epoch_end(self, epoch, logs=None) = 23 | super(TensorBoardRL, self).on_epoch_end(epoch) 24 | 25 | class LogActions(Callback): 26 | def __init__(self, interval): 27 | super(LogActions, self).__init__() 28 | self.interval = interval 29 | self.counter = 0 30 | 31 | def on_action_begin(self, action, logs=None): 32 | if self.counter % self.interval == 0: 33 | print("\naction: {}".format(USE_ACTIONS[action])) 34 | self.counter += 1 35 | 36 | # Callbacks: 37 | use_callbacks = [ 38 | ModelIntervalCheckpoint( 39 | os.path.join(WEIGHTS_DIR, 
"weights_{step}.h5f"), 40 | interval=LOG_INTERVAL, 41 | ), 42 | FileLogger( 43 | os.path.join(DATA_DIR, "log.json"), 44 | interval=DEBUG_LOG_INTERVAL, 45 | ), 46 | ] + ([ 47 | LogActions( 48 | interval=DEBUG_LOG_INTERVAL, 49 | ), 50 | TensorBoardRL( 51 | log_dir=DATA_DIR, 52 | write_images=True, 53 | ), 54 | ] if DEBUG else []) 55 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/constants.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | import pygame 7 | 8 | # Global constants: 9 | DEBUG = False 10 | SCALE = 100 11 | 12 | # Metrics: 13 | def reward_farther(x, y, scale=1, L=None) = 14 | """Compute the distance between x and y using the given norm.""" 15 | scale * np.linalg.norm(x - y, ord=L) 16 | 17 | def reward_closer(x, y, scale=1, L=None) = 18 | """Compute negative the distance between x and y using the given norm.""" 19 | reward_farther(x, y, scale=-scale, L=L) 20 | 21 | def gaussian_reward_closer(x, y, scale=1, sigma=1, L=None) = 22 | """Compute 1 - the distance between x and y using the given Gaussian.""" 23 | scale * np.exp(-reward_closer(x, y, L=L)**2/(2*sigma**2)) 24 | 25 | def gaussian_reward_farther(x, y, scale=1, sigma=1, L=None) = 26 | """Compute the distance between x and y using the given Gaussian.""" 27 | scale - gaussian_reward_closer(x, y, scale=scale, sigma=sigma, L=L) 28 | 29 | def angle_reward_farther(x, y, scale=1) = 30 | """Compute the distance between two angles in degrees.""" 31 | scale * angle_dist where: 32 | raw_dist = abs(x - y) 33 | angle_dist = min(raw_dist, 360 - raw_dist)/180 34 | 35 | def angle_reward_closer(x, y, scale=1) = 36 | """Compute 1 - the distance between two angles in degrees.""" 37 | scale - angle_reward_farther(x, y, scale=scale) 38 | 39 | # Environment constants: 40 | ENV_NAME = "MinecraftDefaultFlat1-v0" 41 | RESET_WORLD = False 42 | IMAGE_SIZE = (288, 384) # (height, width) 43 | OBSERVE_DEPTH = False 44 | CONTINUOUS_ACTIONS = True 45 | DISCRETE_ACTIONS = False 46 | KEYMAP = { 47 | pygame.K_w: "move 1", 48 | pygame.K_a: "turn -0.75", 49 | pygame.K_d: "turn 0.75", 50 | } 51 | USE_ACTIONS = KEYMAP.values() |> sorted |> list 52 | USE_REWARDS = { 53 | 100: 10, 54 | } 55 | # 0 for just reward shaping, positive for actual reward 56 | REWARD_POTENTIAL = 1 57 | POSITION_POTENTIAL = ( 58 | {"x": 19.5, "z": 19.5}, 59 | gaussian_reward_closer$(scale=0.9, sigma=9), 60 | ) 61 | ANGLE_POTENTIAL = ( 62 | 19.5, 19.5, # x, z 63 | angle_reward_closer$(scale=0.1), 64 | ) 65 | 66 | # Model constants: 67 | DOWNSAMPLE = 4 68 | GRAYSCALE = True 69 | TRIM_HEIGHT = False 70 | WINDOW_SIZE = 4 71 | CONV_LAYERS = ( 72 | # (filters, size, strides) 73 | (32, (8, 8), (4, 4)), 74 | (64, (4, 4), (2, 2)), 75 | (64, (3, 3), (1, 1)), 76 | ) 77 | CONV_ACTIVATION = "relu" 78 | DENSE_LAYERS = ( 79 | 512, 80 | ) 81 | DENSE_ACTIVATION = "relu" 82 | MEMORY_SIZE = 400*SCALE 83 | DISCOUNT_GAMMA = 0.99 84 | DOUBLE_DQN = True 85 | DUELING = True 86 | 87 | CHANNELS = 1 if GRAYSCALE else 3 + OBSERVE_DEPTH 88 | INPUT_SIZE = ( 89 | IMAGE_SIZE 90 | |> map$(-> _//DOWNSAMPLE) 91 | |> tuple 92 | |> ((-> (_[0]//2,) + _[1:]) if TRIM_HEIGHT else ->_) 93 | ) 94 | 95 | # Training constants: 96 | NUM_STEPS = 1000*SCALE 97 | MEMORY_STEPS = 100*SCALE 98 | BOLTZMANN_TAU = 0.25 99 | RANDOM_START_STEPS = 0 100 | WARMUP_STEPS = 0 101 | TARGET_UPDATE_PERIOD = SCALE 102 | SOFT_UPDATE_TARGET = False 103 | 104 | TARGET_UPDATE = ( 105 
| 1/TARGET_UPDATE_PERIOD if SOFT_UPDATE_TARGET 106 | else TARGET_UPDATE_PERIOD 107 | ) 108 | ESTIMATED_TIME = "{} mins".format( 109 | 0.03*MEMORY_STEPS + 0.05*NUM_STEPS 110 | |> -> _/60 111 | |> math.ceil 112 | |> int 113 | ) 114 | 115 | # Main constants: 116 | LOG_INTERVAL = NUM_STEPS//20 117 | DEBUG_LOG_INTERVAL = LOG_INTERVAL//10 118 | TESTING_EPISODES = 1 119 | RECORD_MP4 = ( 120 | 20, # fps 121 | 400000, # bit rate 122 | ) 123 | 124 | BASE_DIR = ( 125 | __file__ 126 | |> os.path.dirname 127 | |> os.path.dirname 128 | ) 129 | WEIGHTS_DIR = os.path.join(BASE_DIR, "saved_weights") 130 | DATA_DIR = os.path.join(BASE_DIR, "saved_data") 131 | IMAGES_DIR = os.path.join(BASE_DIR, "saved_images") 132 | for need_dir in (WEIGHTS_DIR, DATA_DIR, IMAGES_DIR): 133 | if not os.path.exists(need_dir): 134 | os.mkdir(need_dir) 135 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/display.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import numpy as np 3 | import pygame 4 | 5 | from minecraft_deep_learning.constants import KEYMAP 6 | 7 | # Functions: 8 | def create_screen(screen_size): 9 | """Set up pygame and create a screen of the given size.""" 10 | pygame.init() 11 | # pygame uses (width, height) not (height, width) 12 | return pygame.display.set_mode(screen_size |> reversed |> tuple) 13 | 14 | def show_array(screen, image): 15 | """Display the given image to the given screen.""" 16 | # pygame uses (width, height, channels) not (height, width, channels) 17 | swapped_image = np.swapaxes(image, 0, 1) 18 | assert screen.get_size() + (3,) == swapped_image.shape, (screen.get_size() + (3,), swapped_image.shape) 19 | pygame.surfarray.blit_array(screen, swapped_image) 20 | pygame.display.flip() 21 | 22 | def close_display(): 23 | """Quit pygame.""" 24 | try: 25 | pygame.quit() 26 | except Exception: 27 | pass 28 | 29 | def pump_events(): 30 | """Handle all pygame events.""" 31 | for event in pygame.event.get(): 32 | if event.type == pygame.QUIT: 33 | close_display() 34 | raise KeyboardInterrupt() 35 | 36 | def get_pressed_action(): 37 | """Get the action name corresponding to the first pressed key in the queue.""" 38 | for event in pygame.event.get(): 39 | if event.type == pygame.KEYDOWN and event.key in KEYMAP: 40 | return KEYMAP[event.key] 41 | if event.type == pygame.QUIT: 42 | close_display() 43 | raise KeyboardInterrupt() 44 | return None 45 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/environment.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | 4 | import gym 5 | 6 | from minecraft_deep_learning.constants import ( 7 | ENV_NAME, 8 | IMAGE_SIZE, 9 | USE_ACTIONS, 10 | OBSERVE_DEPTH, 11 | DISCRETE_ACTIONS, 12 | CONTINUOUS_ACTIONS, 13 | RESET_WORLD, 14 | DATA_DIR, 15 | ) 16 | 17 | # Environment setup: 18 | def build_environment(recordMP4=None): 19 | """Build the Minecraft gym environment.""" 20 | env = gym.make(ENV_NAME) 21 | record_dest = None 22 | if recordMP4: 23 | i = 0 24 | while record_dest is None or os.path.exists(record_dest): 25 | record_dest = os.path.join(DATA_DIR, "recorded_data_{}.tgz".format(i)) 26 | i += 1 27 | print("Recording video to: {}".format(record_dest)) 28 | env.init( 29 | start_minecraft=True, 30 | forceWorldReset=RESET_WORLD, 31 | allowDiscreteMovement=DISCRETE_ACTIONS, 32 | 
allowContinuousMovement=CONTINUOUS_ACTIONS, 33 | # gym-minecraft uses (width, height) not (height, width) 34 | videoResolution=IMAGE_SIZE |> reversed |> tuple if IMAGE_SIZE is not None else None, 35 | videoWithDepth=OBSERVE_DEPTH, 36 | recordDestination=record_dest, 37 | recordMP4=recordMP4 if recordMP4 else None, 38 | ) 39 | assert isinstance(env.action_space, gym.spaces.Discrete), env.action_space 40 | assert len(env.action_names) == 1 and len(env.action_spaces) == 1, (env.action_names, env.action_spaces) 41 | print("Using actions: {} -> {}".format(env.action_names[0], USE_ACTIONS)) 42 | use_action_space = gym.spaces.Discrete(len(USE_ACTIONS)) 43 | env.action_names = [USE_ACTIONS] 44 | env.action_spaces = [use_action_space] 45 | env.action_space = use_action_space 46 | return env 47 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/main.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import os.path 3 | import argparse 4 | import shutil 5 | 6 | from quiver_engine import server 7 | 8 | from minecraft_deep_learning.environment import build_environment 9 | from minecraft_deep_learning.processor import MinecraftProcessor 10 | from minecraft_deep_learning.callbacks import use_callbacks 11 | from minecraft_deep_learning.memory import ( 12 | save_memory, 13 | load_memory, 14 | add_memory, 15 | train_on_memory, 16 | ) 17 | from minecraft_deep_learning.model import ( 18 | build_agent, 19 | build_image_model, 20 | ) 21 | from minecraft_deep_learning.display import ( 22 | close_display, 23 | get_pressed_action, 24 | ) 25 | from minecraft_deep_learning.constants import ( 26 | NUM_STEPS, 27 | LOG_INTERVAL, 28 | TESTING_EPISODES, 29 | WEIGHTS_DIR, 30 | ESTIMATED_TIME, 31 | RANDOM_START_STEPS, 32 | USE_ACTIONS, 33 | IMAGES_DIR, 34 | BASE_DIR, 35 | DATA_DIR, 36 | MEMORY_STEPS, 37 | RECORD_MP4, 38 | ) 39 | 40 | # Argument parser: 41 | final_weights_file = os.path.join(WEIGHTS_DIR, "weights_final.h5f") 42 | memory_file = os.path.join(DATA_DIR, "play_memory.json") 43 | 44 | arguments = argparse.ArgumentParser() 45 | arguments.add_argument( 46 | "--mode", 47 | choices=["train", "test", "play", "quiver"], 48 | default="train", 49 | ) 50 | arguments.add_argument( 51 | "--weights", 52 | type=str, 53 | default=final_weights_file, 54 | ) 55 | arguments.add_argument( 56 | "--memory", 57 | type=str, 58 | default=memory_file, 59 | ) 60 | 61 | # Argument handling: 62 | def run(args): 63 | """Process the given parsed arguments.""" 64 | env = None 65 | try: 66 | if args.mode == "play": 67 | proc = MinecraftProcessor(use_display=True, always_show_rewards=True) 68 | proc.handle_events = -> _ 69 | memory = load_memory(args.memory) 70 | env = build_environment() 71 | last_obs = None 72 | done = True 73 | try: 74 | while True: 75 | if done: 76 | print("observations in memory: {}".format(len(memory))) 77 | env.reset() 78 | action_name = None 79 | while action_name is None: 80 | action_name = get_pressed_action() 81 | action = USE_ACTIONS.index(action_name) 82 | obs, reward, done, info = ( 83 | action 84 | |> env.step 85 | |*> proc.process_step 86 | ) 87 | if last_obs is not None: 88 | memory.append((last_obs, action, reward, done)) 89 | last_obs = obs 90 | finally: 91 | save_memory(memory, args.memory) 92 | 93 | elif args.mode == "train": 94 | agent = build_agent() 95 | if os.path.exists(args.weights): 96 | agent.load_weights(args.weights) 97 | print("Loaded weights from: {}".format(args.weights)) 98 | 
else: 99 | print("\nFailed to load weights from: {}\n".format(args.weights)) 100 | add_memory(agent, load_memory(args.memory)) 101 | print("Estimated training time: {}".format(ESTIMATED_TIME)) 102 | if MEMORY_STEPS: 103 | train_on_memory(agent, MEMORY_STEPS) 104 | env = build_environment() 105 | agent.fit( 106 | env, 107 | callbacks=use_callbacks, 108 | nb_steps=NUM_STEPS, 109 | nb_max_start_steps=RANDOM_START_STEPS, 110 | log_interval=LOG_INTERVAL, 111 | ) 112 | agent.save_weights(args.weights, overwrite=True) 113 | 114 | elif args.mode == "test": 115 | agent = build_agent() 116 | agent.load_weights(args.weights) 117 | env = build_environment(recordMP4=RECORD_MP4) 118 | agent.test( 119 | env, 120 | nb_episodes=TESTING_EPISODES, 121 | nb_max_start_steps=RANDOM_START_STEPS, 122 | visualize=False, 123 | ) 124 | 125 | elif args.mode == "quiver": 126 | agent = build_agent(use_display=False) 127 | agent.load_weights(args.weights) 128 | quiver_model = build_image_model(agent) 129 | temp_dir = os.path.join(BASE_DIR, "tmp") 130 | p_dir = os.path.join(BASE_DIR, "-p") 131 | try: 132 | server.launch( 133 | quiver_model, 134 | USE_ACTIONS, 135 | temp_folder=temp_dir, 136 | input_folder=IMAGES_DIR, 137 | ) 138 | finally: 139 | print("Shutting down...") 140 | if os.path.exists(temp_dir): 141 | shutil.rmtree(temp_dir) 142 | if os.path.exists(p_dir): 143 | shutil.rmtree(p_dir) 144 | 145 | else: 146 | raise ValueError("unknown --mode {!r}".format(args.mode)) 147 | finally: 148 | if env is not None: 149 | env.close() 150 | 151 | def main(raw_args=None): 152 | """Parse arguments and pass them to run.""" 153 | if raw_args is None: 154 | args = arguments.parse_args() 155 | else: 156 | args = arguments.parse_args(raw_args) 157 | try: 158 | run(args) 159 | finally: 160 | close_display() 161 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/memory.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import json 3 | import codecs 4 | import os.path 5 | from io import BytesIO 6 | 7 | import numpy as np 8 | from progressbar import ProgressBar 9 | 10 | # Memory handling: 11 | def save_memory(memory, filename): 12 | """Save the given replay memory to the given filename.""" 13 | serialized_memory = [] 14 | for obs, action, reward, done in memory: 15 | binary_file = BytesIO() 16 | obs |> np.save$(binary_file) 17 | binary_file.seek(0) 18 | serialized_obs = ( 19 | binary_file.read() 20 | |> codecs.encode$(?, "base64") 21 | |> .decode("ascii") 22 | ) 23 | serialized_memory.append(( 24 | serialized_obs, 25 | action, 26 | reward, 27 | done, 28 | )) 29 | with open(filename, "w") as memory_file: 30 | json.dump(serialized_memory, memory_file) 31 | 32 | def load_memory(filename): 33 | """Load replay memory from the given filename.""" 34 | memory = [] 35 | if os.path.exists(filename): 36 | with open(filename, "r") as memory_file: 37 | serialized_memory = json.load(memory_file) 38 | for serialized_obs, action, reward, done in serialized_memory: 39 | binary_file = ( 40 | serialized_obs 41 | |> .encode("ascii") 42 | |> codecs.decode$(?, "base64") 43 | |> BytesIO 44 | ) 45 | obs = np.load(binary_file) 46 | memory.append(( 47 | obs, 48 | action, 49 | reward, 50 | done, 51 | )) 52 | print("Loaded {} observations into memory.".format(len(memory))) 53 | return memory 54 | 55 | def add_memory(agent, memory): 56 | """Add the given replay memory to the given agent.""" 57 | original_memory_size = agent.memory.nb_entries 58 | 
for obs, action, reward, done in memory: 59 | agent.memory.append(obs, action, reward, done, training=True) 60 | assert original_memory_size < agent.memory.nb_entries <= original_memory_size + len(memory), (agent.memory.nb_entries, original_memory_size, len(memory)) 61 | 62 | def train_on_memory(agent, iterations): 63 | """Train the agent for the given iterations on its existing replay memory.""" 64 | print("Training on existing memory for {} iterations...".format(iterations)) 65 | assert agent.memory.nb_entries > 0, "no existing memory found; run make play to generate some" 66 | training, agent.training = agent.training, True 67 | memory_interval, agent.memory_interval = agent.memory_interval, float("inf") 68 | nb_steps_warmup, agent.nb_steps_warmup = agent.nb_steps_warmup, 0 69 | train_interval, agent.train_interval = agent.train_interval, 1 70 | original_step = agent.step 71 | try: 72 | for step in ProgressBar()(range(1, iterations+1)): 73 | agent.step = step 74 | assert agent.step % agent.memory_interval != 0, (agent.step, agent.memory_interval) 75 | assert agent.training, agent.training 76 | assert agent.step > agent.nb_steps_warmup, (agent.step, agent.nb_steps_warmup) 77 | assert agent.step % agent.train_interval == 0, (agent.step, agent.train_interval) 78 | agent.backward(None, None) 79 | finally: 80 | agent.training = training 81 | agent.memory_interval = memory_interval 82 | agent.nb_steps_warmup = nb_steps_warmup 83 | agent.train_interval = train_interval 84 | agent.step = original_step 85 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/model.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import tensorflow as tf 3 | from keras.models import Sequential 4 | from keras.backend import image_data_format 5 | from keras.optimizers import Adam 6 | from keras.layers import ( 7 | Dense, 8 | Flatten, 9 | Conv2D, 10 | Permute, 11 | Lambda, 12 | ) 13 | from rl.memory import SequentialMemory 14 | from rl.agents.dqn import DQNAgent 15 | 16 | from minecraft_deep_learning.processor import MinecraftProcessor 17 | from minecraft_deep_learning.policy import build_policies 18 | from minecraft_deep_learning.constants import ( 19 | INPUT_SIZE, 20 | DENSE_LAYERS, 21 | CONV_LAYERS, 22 | MEMORY_SIZE, 23 | WINDOW_SIZE, 24 | WARMUP_STEPS, 25 | CHANNELS, 26 | DUELING, 27 | DOUBLE_DQN, 28 | TARGET_UPDATE, 29 | USE_ACTIONS, 30 | CONV_ACTIVATION, 31 | DENSE_ACTIVATION, 32 | DISCOUNT_GAMMA, 33 | ) 34 | 35 | # Model setup: 36 | def build_agent(use_display=None): 37 | """Build an agent for the given environment.""" 38 | num_actions = len(USE_ACTIONS) 39 | 40 | model = Sequential([ 41 | Permute( 42 | # convert (channels, width, height) input to proper ordering 43 | (2, 3, 1) if image_data_format() == "channels_last" else (1, 2, 3), 44 | input_shape=(CHANNELS * WINDOW_SIZE,) + INPUT_SIZE, 45 | ), 46 | ] + [ 47 | Conv2D(filters, size, strides=strides, activation=CONV_ACTIVATION) 48 | for filters, size, strides in CONV_LAYERS 49 | ] + [ 50 | Flatten(), 51 | ] + [ 52 | Dense(neurons, activation=DENSE_ACTIVATION) 53 | for neurons in DENSE_LAYERS 54 | ] + [ 55 | Dense(len(USE_ACTIONS)), 56 | ]) 57 | 58 | memory = SequentialMemory( 59 | limit=MEMORY_SIZE, 60 | window_length=WINDOW_SIZE, 61 | ) 62 | 63 | processor = MinecraftProcessor(use_display=use_display) 64 | 65 | policy, test_policy = build_policies() 66 | 67 | agent = DQNAgent( 68 | model=model, 69 | nb_actions=len(USE_ACTIONS), 70 | 
policy=policy, 71 | test_policy=test_policy, 72 | memory=memory, 73 | processor=processor, 74 | nb_steps_warmup=WARMUP_STEPS, 75 | target_model_update=TARGET_UPDATE, 76 | train_interval=WINDOW_SIZE, 77 | gamma=DISCOUNT_GAMMA, 78 | enable_double_dqn=DOUBLE_DQN, 79 | enable_dueling_network=DUELING, 80 | ) 81 | agent.compile( 82 | optimizer=Adam(), 83 | metrics=["mae", "mse"], 84 | ) 85 | return agent 86 | 87 | def build_image_model(agent) = 88 | """Construct a version of the model from agent that accepts raw images.""" 89 | Sequential([ 90 | Lambda(tensor -> 91 | tensor/255.0 92 | |> tensor -> [tensor]*WINDOW_SIZE 93 | |> tf.stack 94 | # convert (window, batch, height, width, channels) to (batch, window, channels, height, width) 95 | |> tf.transpose$(perm=[1, 0, 4, 2, 3]) 96 | |> tf.reshape$(shape=(-1, CHANNELS * WINDOW_SIZE) + INPUT_SIZE), 97 | input_shape=INPUT_SIZE + (CHANNELS,) 98 | ), 99 | ] + [ 100 | (def layer -> 101 | layer.inbound_nodes = []; 102 | layer)(layer) 103 | for layer in agent.model.layers 104 | ]) 105 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/policy.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import numpy as np 3 | from rl.policy import BoltzmannQPolicy 4 | 5 | from minecraft_deep_learning.constants import ( 6 | LOG_INTERVAL, 7 | DEBUG, 8 | BOLTZMANN_TAU, 9 | USE_ACTIONS, 10 | ) 11 | 12 | # Policy setup: 13 | def build_policies(): 14 | """Build the training and test exploration policies.""" 15 | print("Using Boltzmann exploration with tau = {}".format(BOLTZMANN_TAU)) 16 | policy = BoltzmannQPolicy(tau=BOLTZMANN_TAU) 17 | 18 | if DEBUG: 19 | policy._counter = 0 20 | policy._min_counter = 0 21 | old_select_action = policy.select_action 22 | def policy.select_action(q_values): 23 | if policy._counter and policy._counter % LOG_INTERVAL == 0: 24 | print("\neffective epsilon: {} (selected min {}/{} times)".format( 25 | policy._min_counter/policy._counter, 26 | policy._min_counter, 27 | policy._counter, 28 | )) 29 | min_action = np.argmin(q_values) 30 | action = old_select_action(q_values) 31 | if action == min_action: 32 | policy._min_counter += 1 33 | policy._counter += 1 34 | return action 35 | 36 | return policy, policy # train and test policies are identical 37 | -------------------------------------------------------------------------------- /minecraft_deep_learning-source/processor.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import math 3 | import os.path 4 | from pprint import pprint 5 | 6 | import numpy as np 7 | from PIL import Image 8 | from rl.core import Processor 9 | 10 | from minecraft_deep_learning.constants import ( 11 | IMAGE_SIZE, 12 | DOWNSAMPLE, 13 | INPUT_SIZE, 14 | GRAYSCALE, 15 | OBSERVE_DEPTH, 16 | CHANNELS, 17 | WINDOW_SIZE, 18 | TRIM_HEIGHT, 19 | DEBUG, 20 | LOG_INTERVAL, 21 | DEBUG_LOG_INTERVAL, 22 | USE_REWARDS, 23 | POSITION_POTENTIAL, 24 | ANGLE_POTENTIAL, 25 | IMAGES_DIR, 26 | DISCOUNT_GAMMA, 27 | REWARD_POTENTIAL, 28 | ) 29 | from minecraft_deep_learning.display import ( 30 | create_screen, 31 | show_array, 32 | pump_events, 33 | ) 34 | 35 | # Processor: 36 | class MinecraftProcessor(Processor): 37 | """Convert gym_minecraft output into a form understood by keras-rl.""" 38 | handle_events = staticmethod(pump_events) 39 | 40 | def __init__(self, use_display=None, always_show_rewards=False): 41 | super(MinecraftProcessor, self).__init__() 42 | self.screen 
= None 43 | self.always_show_rewards = always_show_rewards 44 | self.set_prev_potential() 45 | if use_display ?? DEBUG: 46 | self.counter = -1 47 | self.use_display() 48 | 49 | def use_display(self): 50 | """Enable logging of observations to the screen.""" 51 | if self.screen is None: 52 | self.screen_size = IMAGE_SIZE 53 | self.screen = create_screen(self.screen_size) 54 | 55 | def process_step(self, observation, reward, done, info): 56 | """Process a step from the environment. Called by the agent.""" 57 | if DEBUG: 58 | self.counter += 1 59 | processed_step = ( 60 | self.process_observation(observation), 61 | self.process_reward(reward, info), 62 | done, 63 | self.process_info(info), 64 | ) 65 | if done: 66 | self.set_prev_potential() 67 | return processed_step 68 | 69 | def set_prev_potential(self, reward=None): 70 | """Set the previous reward for computing reward deltas.""" 71 | self.prev_potential = reward 72 | 73 | def process_observation(self, observation): 74 | assert IMAGE_SIZE is not None and observation.shape == IMAGE_SIZE + (3 + OBSERVE_DEPTH,), observation.shape 75 | processed_observation = ( 76 | observation 77 | |> Image.fromarray 78 | # PIL uses (width, height) not (height, width) 79 | |> (.resize(IMAGE_SIZE |> map$(-> _//DOWNSAMPLE) |> reversed |> tuple) 80 | if DOWNSAMPLE and DOWNSAMPLE != 1 else ->_) 81 | |> (.convert("L") if GRAYSCALE else ->_) 82 | |> np.asarray$(dtype="uint8") 83 | ) 84 | if TRIM_HEIGHT: 85 | height = processed_observation.shape[0] 86 | processed_observation = processed_observation[height//2:] 87 | assert processed_observation.shape == INPUT_SIZE + ((CHANNELS,) if CHANNELS > 1 else ()), processed_observation.shape 88 | 89 | if DEBUG and self.counter % LOG_INTERVAL == 0: 90 | image_file = os.path.join(IMAGES_DIR, "obs_{}.png".format(self.counter)) 91 | Image.fromarray(processed_observation).save(image_file) 92 | 93 | if self.screen is not None: 94 | display_image = ( 95 | processed_observation 96 | |> Image.fromarray 97 | |> .convert("RGB") 98 | # PIL uses (width, height) not (height, width) 99 | |> .resize(self.screen_size |> reversed |> tuple) 100 | |> np.asarray$(dtype="uint8") 101 | ) 102 | assert display_image.shape == self.screen_size + (3,), (display_image.shape, self.screen_size + (3,)) 103 | show_array(self.screen, display_image) 104 | self.handle_events() 105 | 106 | return processed_observation 107 | 108 | def process_state_batch(self, batch): 109 | assert batch.shape[1:] == (WINDOW_SIZE,) + INPUT_SIZE + ((CHANNELS,) if CHANNELS > 1 else ()), batch.shape 110 | # We could perform this processing step in `process_observation`. In this case, however, 111 | # we would need to store a `float32` array instead, which is 4x more memory intensive than 112 | # a `uint8` array. This matters if we store 1M observations. 
113 | processed_batch = batch.astype("float32")/255.0 114 | if CHANNELS > 1: 115 | # merge WINDOW_SIZE and CHANNELS together 116 | processed_batch = ( 117 | np.moveaxis(processed_batch, -1, 2) 118 | |> .reshape((batch.shape[0], -1) + INPUT_SIZE) 119 | ) 120 | assert processed_batch.shape == (batch.shape[0], CHANNELS * WINDOW_SIZE) + INPUT_SIZE, processed_batch.shape 121 | return processed_batch 122 | 123 | def process_info(self, info): 124 | # dictionary-valued infos are not supported by keras-rl 125 | info["observation"] = ( 126 | list(info["observation"].items()) 127 | if info["observation"] is not None else [] 128 | ) 129 | if DEBUG and self.counter % LOG_INTERVAL == 0: 130 | pprint(info) 131 | return info 132 | 133 | @property 134 | def show_rewards(self) = 135 | DEBUG or self.always_show_rewards 136 | 137 | def process_reward(self, reward, info): 138 | if self.show_rewards: 139 | reward_history = [(reward, None)] 140 | 141 | reward = USE_REWARDS.get(reward, 0) 142 | if self.show_rewards: 143 | reward_history.append((reward, "using")) 144 | 145 | if reward == 0 and (REWARD_POTENTIAL ?? False) is not False and info["observation"] is not None: 146 | potential = 0 147 | 148 | if POSITION_POTENTIAL: 149 | reward_coords, metric = POSITION_POTENTIAL 150 | x = np.asarray([ 151 | info["observation"][coord.upper() + "Pos"] 152 | for coord in reward_coords |> sorted 153 | ]) 154 | y = np.asarray([ 155 | pos for coord, pos in reward_coords.items() |> sorted 156 | ]) 157 | potential += metric(x, y) 158 | if self.show_rewards: 159 | reward_history.append((potential, "position {}".format({ 160 | coord: x[i] for i, coord in reward_coords |> sorted |> enumerate 161 | }))) 162 | 163 | if ANGLE_POTENTIAL: 164 | desired_x, desired_z, metric = ANGLE_POTENTIAL 165 | x, z, raw_yaw = ("XPos", "ZPos", "Yaw") |> map$(info["observation"][]) 166 | # calculate yaw and desired_yaw to match up 167 | yaw = (raw_yaw + 90) % 360 168 | desired_yaw = ( 169 | math.atan2(z - desired_z, x - desired_x) + math.pi 170 | |> math.degrees 171 | ) 172 | potential += metric(yaw, desired_yaw) 173 | if self.show_rewards: 174 | reward_history.append((potential, "got angle {}; desired angle {}".format(yaw, desired_yaw))) 175 | 176 | if self.prev_potential is not None: 177 | reward += (DISCOUNT_GAMMA + REWARD_POTENTIAL)*potential - self.prev_potential 178 | self.set_prev_potential(potential) 179 | if self.show_rewards: 180 | reward_history.append((reward, "reward shaping {} - {}".format(potential, self.prev_potential))) 181 | 182 | if self.always_show_rewards or self.show_rewards and ( 183 | self.counter % DEBUG_LOG_INTERVAL == 0 184 | or abs(reward) >= 1 185 | ): 186 | prev_reward = reward_history[0][0] 187 | reward_str = "reward: {}".format(prev_reward) 188 | for i in range(1, len(reward_history)): 189 | reward_i, reason = reward_history[i] 190 | reward_diff = reward_i - prev_reward 191 | if reward_diff > 0: 192 | reward_str += "\t+ {}".format(reward_diff) 193 | elif reward_diff < 0: 194 | reward_str += "\t- {}".format(-reward_diff) 195 | reward_str += " ({})".format(reason) 196 | prev_reward = reward_i 197 | print("\n{}\n\t= {}".format(reward_str, reward)) 198 | 199 | return reward 200 | -------------------------------------------------------------------------------- /saved_weights/weights_final.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evhub/minecraft-deep-learning/b80c33be66306debda5c1e3aa3d3f5adb1444af2/saved_weights/weights_final.h5f 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.coco: -------------------------------------------------------------------------------- 1 | # Imports: 2 | import setuptools 3 | 4 | # Constants: 5 | NAME = "minecraft-deep-learning" 6 | VERSION = "0.0.1" 7 | DESCRIPTION = "Deep reinforcement learning in Minecraft using gym-minecraft and keras-rl." 8 | HOME_URL = "https://github.com/evhub/minecraft-deep-learning" 9 | AUTHOR = "Evan Hubinger" 10 | AUTHOR_EMAIL = "evanjhub@gmail.com" 11 | REQUIREMENTS = [ 12 | "gym", 13 | "keras", 14 | "keras-rl", 15 | "tensorflow-gpu", 16 | "pygame", 17 | "scipy", 18 | "quiver_engine", 19 | "progressbar2", 20 | ] 21 | 22 | # Setup: 23 | setuptools.setup( 24 | name=NAME, 25 | version=VERSION, 26 | description=DESCRIPTION, 27 | url=HOME_URL, 28 | author=AUTHOR, 29 | author_email=AUTHOR_EMAIL, 30 | install_requires=REQUIREMENTS, 31 | packages=setuptools.find_packages(), 32 | ) 33 | --------------------------------------------------------------------------------