├── common ├── __init__.py ├── action_info_types.py ├── random_agent.py ├── box_quantizer.py ├── interactive_agent.py ├── markov_ensemble.py ├── interactive_markov_agent.py ├── markov_model.py └── demo_recorder.py ├── examples ├── __init__.py ├── lunar_lander │ ├── __init__.py │ ├── lunar_lander.pickle │ ├── readme.md │ └── interactively_trainable_agent.py └── mountain_car │ ├── __init__.py │ ├── mountain_car.pickle │ ├── readme.md │ └── interactively_trainable_agent.py ├── .gitignore ├── notebooks ├── fps.pdf ├── fps.png ├── lunar_lander.pdf ├── lunar_lander.png ├── mountain_car.pdf ├── mountain_car.png ├── interactive_training.pdf ├── interactive_training.png └── explore_metrics.ipynb ├── requirements.txt ├── readme.md └── LICENSE /common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/lunar_lander/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mountain_car/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE files 2 | .idea/ 3 | __pycache__ -------------------------------------------------------------------------------- /notebooks/fps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/fps.pdf -------------------------------------------------------------------------------- /notebooks/fps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/fps.png -------------------------------------------------------------------------------- /notebooks/lunar_lander.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/lunar_lander.pdf -------------------------------------------------------------------------------- /notebooks/lunar_lander.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/lunar_lander.png -------------------------------------------------------------------------------- /notebooks/mountain_car.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/mountain_car.pdf -------------------------------------------------------------------------------- /notebooks/mountain_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/mountain_car.png -------------------------------------------------------------------------------- /notebooks/interactive_training.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/interactive_training.pdf -------------------------------------------------------------------------------- /notebooks/interactive_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/notebooks/interactive_training.png -------------------------------------------------------------------------------- /examples/lunar_lander/lunar_lander.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/examples/lunar_lander/lunar_lander.pickle -------------------------------------------------------------------------------- /examples/mountain_car/mountain_car.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/electronicarts/interactive_training/HEAD/examples/mountain_car/mountain_car.pickle -------------------------------------------------------------------------------- /examples/mountain_car/readme.md: -------------------------------------------------------------------------------- 1 | # Interactive Mountain Car 2 | 3 | To run: `python interactively_trainable_agent.py` 4 | 5 | Controls: 6 | 7 | - Left: Left Arrow 8 | - Right: Right Arrow 9 | - NoOp: Spacebar 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2019.3.9 2 | future==0.17.1 3 | gym==0.12.5 4 | numpy==1.16.4 5 | pyglet==1.3.2 6 | pyobjc-core==5.2 7 | pyobjc-framework-Cocoa==5.2 8 | pyobjc-framework-Quartz==5.2 9 | scipy==1.3.0 10 | singleton-decorator==1.0.0 11 | six==1.12.0 12 | -------------------------------------------------------------------------------- /examples/lunar_lander/readme.md: -------------------------------------------------------------------------------- 1 | # Interactive Lunar Lander 2 | 3 | To run: `python interactively_trainable_agent.py` 4 | 5 | Controls: 6 | 7 | - Left Thruster: Left Arrow 8 | - Bottom Thruster: Up Arrow 9 | - Right Thruster: Right Arrow 10 | - NoOp: Spacebar 11 | 12 | Note: 13 | 14 | Currently there are OpenAI Gym issues that might make this fail on MacOS. 15 | -------------------------------------------------------------------------------- /common/action_info_types.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | Enumeration of possible action info types.""" 3 | 4 | 5 | class ActionInfoType: 6 | human = "human" # Human provided the action 7 | agent = None # An agent other than Markov agent provided the action 8 | found = True # Markov agent provided the action 9 | not_found = False # Markov agent couldn't provide the action 10 | -------------------------------------------------------------------------------- /common/random_agent.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | Random agent is the base for other agents and allows base line random action sampling.""" 3 | from common.action_info_types import ActionInfoType 4 | 5 | 6 | class RandomAgent: 7 | def __init__(self, env): 8 | self.env = env 9 | 10 | def get_action(self, observation, reward, done, info): 11 | """ Can return an additional object describing the action. """ 12 | return self.env.action_space.sample(), ActionInfoType.agent 13 | 14 | -------------------------------------------------------------------------------- /common/box_quantizer.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | The module provides simple uniform quantization under the assumption that 3 | the range of state variables is known. We expect observations represented 4 | as a numpy array and quantize them into bins of size defined by fidelity. """ 5 | import numpy as np 6 | 7 | 8 | class BoxQuantizer: 9 | def __init__(self, box, fidelity): 10 | self.box_low = box[0] 11 | self.box_high = box[1] 12 | self.size = box[1] - box[0] 13 | self.fidelity = fidelity 14 | 15 | def quantize(self, vec): 16 | return tuple(np.floor((vec - self.box_low)/self.size * self.fidelity)) 17 | 18 | if __name__ == "__main__": 19 | box = [np.array([-2, -1]), np.array([2, 1])] 20 | fidelity = np.array([20, 5]) 21 | bq = BoxQuantizer(box, fidelity) 22 | vec = np.array([-0, 0.2]) 23 | print(bq.quantize(vec)) 24 | -------------------------------------------------------------------------------- /common/interactive_agent.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | An interactive agent allows a human player to override the actions of an agent 3 | supplied in the class instance constructor. """ 4 | from common.action_info_types import ActionInfoType 5 | 6 | 7 | class InteractiveAgent: 8 | def __init__(self, key_to_action, agent): 9 | self.key_to_action = key_to_action 10 | self.auto_agent = agent 11 | self.latest_key = None 12 | 13 | def get_action(self, observation, reward, done, info): 14 | action = self.key_to_action.get(self.latest_key) 15 | if action is not None: 16 | return action, ActionInfoType.human 17 | else: 18 | return self.auto_agent.get_action(observation, reward, done, info) 19 | 20 | def on_press(self, key, mod): 21 | self.latest_key = key 22 | 23 | def on_release(self, key, mod): 24 | self.latest_key = None 25 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Interactive Training 2 | 3 | The repository provides two minimal examples of Markov Ensemble discussed in the paper "Towards Interactive Training of Non-Player Characters in Video Games" (http://arxiv.org/abs/1906.00535) presented at 2019 ICML Workshop on Human in the Loop Learning (HILL 2019), Long Beach, USA. 4 | 5 | ## Credits 6 | + Igor Borovikov - iborovikov@ea.com 7 | + Jesse Harder - jharder@ea.com 8 | 9 | ## Project Structure 10 | 11 | - **common/** - a source of code files used throughout the project. 12 | - **examples/** - contains two example demonstrations that can be run: lunar_lander and mountain_car. 13 | - **notebooks/** - contains a Jupyter Notebook and various files produced by the notebook regarding performance in the example environments. 14 | 15 | ## Running Examples 16 | 17 | To run the examples in this project, navigate to the desired folder under `examples/`. Within either `lunar_lander/` or `mountain_car/`, run `python interactively_trainable_agent.py`. Each folder contains a readme with more information on running that example. 18 | 19 | 20 | ## License 21 | Modified BSD License (3-Clause BSD license) see the file LICENSE in the project root. 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of Electronic Arts, Inc. ("EA") nor the names of 13 | its contributors may be used to endorse or promote products derived 14 | from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY ELECTRONIC ARTS AND ITS CONTRIBUTORS "AS IS" AND ANY 17 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL ELECTRONIC ARTS OR ITS CONTRIBUTORS BE LIABLE FOR ANY 20 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /common/markov_ensemble.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | The classes cover a range of Markov model orders from min_order to max_order, 3 | constructed from the same sequences of observations and raw actions. Such models 4 | represent the first step in building models ensemble (see Algorithm 1 in the paper). """ 5 | from common.action_info_types import ActionInfoType 6 | from common.markov_model import MarkovModel 7 | 8 | 9 | class MarkovEnsemble: 10 | def __init__(self, observations, actions, min_order, max_order, quantizers): 11 | self.models = [MarkovModel(observations, actions, m, q) 12 | for m in range(min_order, max_order) 13 | for q in quantizers] 14 | 15 | def get_action(self, observations, actions): 16 | for mcm in self.models[::-1]: 17 | action, found = mcm.next_action(observations, actions) 18 | if found is True: 19 | return action, ActionInfoType.found 20 | return None, ActionInfoType.not_found 21 | 22 | 23 | class MarkovEnsembleStack: 24 | def __init__(self, min_order, max_order, quantizers): 25 | self.min_order = min_order 26 | self.max_order = max_order 27 | self.quantizers = quantizers 28 | self.stack = [] 29 | 30 | def add_demo(self, observations, actions): 31 | self.stack.append(MarkovEnsemble( 32 | observations, actions, self.min_order, self.max_order, self.quantizers 33 | )) 34 | 35 | def get_action(self, observations, actions): 36 | for model in self.stack: 37 | action, action_info = model.get_action(observations, actions) 38 | if action_info: 39 | return action, action_info 40 | return None, ActionInfoType.not_found 41 | 42 | -------------------------------------------------------------------------------- /common/interactive_markov_agent.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | InteractiveMarkovAgent class combines interactive player input with the input from the ensemble model. 3 | Also, it updates the ensemble as needed to capture new demonstrations from the player. """ 4 | from common.action_info_types import ActionInfoType 5 | from common.interactive_agent import InteractiveAgent 6 | from common.demo_recorder import DemoRecorder 7 | 8 | 9 | class InteractiveMarkovAgent(InteractiveAgent): 10 | def __init__(self, key_to_action, agent, ensemble_stack): 11 | super().__init__(key_to_action, agent) 12 | self.demos = DemoRecorder() 13 | self.ensemble = ensemble_stack 14 | 15 | def get_action(self, observation, reward, done, info): 16 | action = None 17 | action_info = None 18 | 19 | # Check if there is human input and record that as the next action to take 20 | human_action = self.key_to_action.get(self.latest_key) 21 | if human_action is not None: 22 | action = human_action 23 | action_info = ActionInfoType.human 24 | 25 | # If there is no human input, check for an action computed by the ensemble model 26 | if action is None: 27 | ensemble_action, action_found = self.ensemble.get_action(self.demos.observations, self.demos.actions) 28 | if action_found: 29 | action = ensemble_action 30 | action_info = ActionInfoType.found 31 | 32 | # If ensemble didn't produce an action, let the auto play agent to produce one: 33 | if action is None: 34 | action, action_info = self.auto_agent.get_action(observation, reward, done, info) 35 | 36 | # Record action regardless of its origin and update ensemble as needed 37 | demo_status = self.demos.update(action, action_info, observation, reward, done) 38 | if DemoRecorder.ended == demo_status or (action_info == "human" and done): 39 | self.ensemble.add_demo(self.demos.demos[-1]["observations"], 40 | self.demos.demos[-1]["actions"]) 41 | 42 | return action, action_info 43 | -------------------------------------------------------------------------------- /examples/mountain_car/interactively_trainable_agent.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | The module demonstrates interactively trainable Mountain Car agent based on Markov Ensemble.""" 3 | import gym 4 | import numpy as np 5 | import sys 6 | sys.path.append('../../') 7 | 8 | from common.action_info_types import ActionInfoType 9 | from common.box_quantizer import BoxQuantizer 10 | from common.interactive_markov_agent import InteractiveMarkovAgent 11 | from common.markov_ensemble import MarkovEnsembleStack 12 | from common.random_agent import RandomAgent 13 | 14 | 15 | if __name__ == '__main__': 16 | env = gym.make("MountainCar-v0") 17 | 18 | # RandomAgent provides default actions: 19 | random_agent = RandomAgent(env) 20 | 21 | # Ensemble and its parameters: 22 | min_order = 0 23 | max_order = 6 24 | box = [np.array([-1.3, -0.1]), np.array([0.7, 0.1])] # The actual box: position -1.2 0.6; velocity -0.07 0.07 25 | quantizers = [BoxQuantizer(box, np.array([2**k, 2**k])).quantize for k in range(3, 10)] 26 | mes = MarkovEnsembleStack(min_order, max_order, quantizers) 27 | 28 | key_to_action = { 29 | 65361: 0, # Left 30 | 32: 1, # Space 31 | 65363: 2 # Right 32 | } 33 | interactive_agent = InteractiveMarkovAgent(key_to_action, random_agent, mes) 34 | 35 | observation, reward, done, info = env.reset(), None, False, None 36 | interactive_agent.demos.update(None, ActionInfoType.agent, observation, reward, done) 37 | 38 | episode_id = 0 39 | for _ in range(15000): 40 | env.render() 41 | env.unwrapped.viewer.window.on_key_press = interactive_agent.on_press 42 | env.unwrapped.viewer.window.on_key_release = interactive_agent.on_release 43 | 44 | action, _ = interactive_agent.get_action(observation, reward, done, info) 45 | observation, reward, done, info = env.step(action) 46 | 47 | if done: 48 | episode_id += 1 49 | print("Episode", episode_id, "ended.") 50 | interactive_agent.demos.end_episode(reward, 'mountain_car.pickle') 51 | observation, reward, done, info = env.reset(), None, False, None 52 | interactive_agent.demos.update(None, ActionInfoType.agent, observation, reward, done) 53 | 54 | env.close() 55 | -------------------------------------------------------------------------------- /common/markov_model.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | A Markov model defines probabilities of transitions in a stochastic system. 3 | Here, using demonstration episodes (games played by a human player), 4 | we compute the probabilities of the next action in the currently observed state 5 | and with N actions already taken. The model may not necessarily find such an 6 | action if the game state together with the sequence of previous actions 7 | was never observed in the human play-through. 8 | 9 | This model doesn't build an explicit dictionary of frequencies; instead, it keeps 10 | the original user input and collects pointers into it. This way we simplify 11 | sampling of the original continuous channels which helps to preserve temporal 12 | coherence of the model playback better and reproduce the distribution of 13 | the observed inputs. 14 | """ 15 | from collections import defaultdict 16 | import numpy as np 17 | from common.action_info_types import ActionInfoType 18 | 19 | 20 | class MarkovModel: 21 | def __init__(self, states, actions, model_order, state_preproc=lambda x: x): 22 | self.order = model_order 23 | self.state_preproc = state_preproc 24 | self.states = [self.state_preproc(s) for s in states] 25 | self.actions = actions 26 | self.ngram_pointers = defaultdict(list) 27 | self.last_action_idx = 0 28 | self._build_dictionary() 29 | 30 | def _build_dictionary(self): 31 | """Key is last state and N actions preceding it. """ 32 | sequence_length = len(self.actions) 33 | for ii in range(sequence_length - self.order): 34 | ngram = tuple([self.states[ii + self.order - 1]] + self.actions[ii: ii + self.order]) 35 | self.ngram_pointers[ngram].append(ii) 36 | 37 | def next_action(self, states, actions): 38 | """ Expects states and actions already being preprocessed. """ 39 | last_state = self.state_preproc(states[-1]) 40 | ngram = tuple([last_state] + list(actions[-self.order:])) 41 | occurrences = self.ngram_pointers.get(ngram, []) 42 | if not occurrences: 43 | return None, ActionInfoType.not_found 44 | pick_id = 0 if len(occurrences) == 1 else np.random.choice(range(len(occurrences))) 45 | idx = occurrences[pick_id] 46 | self.last_action_idx = idx 47 | return self.actions[idx + self.order], ActionInfoType.found 48 | -------------------------------------------------------------------------------- /examples/lunar_lander/interactively_trainable_agent.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | The module demonstrates interactively trainable Lunar Lander agent based on Markov Ensemble.""" 3 | import gym 4 | import time 5 | import numpy as np 6 | import sys 7 | sys.path.append('../../') 8 | 9 | from common.action_info_types import ActionInfoType 10 | from common.box_quantizer import BoxQuantizer 11 | from common.interactive_markov_agent import InteractiveMarkovAgent 12 | from common.markov_ensemble import MarkovEnsembleStack 13 | from common.random_agent import RandomAgent 14 | 15 | 16 | if __name__ == '__main__': 17 | env = gym.make("LunarLander-v2") 18 | print(env.observation_space) 19 | 20 | # Provides default actions: 21 | random_agent = RandomAgent(env) 22 | 23 | # Ensemble and its parameters: 24 | min_order = 0 25 | max_order = 6 26 | box = [np.array([-10]*8), np.array([10]*8)] 27 | quantizers = [BoxQuantizer(box, np.array([2**k]*8)).quantize for k in range(3, 12)] 28 | mes = MarkovEnsembleStack(min_order, max_order, quantizers) 29 | 30 | # See OpenAI documentation for the definition of the state-action space for the lander environment. 31 | # https://gym.openai.com/envs/LunarLander-v2/ 32 | key_to_action = { 33 | 32: 0, # No action, stops all thrusters. Space. 34 | 65363: 1, # Right thruster. Right arrow. 35 | 65362: 2, # Main thruster. Up arrow. 36 | 65361: 3 # Left thruster. Left Arrow. 37 | } 38 | interactive_agent = InteractiveMarkovAgent(key_to_action, random_agent, mes) 39 | 40 | observation, reward, done, info = env.reset(), 0, False, None 41 | interactive_agent.demos.update(None, ActionInfoType.agent, observation, reward, done) 42 | 43 | episode_id = 0 44 | for _ in range(15000): 45 | env.render() 46 | env.unwrapped.viewer.window.on_key_press = interactive_agent.on_press 47 | env.unwrapped.viewer.window.on_key_release = interactive_agent.on_release 48 | 49 | action, _ = interactive_agent.get_action(observation, reward, done, info) 50 | observation, reward, done, info = env.step(action) 51 | time.sleep(0.1) # Simplifies human interaction. 52 | 53 | if done: 54 | episode_id += 1 55 | print("Episode", episode_id, "ended.") 56 | interactive_agent.demos.end_episode(reward, 'lunar_lander.pickle') 57 | time.sleep(1) # Simplifies human interaction. 58 | observation, reward, done, info = env.reset(), 0, False, None 59 | interactive_agent.demos.update(None, ActionInfoType.agent, observation, reward, done) 60 | 61 | env.close() 62 | -------------------------------------------------------------------------------- /common/demo_recorder.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2019 Electronic Arts Inc. All rights reserved. 2 | DemoRecorder instance records observations and actions, keeps separate 3 | records for human demonstrations. It can save episodes as native pickled files 4 | with references only to numpy and defaultdict. We use these demonstrations 5 | to build Markov Ensembles. """ 6 | import pickle 7 | from collections import defaultdict 8 | from common.action_info_types import ActionInfoType 9 | 10 | 11 | class DemoRecorder: 12 | ended = 1 13 | in_progress = 2 14 | no_update = 3 15 | 16 | def __init__(self): 17 | self.curr_episode = defaultdict(list) 18 | self.episodes = [] 19 | self.curr_demo = defaultdict(list) 20 | self.demos = [] 21 | 22 | def end_episode(self, reward, save_file="episodes.pickle"): 23 | self.rewards.append(reward) 24 | print('Total reward:', sum([r for r in self.rewards if r is not None])) 25 | if self.curr_demo: 26 | self.demos.append(self.curr_demo) 27 | self.curr_demo = defaultdict(list) 28 | self.episodes.append(self.curr_episode) 29 | self.curr_episode = defaultdict(list) 30 | if save_file: 31 | with open(save_file, "wb") as outfile: 32 | pickle.dump(self.episodes, outfile, protocol=pickle.HIGHEST_PROTOCOL) 33 | 34 | @property 35 | def observations(self): 36 | return self.curr_episode['observations'] 37 | 38 | @property 39 | def actions(self): 40 | return self.curr_episode['actions'] 41 | 42 | @property 43 | def action_infos(self): 44 | return self.curr_episode['action_infos'] 45 | 46 | @property 47 | def rewards(self): 48 | return self.curr_episode['rewards'] 49 | 50 | def update(self, action, action_info, observation, reward, done): 51 | self.observations.append(observation) 52 | self.actions.append(action) 53 | self.action_infos.append(action_info) 54 | self.rewards.append(reward) 55 | 56 | human_input_stopped = len(self.action_infos) > 1 and \ 57 | self.action_infos[-2] == ActionInfoType.human and \ 58 | action_info == ActionInfoType.agent 59 | 60 | if self.curr_demo and human_input_stopped: 61 | self.demos.append(self.curr_demo) 62 | self.curr_demo = defaultdict(list) 63 | return self.ended 64 | 65 | if action_info is not None and not done: 66 | self.curr_demo["observations"].append(observation) 67 | self.curr_demo["actions"].append(action) 68 | return self.in_progress 69 | 70 | return self.no_update 71 | -------------------------------------------------------------------------------- /notebooks/explore_metrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Copyright (C) 2019 Electronic Arts Inc. All rights reserved.\n", 10 | "import os\n", 11 | "from collections import defaultdict\n", 12 | "import numpy as np\n", 13 | "import pandas as pd\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "%matplotlib inline" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "50 episodes loaded.\n" 28 | ] 29 | }, 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "dict_keys(['observations', 'actions', 'action_infos'])" 34 | ] 35 | }, 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "import pickle\n", 43 | "with open('..\\\\examples\\mountain_car\\\\mountain_car.pickle', \"rb\") as infile:\n", 44 | " episodes = pickle.load(infile)\n", 45 | "print(len(episodes), \"episodes loaded.\")\n", 46 | "episodes[0].keys()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "max_x = [max([ob[0] for ob in episode['observations']]) for episode in episodes]\n", 56 | "# print(\"Max x\", max_x)\n", 57 | "\n", 58 | "human_inputs = [len([info for info in episode['action_infos'] if info==\"human\"]) for episode in episodes]\n", 59 | "# print(\"Num human inputs\", human_inputs)\n", 60 | "\n", 61 | "ensemble_inputs = [len([info for info in episode['action_infos'] if info==True]) for episode in episodes]\n", 62 | "# print(\"Num sccessful ensemble inputs\", ensemble_inputs)\n", 63 | "\n", 64 | "failed_ensemble_inputs = [len([info for info in episode['action_infos'] if info==False]) for episode in episodes]\n", 65 | "# print(\"Num failed ensemble inputs\", failed_ensemble_inputs)\n", 66 | "\n", 67 | "random_inputs = [len([info for info in episode['action_infos'] if info==None]) for episode in episodes]\n", 68 | "# print('Random inputs', random_inputs)\n", 69 | "\n", 70 | "episode_lengths = [len(episode['actions']) for episode in episodes]\n", 71 | "# print('Episode lengths', episode_lengths)\n", 72 | "\n", 73 | "# Number of good human actions, i.e., when sign of velocity is same as the direction of input\n", 74 | "good_human_inputs = [len([ (a, ob, info) for a, ob, info in zip(episode['actions'], episode['observations'], episode['action_infos']) if info == \"human\" and (a - 1)*ob[0] >= 0]) for episode in episodes]\n", 75 | "# print('\"Good\" human inputs count', good_human_inputs)\n", 76 | "\n", 77 | "# Number of good ensemble actions, i.e., when sign of velocity is same as the direction of input\n", 78 | "good_ensemble_inputs = [len([ (a, ob, info) for a, ob, info in zip(episode['actions'], episode['observations'], episode['action_infos']) if info == True and (a - 1)*ob[0] >= 0]) for episode in episodes]\n", 79 | "# print('Good ensemble inputs count', good_ensemble_inputs)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "image/png": "\n", 90 | "text/plain": [ 91 | "
" 92 | ] 93 | }, 94 | "metadata": {}, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "t = range(1, len(max_x) + 1)\n", 100 | "\n", 101 | "# fig, ax = plt.subplots(figsize=(12,7))\n", 102 | "fig, ax = plt.subplots(figsize=(7,6))\n", 103 | "\n", 104 | "ax1 = plt.subplot(3, 1, 1)\n", 105 | "# color = 'red'\n", 106 | "ax1.set_ylabel('Performance\\n(goal $x = 0.5$)')\n", 107 | "ax1.plot(t, max_x, label=\"Best $x$\")\n", 108 | "ax1.tick_params(axis='y')\n", 109 | "ax1.legend(loc='upper left')\n", 110 | "plt.title(\"Performance of interactive Markov Ensemble for Mountain Car\")\n", 111 | "\n", 112 | "ax2 = plt.subplot(3, 1, 2)\n", 113 | "color = 'tab:red'\n", 114 | "ax2.set_ylabel('$N$ actions from\\nMarkov Ensemble') # we already handled the x-label with ax1\n", 115 | "ax2.plot(t, ensemble_inputs, label='All')\n", 116 | "ax2.plot(t, good_ensemble_inputs, label='Add energy', linestyle=\"--\")\n", 117 | "ax2.tick_params(axis='y')\n", 118 | "ax2.legend()\n", 119 | "plt.title(\"Actions produced by Markov Ensemble\")\n", 120 | "\n", 121 | "ax3 = plt.subplot(3, 1, 3)\n", 122 | "ax3.set_ylabel('$N$ actions\\nfrom human')\n", 123 | "ax3.bar(t, human_inputs, label='All')\n", 124 | "ax3.bar(t, good_human_inputs, width=0.65, label='Add energy')\n", 125 | "ax3.set_ylim([0,200])\n", 126 | "ax3.legend(loc='upper left')\n", 127 | "ax3.set_xlabel('Episode')\n", 128 | "plt.title(\"Human actions\")\n", 129 | "\n", 130 | "fig.tight_layout() # otherwise the right y-label is slightly clipped\n", 131 | "plt.savefig('mountain_car.pdf')\n", 132 | "plt.savefig('mountain_car.png')\n", 133 | "plt.show()\n", 134 | "plt.close()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "60 episodes loaded.\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "dict_keys(['observations', 'actions', 'action_infos', 'rewards'])" 153 | ] 154 | }, 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "with open('..\\\\examples\\\\lunar_lander\\\\lunar_lander.pickle', \"rb\") as infile:\n", 162 | " episodes = pickle.load(infile)\n", 163 | "print(len(episodes), \"episodes loaded.\")\n", 164 | "episodes[0].keys()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 9, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "rewards = [sum(episode['rewards']) for episode in episodes]\n", 174 | "# print(\"Rewards\", rewards)\n", 175 | "\n", 176 | "# TODO: detect first human input\n", 177 | "ma0 = np.mean(rewards[:10])\n", 178 | "# print('ma0:', ma0)\n", 179 | "ma1 = [np.mean(rewards[10+i:10+i+10]) for i in range(len(rewards)- 10 - 10)]\n", 180 | "# print('ma1:',ma1)\n", 181 | "\n", 182 | "human_inputs = [len([info for info in episode['action_infos'] if info==\"human\"]) for episode in episodes]\n", 183 | "# print(\"Num human inputs\", human_inputs)\n", 184 | "\n", 185 | "ensemble_inputs = [len([info for info in episode['action_infos'] if info==True]) for episode in episodes]\n", 186 | "# print(\"Num sccessful ensemble inputs\", ensemble_inputs)\n", 187 | "\n", 188 | "failed_ensemble_inputs = [len([info for info in episode['action_infos'] if info==False]) for episode in episodes]\n", 189 | "# print(\"Num failed ensemble inputs\", failed_ensemble_inputs)\n", 190 | "\n", 191 | "random_inputs = [len([info for info in episode['action_infos'] if info==None]) for episode in episodes]\n", 192 | "# print('Random inputs', random_inputs)\n", 193 | "\n", 194 | "episode_lengths = [len(episode['actions']) for episode in episodes]\n", 195 | "# print('Episode lengths', episode_lengths)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 11, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "image/png": "\n", 206 | "text/plain": [ 207 | "
" 208 | ] 209 | }, 210 | "metadata": {}, 211 | "output_type": "display_data" 212 | } 213 | ], 214 | "source": [ 215 | "t = range(len(rewards))\n", 216 | "\n", 217 | "# fig, ax = plt.subplots(figsize=(12,7))\n", 218 | "fig, ax = plt.subplots(figsize=(7,6))\n", 219 | "\n", 220 | "ax1 = plt.subplot(3, 1, 1)\n", 221 | "color = 'red'\n", 222 | "ax1.set_ylabel('Performance')\n", 223 | "ax1.plot(t, rewards, label=\"Reward\")\n", 224 | "ma = [ma0 for _ in range(10)] + [ma1[0] for _ in range(10)] + ma1\n", 225 | "# print(ma)\n", 226 | "ax1.plot(t, ma, label=\"Avg Reward\", linestyle=\"--\")\n", 227 | "# ax1.plot(range(20, len(rewards)), ma1, label=\"Reward MA(10)\", linestyle=\"--\")\n", 228 | "ax1.tick_params(axis='y')\n", 229 | "ax1.legend(loc='lower right')\n", 230 | "plt.title(\"Performance of interactive Markov Ensemble for Lunar Lander\")\n", 231 | "\n", 232 | "ax2 = plt.subplot(3, 1, 2)\n", 233 | "color = 'tab:red'\n", 234 | "ax2.set_ylabel('$N$ actions from\\nMarkov Ensemble') # we already handled the x-label with ax1\n", 235 | "ax2.plot(t, ensemble_inputs, label='Ensemble actions')\n", 236 | "# ax2.plot(t, good_ensemble_inputs, label='Ensemble actions: \"Good\"\\n(adding kinetic energy)', linestyle=\"--\")\n", 237 | "ax2.tick_params(axis='y')\n", 238 | "ax2.legend()\n", 239 | "plt.title(\"Actions by Markov Ensemble\")\n", 240 | "\n", 241 | "ax3 = plt.subplot(3, 1, 3)\n", 242 | "ax3.set_ylabel('$N$ actions\\nfrom human')\n", 243 | "ax3.bar(t, human_inputs, label='Human actions')\n", 244 | "# ax3.bar(t, good_human_inputs, width=0.65, label='Human actions: \"Good\"\\n(adding kinetic energy)')\n", 245 | "# ax3.set_ylim([0,200])\n", 246 | "ax3.legend(loc='upper right')\n", 247 | "ax3.set_xlabel('Episode')\n", 248 | "plt.title(\"Human actions\")\n", 249 | "\n", 250 | "fig.tight_layout()\n", 251 | "plt.savefig('lunar_lander.pdf')\n", 252 | "plt.savefig('lunar_lander.png')\n", 253 | "plt.show()\n", 254 | "plt.close()" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.5" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | --------------------------------------------------------------------------------