├── babyai_levels.png ├── babyaiPP ├── __init__.py ├── descriptive_levels.py ├── decriptive_level_base.py ├── additional_levels.py └── dynamics_levels.py ├── .gitmodules ├── scripts ├── train_att_fusion_agent_redball.sh ├── train_concat_fusion_agent_redball.sh ├── train_film_agent_redball.sh └── train_image_only_agent_redball.sh ├── experiment ├── bAIPlusgui.py ├── train_il.py ├── batch_evaluate.py ├── evaluate.py ├── arguments.py ├── make_agent_demos.py ├── train_rl.py ├── model.py └── imitation.py ├── LICENSE ├── .gitignore └── README.md /babyai_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caotians1/BabyAIPlusPlus/HEAD/babyai_levels.png -------------------------------------------------------------------------------- /babyaiPP/__init__.py: -------------------------------------------------------------------------------- 1 | from . import descriptive_levels 2 | from . import dynamics_levels 3 | from . import decriptive_level_base 4 | from . 
import additional_levels -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "gym-minigrid"] 2 | path = gym-minigrid 3 | url = https://github.com/caotians1/gym-minigrid.git 4 | branch = master 5 | 6 | [submodule "babyai"] 7 | path = babyai 8 | url = https://github.com/caotians1/babyai.git 9 | branch = master 10 | -------------------------------------------------------------------------------- /scripts/train_att_fusion_agent_redball.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | frames=5000000 3 | batch_size=12800 4 | procs=256 5 | log_interval=2 6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0 7 | 8 | python experiment/train_rl.py --env $env --arch fusion --tb --frames $frames --batch-size $batch_size --procs $procs --log-interval $log_interval 9 | -------------------------------------------------------------------------------- /scripts/train_concat_fusion_agent_redball.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | frames=5000000 3 | batch_size=12800 4 | procs=256 5 | log_interval=2 6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0 7 | 8 | python experiment/train_rl.py --env $env --arch cnn --tb --frames $frames --batch-size $batch_size --procs $procs --log-interval $log_interval 9 | -------------------------------------------------------------------------------- /scripts/train_film_agent_redball.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | frames=5000000 3 | batch_size=12800 4 | procs=256 5 | log_interval=2 6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0 7 | 8 | python experiment/train_rl.py --env $env --arch expert_filmcnn --tb --frames $frames --batch-size $batch_size --procs $procs --log-interval $log_interval 9 | 
class Level_GoTo_Desc(DescriptiveLevel, Level_GoTo):
    """``Level_GoTo`` combined with the ``DescriptiveLevel`` mixin.

    The constructor takes the room/grid parameters listed below and forwards
    them unchanged to ``Level_GoTo.__init__``.

    Args:
        room_size: forwarded to ``Level_GoTo.__init__`` (default 8).
        num_rows: forwarded to ``Level_GoTo.__init__`` (default 3).
        num_cols: forwarded to ``Level_GoTo.__init__`` (default 3).
        num_dists: forwarded to ``Level_GoTo.__init__`` (default 18).
        doors_open: forwarded to ``Level_GoTo.__init__`` (default False).
        seed: forwarded to ``Level_GoTo.__init__`` (default None).
    """

    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 num_dists=18,
                 doors_open=False,
                 seed=None
                 ):

        # Each base is initialised explicitly instead of through a
        # cooperative super() chain; the mixin is set up before the level.
        # NOTE(review): the meaning of the positional arguments (1, 1.0) is
        # defined by DescriptiveLevel in decriptive_level_base.py, which is
        # not visible here -- confirm before changing them.
        DescriptiveLevel.__init__(self, 1, 1.0)
        Level_GoTo.__init__(self, room_size, num_rows, num_cols, num_dists, doors_open, seed)


class Level_GoTo_DescDynamic(DescriptiveLevel, DynamicsLevel, Level_GoTo):
    """``Level_GoTo`` combined with both ``DescriptiveLevel`` and ``DynamicsLevel``.

    Accepts the same constructor parameters as ``Level_GoTo_Desc``.
    """

    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 num_dists=18,
                 doors_open=False,
                 seed=None):
        # Only DynamicsLevel.__init__ is called here; presumably it forwards
        # the trailing arguments to the remaining bases -- TODO confirm.
        # NOTE(review): ``seed`` is passed twice (3rd and last positional
        # argument); verify this against the DynamicsLevel signature in
        # dynamics_levels.py.
        DynamicsLevel.__init__(self, [1, 3, 5], 3, seed, 1, 1.0, room_size, num_rows, num_cols, num_dists, doors_open, seed)


# Register both level classes for this module via babyai's register_levels.
register_levels(__name__, {'Level_GoTo_Desc':Level_GoTo_Desc, 'Level_GoTo_DescDynamic':Level_GoTo_DescDynamic})
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | models/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | .idea/ 133 | .idea/modules.xml 134 | .idea/workspace.xml 135 | -------------------------------------------------------------------------------- /experiment/train_il.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Script to train agent through imitation learning using demonstrations. 5 | """ 6 | 7 | import os 8 | import sys 9 | sys.path.append(os.getcwd()) 10 | 11 | import csv 12 | import copy 13 | import gym 14 | import time 15 | import datetime 16 | import numpy as np 17 | import sys 18 | import logging 19 | import torch 20 | import babyaiPP 21 | import babyai.utils as utils 22 | 23 | from experiment.arguments import ArgumentParser 24 | from experiment.imitation import ImitationLearning 25 | 26 | # Parse arguments 27 | parser = ArgumentParser() 28 | parser.add_argument("--demos", default=None, 29 | help="demos filename (REQUIRED or demos-origin or multi-demos required)") 30 | parser.add_argument("--demos-origin", required=False, 31 | help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)") 32 | parser.add_argument("--episodes", type=int, default=0, 33 | help="number of episodes of demonstrations to use" 34 | "(default: 0, meaning all demos)") 35 | parser.add_argument("--multi-env", nargs='*', default=None, 36 | help="name of the environments used for 
def main(args):
    """Train an agent by imitation learning from demonstrations.

    Configures logging, the optional Tensorboard writer and the CSV log, then
    hands control to ``ImitationLearning.train``.

    Args:
        args: namespace returned by ``parser.parse_args()``.

    Raises:
        ValueError: if ``--multi-env`` is given but ``--multi-demos`` and
            ``--multi-episodes`` are missing or differ in length.
    """
    # Verify the arguments when we train on multiple environments.
    # len(args.multi_env) is deliberately not checked: validation may be run
    # on environments other than the ones trained on.
    if args.multi_env is not None:
        if args.multi_demos is None or args.multi_episodes is None:
            raise ValueError(
                "--multi-demos and --multi-episodes are required when --multi-env is specified")
        if len(args.multi_demos) != len(args.multi_episodes):
            raise ValueError(
                "--multi-demos and --multi-episodes must have the same number of entries")

    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    print(args)
    il_learn = ImitationLearning(args)

    # Column names shared by the CSV log and the training loop
    header = ["update", "frames", "FPS", "duration", "entropy", "policy_loss",
              "train_accuracy", "validation_accuracy"]
    if args.multi_env is None:
        header.extend(["validation_return", "validation_success_rate"])
    else:
        header.extend(["validation_return_{}".format(env) for env in args.multi_env])
        header.extend(["validation_success_rate_{}".format(env) for env in args.multi_env])

    log_dir = utils.get_log_dir(args.model)

    # Optional Tensorboard writer
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(log_dir)

    csv_path = os.path.join(log_dir, 'log.csv')
    # Only write the header when this run creates the log file
    first_created = not os.path.exists(csv_path)
    status_path = os.path.join(log_dir, 'status.json')

    # Line-buffered (buffering=1): one update takes much longer than one
    # write to the log, so flushing every row is cheap. The context manager
    # guarantees the handle is closed even if training raises.
    # newline='' is what the csv module expects of the file it writes to.
    with open(csv_path, 'a', 1, newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        if first_created:
            csv_writer.writerow(header)

        # Log command, availability of CUDA, and model
        logger.info(args)
        logger.info("CUDA available: {}".format(torch.cuda.is_available()))
        logger.info(il_learn.acmodel)

        il_learn.train(il_learn.train_demos, writer, csv_writer, status_path, header)


if __name__ == "__main__":
    args = parser.parse_args()
    main(args)
def _evaluate_split(args, seed, episodes, suffix):
    """Evaluate the agent on the environment ``args.env + suffix`` and return its logs."""
    # Set seed for all randomness sources
    utils.seed(seed)

    env_name = args.env + suffix
    env = gym.make(env_name)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, argmax=args.argmax, env_name=env_name)
    if args.model is None and episodes > len(agent.demos):
        # Cannot evaluate on more episodes than there are demos
        episodes = len(agent.demos)

    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, env_name, seed, episodes)


def main_train(args, seed, episodes):
    """Evaluate on the ``<env>_Train-v0`` environment and return the logs.

    Args:
        args: parsed command-line namespace (uses ``env``, ``model``, ``argmax``).
        seed: seed applied to all randomness sources and to the environment.
        episodes: number of episodes to evaluate (clamped to the number of
            demos when no model is given).
    """
    return _evaluate_split(args, seed, episodes, "_Train-v0")


def main_test(args, seed, episodes):
    """Evaluate on the ``<env>_Test-v0`` environment and return the logs.

    Takes the same arguments as ``main_train``.
    """
    return _evaluate_split(args, seed, episodes, "_Test-v0")


def _summarize(logs):
    """Return (mean, standard error) pairs for success, return and frame count, flattened."""
    returns = logs["return_per_episode"]
    # Use the number of episodes actually evaluated, which may differ from
    # the requested count (e.g. when clamped to the number of demos).
    n = max(len(returns), 1)
    root_n = math.sqrt(n)

    return_stats = utils.synthesize(returns)
    success_stats = utils.synthesize([1 if r > 0 else 0 for r in returns])
    frames_stats = utils.synthesize(logs["num_frames_per_episode"])

    success_rate = success_stats['mean']
    return (
        success_rate, math.sqrt(success_rate * (1 - success_rate) / n),
        return_stats['mean'], return_stats['std'] / root_n,
        frames_stats['mean'], frames_stats['std'] / root_n,
    )


if __name__ == "__main__":
    args = parser.parse_args()

    logs = main_train(args, args.seed, args.episodes)
    logs_ts = main_test(args, args.seed, args.episodes)

    # One LaTeX table row: name, then success/return/frames as mean±se for
    # the train split followed by the test split. Raw strings keep the
    # backslashes of \pm and the trailing \\ literal.
    cell = r"${:.3f}\pm{:.3f}$"
    row = "{} & " + " & ".join([cell] * 6) + r" \\"
    print(row.format(args.exp_name, *_summarize(logs), *_summarize(logs_ts)))
def main(args, seed, episodes):
    """Evaluate a model, bot or demo agent on ``args.env`` and return the logs.

    Args:
        args: parsed command-line namespace.
        seed: seed applied to all randomness sources and to the environment.
        episodes: number of episodes to evaluate (clamped to the number of
            demos when no model is given).

    Returns:
        The log dictionary produced by the selected babyai evaluation routine.
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin, args.argmax, args.env)
    if args.model is None and episodes > len(agent.demos):
        # Cannot evaluate on more episodes than there are demos
        episodes = len(agent.demos)

    # Evaluate
    if isinstance(agent, utils.DemoAgent):
        logs = evaluate_demo_agent(agent, episodes)
    elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        logs = evaluate(agent, env, episodes, False)
    else:
        logs = batch_evaluate(agent, args.env, seed, episodes)

    return logs


if __name__ == "__main__":
    args = parser.parse_args()
    # Exactly one agent source must be given; report it as a usage error
    # rather than an assert, which is stripped under ``python -O``.
    sources = (args.model, args.demos_origin, args.demos)
    if sum(source is not None for source in sources) != 1:
        parser.error("ONE of --model or --demos-origin or --demos must be specified.")

    start_time = time.time()
    logs = main(args, args.seed, args.episodes)
    end_time = time.time()

    # Print logs
    frames_per_episode = logs["num_frames_per_episode"]
    num_frames = sum(frames_per_episode)
    elapsed = end_time - start_time
    # Guard against a zero-length interval on coarse clocks
    fps = num_frames / elapsed if elapsed > 0 else float("inf")
    duration = datetime.timedelta(seconds=int(elapsed))

    num_frames_per_episode = utils.synthesize(frames_per_episode)

    if args.model is not None:
        returns = logs["return_per_episode"]
        return_per_episode = utils.synthesize(returns)
        success_per_episode = utils.synthesize([1 if r > 0 else 0 for r in returns])
        success_rate = success_per_episode['mean']
        # Standard error over the episodes actually evaluated, which may
        # differ from the requested --episodes
        evaluated = max(len(returns), 1)
        print("F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} {:.4f} | F:xsmM {:.1f} {:.1f} {} {}"
              .format(num_frames, fps, duration,
                      *return_per_episode.values(),
                      success_rate,
                      math.sqrt(success_rate * (1 - success_rate) / evaluated),
                      *num_frames_per_episode.values()))
    else:
        print("F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}"
              .format(num_frames, fps, duration, *num_frames_per_episode.values()))

    # Episode indices ordered from the longest (worst) to the shortest
    indexes = sorted(range(len(frames_per_episode)), key=lambda k: -frames_per_episode[k])

    n = args.worst_episodes_to_show
    if n > 0:
        print("{} worst episodes:".format(n))
        for i in indexes[:n]:
            if 'seed_per_episode' in logs:
                print(logs['seed_per_episode'][i])
            if args.model is not None:
                print("- episode {}: R={}, F={}".format(i, logs["return_per_episode"][i], frames_per_episode[i]))
            else:
                print("- episode {}: F={}".format(i, frames_per_episode[i]))
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BabyAI++ 2 | This is the implementation of [BabyAI++ : Towards Grounded-Language Learning beyond Memorization](https://arxiv.org/pdf/2004.07200.pdf), as described in the following ICLR2020 [BeTR-RL](http://www.betr-rl.ml/2020/) workshop paper. 3 | 4 | ``` 5 | @inproceedings{cao2020babiai++, 6 | title={BabyAI++ : Towards Grounded-Language Learning beyond Memorization}, 7 | author={Cao, Tianshi and Wang, Jingkang and Zhang, Yining and Manivasagam, Sivabalan}, 8 | booktitle={ICLR}, 9 | year={2020} 10 | } 11 | ``` 12 | 13 | ## Introduction 14 | Although recent works have shown the benefits of instructive texts in goal-conditioned RL, few have studied whether descriptive texts help agents to generalize across dynamic environments. To promote research in this direction, we introduce a new platform BabyAI++, to generate various dynamic environments along with corresponding descriptive texts (see following Table). Experiments on BabyAI++ show strong evidence that using descriptive texts improves the generalization of RL agents across environments with varied dynamics. 15 | 16 | | Environments | Instructive Text | Descriptive Text | State Manipulation | Variable Dynamics | Procedural Envs | Multi-task | 17 | |----------------------------------|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:| 18 | | [Kolve et al. (2017)](https://arxiv.org/abs/1712.05474) | :x: | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x: | 19 | | [Narasimhan et al. (2017)](https://arxiv.org/abs/1708.00133) | :heavy_check_mark: | :heavy_check_mark: | :x: | :x: | :heavy_check_mark: | :x: | 20 | | [Wu et al. 
(2018)](https://arxiv.org/abs/1801.02209) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x: | :x: | :x: | 21 | | [Chaplot et al. (2018)](https://arxiv.org/abs/1706.07230) | :heavy_check_mark: | :x: | :x: | :x: | :heavy_check_mark: | :x: | 22 | | [Chevalier-Boisvert et al. (2019)](https://arxiv.org/abs/1810.08272) | :heavy_check_mark: | :x: | :heavy_check_mark: | :x: | :heavy_check_mark: | :heavy_check_mark: | 23 | | __BabyAI++ (Ours, 2020)__ | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | 24 | 25 | 26 | ## Getting started 27 | First, clone this repository to your local device recursively: 28 | ```bash 29 | git clone --recursive https://github.com/caotians1/BabyAIPlusPlus.git 30 | ``` 31 | Install the prerequisites (Anaconda): 32 | ```bash 33 | conda install pytorch=1.2.0 torchvision -c pytorch 34 | conda install pyqt 35 | pip install lorem tensorboardX blosc gym 36 | ``` 37 | Install `gym-minigrid` and `babyai`: 38 | ```bash 39 | cd gym-minigrid 40 | pip install --editable . 41 | cd ../babyai 42 | pip install --no-deps --editable . 43 | ``` 44 | 45 | ## Using BabyAI++ 46 | Play a level in BabyAI++: 47 | ``` 48 | python experiment/bAIPlusgui.py --env="BabyAI-GoTo_Dynamics_Train-v0" 49 | ``` 50 | To train a model in BabyAI++, see `scripts/train_film_agent_redball.sh` and `experiment/train_rl.py`. 51 | 52 | ## BabyAI++ Levels 53 | Please refer to [`babyaiPP/dynamics_levels.py`](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyaiPP/dynamics_levels.py) and [`babyaiPP/additional_levels.py`](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyaiPP/additional_levels.py) for the definition of supported levels. The following table lists the available environments of BabyAI++ currently. 
54 | 55 | ![babyai_levels](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyai_levels.png) 56 | 57 | 58 | ## Customize BabyAI++ Levels 59 | You can also define your own environments with descriptive texts and varying dynamics. Here is an example of creating the `PutNextDynamics_Medium` level: 60 | ``` 61 | # define dynamics setting 62 | class Level_PutNextDynamics_Medium_Train(DynamicsLevel, Level_PutNext): 63 | def __init__(self, seed=None, with_instruction=True): 64 | DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=2, 65 | held_out_cp_pairs=[('green', 0), ('green', 2), ('green', 4), 66 | ('blue', 1), ('blue', 3), ('blue', 5)], 67 | with_instruction=with_instruction) 68 | Level_PutNext.__init__(self, room_size=11, seed=seed) 69 | 70 | class Level_PutNextDynamics_Medium_Test(DynamicsLevel, Level_PutNext): 71 | def __init__(self, seed=None, with_instruction=True): 72 | DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=2, 73 | with_instruction=with_instruction) 74 | Level_PutNext.__init__(self, room_size=11, seed=seed) 75 | 76 | # register your environment 77 | register_levels(__name__, {'Level_PutNextDynamics_Medium_Train': Level_PutNextDynamics_Medium_Train}) 78 | ``` 79 | Note that you can augment any level supported by the [BabyAI platform](https://github.com/mila-iqia/babyai) with varying dynamics and descriptive texts by inheriting from `DynamicsLevel`. 80 | 81 | ## Questions/Bugs 82 | Please submit a GitHub issue or contact jcao@cs.toronto.edu or wangjk@cs.toronto.edu if you have any questions or find any bugs. Contributions to this repository (e.g., pull requests for other baselines) are also very welcome. 
class ArgumentParser(argparse.ArgumentParser):
    """Argument parser pre-populated with the options shared by the training scripts.

    Scripts instantiate it, optionally add their own options with
    ``add_argument``, and then call ``parse_args``.
    """

    def __init__(self):
        super().__init__()

        # Base arguments
        self.add_argument("--env", default=None,
                          help="name of the environment to train on (REQUIRED)")
        self.add_argument("--model", default=None,
                          help="name of the model (default: ENV_ALGO_TIME)")
        self.add_argument("--pretrained-model", default=None,
                          help='If you\'re using a pre-trained model and want the fine-tuned one to have a new name')
        self.add_argument("--seed", type=int, default=1,
                          help="random seed; if 0, a random seed will be used (default: 1)")
        self.add_argument("--task-id-seed", action='store_true',
                          help="use the task id within a Slurm job array as the seed")
        self.add_argument("--procs", type=int, default=64,
                          help="number of processes (default: 64)")
        self.add_argument("--tb", action="store_true", default=False,
                          help="log into Tensorboard")

        # Training arguments
        self.add_argument("--log-interval", type=int, default=10,
                          help="number of updates between two logs (default: 10)")
        self.add_argument("--frames", type=int, default=int(9e10),
                          help="number of frames of training (default: 9e10)")
        self.add_argument("--patience", type=int, default=100,
                          help="patience for early stopping (default: 100)")
        self.add_argument("--epochs", type=int, default=40,
                          help="maximum number of epochs")
        self.add_argument("--epoch-length", type=int, default=0,
                          help="number of examples per epoch; the whole dataset is used if 0")
        self.add_argument("--frames-per-proc", type=int, default=40,
                          help="number of frames per process before update (default: 40)")
        self.add_argument("--lr", type=float, default=1e-4,
                          help="learning rate (default: 1e-4)")
        self.add_argument("--beta1", type=float, default=0.9,
                          help="beta1 for Adam (default: 0.9)")
        self.add_argument("--beta2", type=float, default=0.999,
                          help="beta2 for Adam (default: 0.999)")
        self.add_argument("--recurrence", type=int, default=20,
                          help="number of timesteps gradient is backpropagated (default: 20)")
        self.add_argument("--optim-eps", type=float, default=1e-5,
                          help="Adam and RMSprop optimizer epsilon (default: 1e-5)")
        self.add_argument("--optim-alpha", type=float, default=0.99,
                          help="RMSprop optimizer alpha (default: 0.99)")
        self.add_argument("--batch-size", type=int, default=1280,
                          help="batch size for PPO (default: 1280)")
        self.add_argument("--entropy-coef", type=float, default=0.01,
                          help="entropy term coefficient (default: 0.01)")

        # Model parameters
        self.add_argument("--image-dim", type=int, default=128,
                          help="dimensionality of the image embedding")
        self.add_argument("--memory-dim", type=int, default=128,
                          help="dimensionality of the memory LSTM")
        self.add_argument("--instr-dim", type=int, default=128,
                          help="dimensionality of the instruction embedding")
        self.add_argument("--no-desc", action="store_true", default=False,
                          help="don't use descriptive texts in the model")
        self.add_argument("--instr-arch", default="gru",
                          help="arch to encode instructions, possible values: gru, bigru, conv, attgru (default: gru)")
        self.add_argument("--no-mem", action="store_true", default=False,
                          help="don't use memory in the model")
        self.add_argument("--arch", default='expert_filmcnn',
                          help="image embedding architecture")
        self.add_argument("--random-shuffle", action="store_true", default=False,
                          help='random shuffled texts')
        self.add_argument("--enable-instr", action="store_true", default=False,
                          help='enable instructional texts')
        self.add_argument("--instr-only", action="store_true", default=False,
                          help='use instructional texts only')

        # Validation parameters
        self.add_argument("--val-seed", type=int, default=int(1e9),
                          help="seed for environment used for validation (default: 1e9)")
        self.add_argument("--val-interval", type=int, default=1,
                          help="number of epochs between two validation checks (default: 1)")
        self.add_argument("--val-episodes", type=int, default=500,
                          help="number of episodes used to evaluate the agent, and to evaluate validation accuracy")

    def parse_args(self, args=None, namespace=None):
        """Parse the arguments and resolve the random seed.

        Args:
            args: list of argument strings; ``None`` (the default) parses
                ``sys.argv[1:]`` as ``argparse`` does.
            namespace: optional object to populate, passed through to
                ``argparse.ArgumentParser.parse_args``.

        Returns:
            The populated namespace. ``seed`` is replaced by a random integer
            in ``[0, 10000)`` when it was 0, and then by the Slurm array task
            id when ``--task-id-seed`` is set.

        Raises:
            KeyError: if ``--task-id-seed`` is set but the environment
                variable ``SLURM_ARRAY_TASK_ID`` is not defined.
        """
        # Accept and forward the base-class parameters so that callers (and
        # tests) can supply an explicit argument list.
        parsed = super().parse_args(args, namespace)

        # Set seed for all randomness sources
        if parsed.seed == 0:
            parsed.seed = np.random.randint(10000)
        if parsed.task_id_seed:
            parsed.seed = int(os.environ['SLURM_ARRAY_TASK_ID'])
            print('set seed to {}'.format(parsed.seed))

        # TODO: more validation

        return parsed
# /experiment/make_agent_demos.py
#!/usr/bin/env python3

"""
Generate a set of agent demonstrations.

The agent can either be a trained model or the heuristic expert (bot).

Demonstration generation can take a long time, but it can be parallelized
if you have a cluster at your disposal. Provide a script that launches
make_agent_demos.py at your cluster as --job-script and the number of jobs as --jobs.


"""

import argparse
import gym
import logging
import sys, os
sys.path.append(os.getcwd())

import subprocess
import os
import time
import numpy as np
import blosc
import torch

import babyaiPP
import babyai.utils as utils

# Parse arguments

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--env", required=True,
                    help="name of the environment to be run (REQUIRED)")
parser.add_argument("--model", default='BOT',
                    help="name of the trained model (REQUIRED)")
parser.add_argument("--demos", default=None,
                    help="path to save demonstrations (based on --model and --origin by default)")
parser.add_argument("--episodes", type=int, default=1000000,
                    help="number of episodes to generate demonstrations for")
parser.add_argument("--valid-episodes", type=int, default=512,
                    help="number of validation episodes to generate demonstrations for")
parser.add_argument("--seed", type=int, default=0,
                    help="start random seed")
parser.add_argument("--argmax", action="store_true", default=False,
                    help="action with highest probability is selected")
parser.add_argument("--log-interval", type=int, default=100,
                    help="interval between progress reports")
parser.add_argument("--save-interval", type=int, default=10000,
                    help="interval between demonstrations saving")
parser.add_argument("--filter-steps", type=int, default=0,
                    help="filter out demos with number of steps more than filter-steps")
parser.add_argument("--on-exception", type=str, default='warn', choices=('warn', 'crash'),
                    help="How to handle exceptions during demo generation")

parser.add_argument("--job-script", type=str, default=None,
                    help="The script that launches make_agent_demos.py at a cluster.")
parser.add_argument("--jobs", type=int, default=0,
                    help="Split generation in that many jobs")

args = parser.parse_args()
if args.jobs and not args.job_script:
    # Without a launcher the literal string "None" would be executed.
    parser.error("--jobs requires --job-script")
logger = logging.getLogger(__name__)


def print_demo_lengths(demos):
    """Log mean and standard deviation of the episode lengths in ``demos``.

    The length of an episode is taken from the third element of each demo
    tuple (the list of directions, one entry per step).
    """
    if not demos:
        # np.mean/np.std of an empty list would only produce NaN and a warning.
        logger.info('No demos to report the length of')
        return
    num_frames_per_episode = [len(demo[2]) for demo in demos]
    logger.info('Demo length: {:.3f}+-{:.3f}'.format(
        np.mean(num_frames_per_episode), np.std(num_frames_per_episode)))


def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the agent until ``n_episodes`` successful demos are collected and save them.

    :param n_episodes: number of successful demonstrations to collect
    :param valid: forwarded to ``utils.get_demos_path`` to select the output path
    :param seed: base seed; episode ``k`` is generated with ``seed + k``
    :param shift: unused, kept for interface compatibility

    Each demo is stored as ``(mission, packed images, directions, actions)``.
    Failed episodes are retried on a freshly reset environment unless
    ``--on-exception crash`` is given, in which case the error propagates.
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax, args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()
    # Demo count at the last progress check; prevents reporting the same
    # checkpoint again after an episode that added no demo.
    last_count = 0

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            logger.info("reset the environment to find a mission that the bot can solve")
            # Extra reset without reseeding, so that the reset below yields a
            # different mission than the one that just failed.
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0 or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)), directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed, the seed is {}".format(seed + len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except Exception:
            # AssertionError is a subclass of Exception, so this also covers
            # failed assertions inside the agent.
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(len(demos)))
            continue

        if len(demos) == last_count:
            # Failed or filtered episode: nothing new to report or save.
            continue
        last_count = len(demos)

        if len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info("demo #{}, {:.3f} demos per second, {:.3f} seconds to go".format(
                len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])


def generate_demos_cluster():
    """Split generation into ``--jobs`` shards launched through ``--job-script``.

    Every shard is started with its own ``--seed``, ``--demos`` and
    ``--episodes`` appended to the original command line. The function then
    polls the shard files once a minute until each holds its full share of
    demos, and finally saves the concatenation of all shards.

    :raises ValueError: if there are fewer episodes than jobs
    """
    demos_per_job = args.episodes // args.jobs
    if demos_per_job == 0:
        # An empty shard is never counted as done, so polling would never end.
        raise ValueError("--episodes ({}) must be at least --jobs ({})".format(
            args.episodes, args.jobs))
    if args.episodes % args.jobs:
        logger.warning("--episodes is not divisible by --jobs; only {} demos will be generated".format(
            demos_per_job * args.jobs))
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent')
    job_demo_names = [os.path.realpath(demos_path + '.shard{}'.format(i))
                      for i in range(args.jobs)]
    # Remove stale shards so that old results are not mistaken for new ones.
    for demo_name in job_demo_names:
        job_demos_path = utils.get_demos_path(demo_name)
        if os.path.exists(job_demos_path):
            os.remove(job_demos_path)

    command = [args.job_script]
    command += sys.argv[1:]
    launchers = []
    for i in range(args.jobs):
        # The overrides are appended after the original arguments.
        cmd_i = list(map(str,
                         command
                         + ['--seed', args.seed + i * demos_per_job]
                         + ['--demos', job_demo_names[i]]
                         + ['--episodes', demos_per_job]
                         + ['--jobs', 0]
                         + ['--valid-episodes', 0]))
        logger.info('LAUNCH COMMAND')
        logger.info(cmd_i)
        launchers.append(subprocess.Popen(cmd_i))

    job_demos = [None] * args.jobs
    reported_failures = set()
    while True:
        # poll() reaps launchers that have exited; a non-zero exit code is
        # reported once because that shard may never be produced.
        for i, launcher in enumerate(launchers):
            exit_code = launcher.poll()
            if exit_code not in (None, 0) and i not in reported_failures:
                reported_failures.add(i)
                logger.error("launcher for shard {} exited with code {}".format(i, exit_code))

        jobs_done = 0
        for i in range(args.jobs):
            if job_demos[i] is None or len(job_demos[i]) < demos_per_job:
                try:
                    logger.info("Trying to load shard {}".format(i))
                    job_demos[i] = utils.load_demos(utils.get_demos_path(job_demo_names[i]))
                    logger.info("{} demos ready in shard {}".format(
                        len(job_demos[i]), i))
                except Exception:
                    # Best effort: the shard may simply not exist yet.
                    logger.exception("Failed to load the shard")
            if job_demos[i] and len(job_demos[i]) == demos_per_job:
                jobs_done += 1
        logger.info("{} out of {} shards done".format(jobs_done, args.jobs))
        if jobs_done == args.jobs:
            break
        logger.info("sleep for 60 seconds")
        time.sleep(60)

    # Training demos
    all_demos = []
    for demos in job_demos:
        all_demos.extend(demos)
    utils.save_demos(all_demos, demos_path)


logging.basicConfig(level='INFO', format="%(asctime)s: %(levelname)s: %(message)s")
logger.info(args)
# Training demos
if args.jobs == 0:
    generate_demos(args.episodes, False, args.seed)
else:
    generate_demos_cluster()
# Validation demos
if args.valid_episodes:
    generate_demos(args.valid_episodes, True, int(1e9))
# /babyaiPP/decriptive_level_base.py
import gym
from babyai.levels.verifier import *
from babyai.levels.levelgen import *
from babyai.levels.iclr19_levels import *
from gym_minigrid.minigrid import DIR_TO_VEC
import copy


# Human-readable names for the relative position codes attached to cells.
_DIRECTION_NAMES = {
    "FL": "front left",
    "FR": "front right",
    "BL": "back left",
    "BR": "back right",
    "B": "back",
    "F": "front",
    "R": "right",
    "L": "left",
}


def _relative_pos_code(v, agent_dir):
    """Encode offset ``v`` relative to the agent's heading as F/B followed by L/R.

    An axis on which the offset is zero contributes no letter, so the result
    is the empty string for a zero offset.
    """
    # (d1, d2) is an oriented orthonormal basis: d1 is the heading.
    d1 = DIR_TO_VEC[agent_dir]
    d2 = (-d1[1], d1[0])
    ahead = dot_product(v, d1)
    side = dot_product(v, d2)
    code = ""
    if ahead > 0:
        code += "F"
    elif ahead < 0:
        code += "B"
    if side < 0:
        code += "L"
    elif side > 0:
        code += "R"
    return code


def _pos_phrase(code):
    """Return the wording for a position code, e.g. ``"to your front left"``."""
    if not code:
        # A cell at the agent's own position has no direction; the previous
        # lookup of "" in the direction table raised KeyError.
        return "at your position"
    return "to your " + _DIRECTION_NAMES[code]


class DescriptiveLevel(RoomGridLevel):
    """Room-grid level whose ``mission`` observation is a description of the grid.

    ``description_level`` selects the kind of description: 0 = none,
    1 = aggregated description, 2 = one sentence per object.
    ``desc_sample_frac`` is the fraction of non-wall cells that get described
    (1 = describe everything).
    """

    def __init__(self, description_level, desc_sample_frac, *args, **kwargs):
        # Validate first: the attribute is read by gen_mission, which may run
        # before the base constructor returns.
        # NOTE(review): that gen_mission runs during the base __init__ is an
        # assumption - confirm.
        assert description_level in [0, 1, 2], \
            "description_level must be 0, 1 or 2, got %r" % (description_level,)
        self.description_level = description_level
        self.description_frac = desc_sample_frac
        self.desc = None
        super(DescriptiveLevel, self).__init__(*args, **kwargs)

    def gen_mission(self):
        """Generate the base mission, then build ``self.desc`` from the grid contents."""
        super(DescriptiveLevel, self).gen_mission()
        if self.description_level == 0:
            self.desc = None
            return
        # search for objects: every non-empty, non-wall cell
        cell_infos = []
        for i in range(self.grid.width):
            for j in range(self.grid.height):
                cell = self.grid.get(i, j)
                if cell is None or cell.type == "wall":
                    continue
                cell_infos.append((cell, i, j))

        if self.description_frac < 1:
            # Keep a random subset of the cells.
            # NOTE(review): this draws from numpy's global RNG, so it is
            # presumably not controlled by the environment seed - confirm.
            N = int(self.description_frac * len(cell_infos))
            inds = np.arange(len(cell_infos))
            np.random.shuffle(inds)
            cell_infos = [cell_infos[i] for i in inds[:N]]

        if self.description_level == 1:
            self.desc, _, _ = gen_aggregated_description(
                cell_infos, agent_pos=self.agent_pos, agent_dir=self.agent_dir,
                aggregation_order="012")
        elif self.description_level == 2:
            descs = [gen_description(cell, pos=(i, j), agent_pos=self.agent_pos,
                                     agent_dir=self.agent_dir)
                     for cell, i, j in cell_infos]
            self.desc = " ".join(descs)

    def step(self, action):
        """Step the base level and replace the mission text with the description."""
        obs, reward, done, info = super().step(action)
        obs['mission'] = self.desc
        return obs, reward, done, info

    def reset(self, **kwargs):
        """Reset the base level and replace the mission text with the description."""
        obs = super().reset(**kwargs)
        obs['mission'] = self.desc
        return obs


def gen_aggregated_description(cell_info, agent_pos, agent_dir, aggregation_order):
    """
    Describe the cells grouped by three attributes in the requested order.

    :param cell_info: list of ``(cell, i, j)`` tuples
    :param agent_pos: agent position, used to compute relative positions
    :param agent_dir: agent direction, an index into ``DIR_TO_VEC``
    :param aggregation_order: three characters naming the grouping order,
        "0" = type, "1" = color, "2" = position. "012" groups by type, then
        color, then position; "201" groups by position, then type, then color.
    :return: ``(desc, count_array, dims)`` - the text, the 3-d array of counts
        indexed in grouping order, and the distinct values of each attribute
    """
    # Work on copies: a ``pos`` attribute is attached to every cell below and
    # the caller's grid objects must stay untouched.
    cell_info = copy.deepcopy(cell_info)
    all_types = []
    all_colors = []
    all_poses = []
    for cell, i, j in cell_info:
        cell.pos = _relative_pos_code((i - agent_pos[0], j - agent_pos[1]), agent_dir)
        if cell.type not in all_types:
            all_types.append(cell.type)
        if cell.color not in all_colors:
            all_colors.append(cell.color)
        if cell.pos not in all_poses:
            all_poses.append(cell.pos)

    dims = {'type': all_types, 'color': all_colors, 'pos': all_poses}
    index = {'0': 'type', '1': 'color', '2': 'pos'}
    t0 = index[aggregation_order[0]]
    t1 = index[aggregation_order[1]]
    t2 = index[aggregation_order[2]]
    dim_0 = dims[t0]
    dim_1 = dims[t1]
    dim_2 = dims[t2]
    count_array = np.zeros((len(dim_0), len(dim_1), len(dim_2)))

    for cell, _, _ in cell_info:
        count_array[dim_0.index(getattr(cell, t0)),
                    dim_1.index(getattr(cell, t1)),
                    dim_2.index(getattr(cell, t2))] += 1

    # Marginal counts, computed once instead of once per loop iteration.
    per_first = count_array.sum(axis=(1, 2))
    per_pair = count_array.sum(axis=2)

    desc = ""
    for i in range(count_array.shape[0]):
        n_first = per_first[i]
        if n_first == 0:
            continue

        # NOTE(review): a count is left out whenever the enclosing group is
        # singular ("There is a ..."); this is a reading of the original
        # ONEONEFLAG logic - confirm against the intended phrasing.
        single_first = n_first == 1
        desc += "There "
        if single_first:
            desc += "is a "
        else:
            desc += "are %d " % n_first
        plural = "" if single_first else "s"
        value = dim_0[i]

        if t0 == "pos":
            desc += "object" + plural + " " + _pos_phrase(value) + ", "
        elif t0 == "type":
            desc += value + plural + ", "
        else:
            desc += value + " object" + plural + ", "

        for j in range(count_array.shape[1]):
            n_pair = per_pair[i, j]
            if n_pair == 0:
                continue
            # "and" introduces the last non-empty sub-group.
            if np.sum(per_pair[i, j:]) == n_pair:
                desc += "and "
            if not single_first:
                desc += "%d " % n_pair

            value = dim_1[j]

            if t1 == "pos":
                desc += _pos_phrase(value) + ", "
            elif t1 == "type":
                desc += value + ("s, " if n_pair > 1 else ", ")
            else:
                desc += value + ", "

            for k in range(count_array.shape[2]):
                n_cell = count_array[i, j, k]
                if n_cell == 0:
                    continue
                if n_pair != 1:
                    desc += "%d " % n_cell

                value = dim_2[k]

                if t2 == "pos":
                    desc += _pos_phrase(value)
                elif t2 == "type":
                    desc += value
                    if n_cell > 1:
                        desc += "s"
                else:
                    desc += value

                desc += ", "
            # Replace the trailing ", " of the innermost list with "; ".
            desc = desc[:-2] + "; "
        # Replace the trailing "; " of the sub-group list with ". ".
        desc = desc[:-2] + ". "
    # Drop the final trailing space.
    desc = desc[:-1]

    return desc, count_array, dims


def gen_description(cell, pos, agent_pos, agent_dir):
    """Return one sentence locating ``cell`` (at ``pos``) relative to the agent."""
    # Direction from the agent to the object
    v = (pos[0] - agent_pos[0], pos[1] - agent_pos[1])

    # (d1, d2) is an oriented orthonormal basis
    d1 = DIR_TO_VEC[agent_dir]
    d2 = (-d1[1], d1[0])
    ahead = dot_product(v, d1)
    side = dot_product(v, d2)

    s = "There is a %s %s " % (cell.color, cell.type)
    if ahead > 0:
        s += "in front of you"
    elif ahead < 0:
        s += "behind you"
    if side < 0:
        s += ", on your left"
    elif side > 0:
        s += ", on your right"
    s += "."
    return s
# /babyaiPP/additional_levels.py
# Additional dynamics levels. Every class combines DynamicsLevel with a BabyAI
# task level and calls both base constructors explicitly, DynamicsLevel first.
# NOTE(review): the meaning of the enabled_properties indices is defined in
# DynamicsLevel, which is not visible here - confirm before changing them.
from .dynamics_levels import DynamicsLevel
from babyai.levels.iclr19_levels import *


class Level_PutNextLocalDynamics_Lorem_Train(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room size 8, 4 objects) with dynamics, Train variant.

    Properties 0, 3, 4 on 2 floor colors; holds out the pairs (green, 0) and
    (blue, 4); 'lorem' random text with 9 instruction words, no instruction.
    """
    def __init__(self, seed=None):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_Lorem_Fully_Train(DynamicsLevel, Level_PutNextLocal):
    """Same as Level_PutNextLocalDynamics_Lorem_Train, with total_rand=True."""
    def __init__(self, seed=None):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', total_rand=True, instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_Lorem_Test(DynamicsLevel, Level_PutNextLocal):
    """Test variant of Level_PutNextLocalDynamics_Lorem_Train: no held-out pairs."""
    def __init__(self, seed=None):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               rand_text='lorem', instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_Lorem_Fully_Test(DynamicsLevel, Level_PutNextLocal):
    """Test variant of Level_PutNextLocalDynamics_Lorem_Fully_Train: no held-out pairs."""
    def __init__(self, seed=None):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               rand_text='lorem', instr_words=9, total_rand=True, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_GoTo_NoDistDynamicsTrain(DynamicsLevel, Level_GoTo):
    """GoTo with dynamics, Train variant; 0 is passed as the fourth Level_GoTo argument.

    Properties 0, 3, 4 on 2 floor colors; holds out (green, 0) and (blue, 4).
    """
    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 doors_open=False,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)])
        # The literal 0 is presumably the number of distractors - TODO confirm
        # against the Level_GoTo signature.
        Level_GoTo.__init__(self, room_size, num_rows, num_cols, 0, doors_open, seed)


class Level_GoTo_NoDistDynamicsTest(DynamicsLevel, Level_GoTo):
    """Test variant of Level_GoTo_NoDistDynamicsTrain: no held-out pairs."""
    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 doors_open=False,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2)
        Level_GoTo.__init__(self, room_size, num_rows, num_cols, 0, doors_open, seed)



class Level_GoTo2by2_PartialDynamics_Train(DynamicsLevel, Level_GoTo):
    """GoTo on a 2x2 room grid (room size 9, 18 distractors by default), Train variant.

    Properties 1-5 on 3 floor colors with held_description=1; holds out
    (green, 1), (red, 2) and (blue, 4).
    """
    def __init__(self,
                 room_size=9,
                 num_rows=2,
                 num_cols=2,
                 num_dists=18,
                 doors_open=False,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[1, 2, 3, 4, 5],
                               n_floor_colors=3,
                               held_description=1,
                               held_out_cp_pairs=[('green', 1), ('red', 2), ('blue', 4)],
                               )
        Level_GoTo.__init__(self, room_size, num_rows,
                            num_cols, num_dists, doors_open, seed)


class Level_GoTo2by2_PartialDynamics_Test(DynamicsLevel, Level_GoTo):
    """Test variant of Level_GoTo2by2_PartialDynamics_Train: no held-out pairs."""
    def __init__(self,
                 room_size=9,
                 num_rows=2,
                 num_cols=2,
                 num_dists=18,
                 doors_open=False,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[1, 2, 3, 4, 5],
                               n_floor_colors=3,
                               held_description=1,
                               )

        Level_GoTo.__init__(self, room_size, num_rows,
                            num_cols, num_dists, doors_open, seed)


class Level_GoTo_RedBallDynamics_Lorem(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists with dynamics and 'lorem' random text, no instruction.

    Properties 0, 3, 4 on 2 floor colors; holds out (green, 0) and (blue, 4).
    """
    def __init__(self,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', with_instruction=False)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Lorem_Fully(DynamicsLevel, Level_GoToRedBallNoDists):
    """Same as Level_GoTo_RedBallDynamics_Lorem, with total_rand=True."""
    def __init__(self,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', total_rand=True, with_instruction=False)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamicsSticky_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists with properties 0, 1, 4 on 2 floor colors, Train variant.

    Holds out the pairs (green, 0) and (blue, 1).
    """
    def __init__(self,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 1)])
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamicsSticky_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """Sticky red-ball level with a fixed map: green -> trap, blue -> sticky.

    NOTE(review): these look like the pairs held out by
    Level_GoTo_RedBallDynamicsSticky_Train - confirm the index/name mapping.
    """
    def __init__(self,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 4], n_floor_colors=2,
                               color_property_map={'green': ['trap'], 'blue': ['sticky']})
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamicsSticky_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """Test variant of Level_GoTo_RedBallDynamicsSticky_Train: no held-out pairs."""
    def __init__(self,
                 seed=None
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 4], n_floor_colors=2)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_PutNextDynamics_Lorem_Train(DynamicsLevel, Level_PutNext):
    """PutNext (room size 8, 4 objects) with dynamics, Train variant.

    Properties 0, 3, 4 on 2 floor colors; holds out (green, 0) and (blue, 4);
    'lorem' random text with total_rand=True. ``with_instruction`` is
    forwarded to DynamicsLevel.
    """
    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)], with_instruction=with_instruction,
                               rand_text='lorem', total_rand=True)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextDynamics_Lorem_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """PutNext dynamics level with a fixed map: green -> trap, blue -> slippery."""
    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map={'green': ['trap', ],
                                                   'blue': ['slippery', ]}, with_instruction=with_instruction,
                               rand_text='lorem', total_rand=True,)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextDynamics_Lorem_Test(DynamicsLevel, Level_PutNext):
    """Test variant of Level_PutNextDynamics_Lorem_Train: no held-out pairs."""
    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2, with_instruction=with_instruction,
                               rand_text='lorem', total_rand=True,)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)


# Register every level defined above under its class name.
register_levels(__name__, {
    'Level_GoTo_NoDistDynamicsTrain': Level_GoTo_NoDistDynamicsTrain,
    'Level_GoTo_NoDistDynamicsTest': Level_GoTo_NoDistDynamicsTest,
    'Level_GoTo2by2_PartialDynamics_Train': Level_GoTo2by2_PartialDynamics_Train,
    'Level_GoTo2by2_PartialDynamics_Test': Level_GoTo2by2_PartialDynamics_Test,
    'Level_GoTo_RedBallDynamics_Lorem': Level_GoTo_RedBallDynamics_Lorem,
    'Level_GoTo_RedBallDynamics_Lorem_Fully': Level_GoTo_RedBallDynamics_Lorem_Fully,
    'Level_GoTo_RedBallDynamicsSticky_Train': Level_GoTo_RedBallDynamicsSticky_Train,
    'Level_GoTo_RedBallDynamicsSticky_TargetPairOnly': Level_GoTo_RedBallDynamicsSticky_TargetPairOnly,
    'Level_GoTo_RedBallDynamicsSticky_Test': Level_GoTo_RedBallDynamicsSticky_Test,
    'Level_PutNextLocalDynamics_Lorem_Train': Level_PutNextLocalDynamics_Lorem_Train,
    'Level_PutNextLocalDynamics_Lorem_Fully_Train': Level_PutNextLocalDynamics_Lorem_Fully_Train,
    'Level_PutNextLocalDynamics_Lorem_Test': Level_PutNextLocalDynamics_Lorem_Test,
    'Level_PutNextLocalDynamics_Lorem_Fully_Test': Level_PutNextLocalDynamics_Lorem_Fully_Test,
    'Level_PutNextDynamics_Lorem_Train':Level_PutNextDynamics_Lorem_Train,
    'Level_PutNextDynamics_Lorem_TargetPairOnly':Level_PutNextDynamics_Lorem_TargetPairOnly,
    'Level_PutNextDynamics_Lorem_Test':Level_PutNextDynamics_Lorem_Test,
})
# /experiment/train_rl.py
#!/usr/bin/env python3

"""
Script to train the agent through reinforcement learning.
"""

import os
import sys
sys.path.append(os.getcwd())

import logging
import csv
import json
import gym
import time
import datetime
import torch
import numpy as np
import subprocess
import babyaiPP
import babyai
import babyai.utils as utils
import babyai.rl
from babyai.evaluate import batch_evaluate
from babyai.utils.agent import ModelAgent

from experiment.arguments import ArgumentParser
from experiment.model import ACModel

# Parse arguments
parser = ArgumentParser()
parser.add_argument("--algo", default='ppo',
                    help="algorithm to use (default: ppo)")
parser.add_argument("--discount", type=float, default=0.99,
                    help="discount factor (default: 0.99)")
parser.add_argument("--reward-scale", type=float, default=20.,
                    help="Reward scale multiplier")
parser.add_argument("--gae-lambda", type=float, default=0.99,
                    help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)")
parser.add_argument("--value-loss-coef", type=float, default=0.5,
                    help="value loss term coefficient (default: 0.5)")
parser.add_argument("--max-grad-norm", type=float, default=0.5,
                    help="maximum norm of gradient (default: 0.5)")
parser.add_argument("--clip-eps", type=float, default=0.2,
                    help="clipping epsilon for PPO (default: 0.2)")
parser.add_argument("--ppo-epochs", type=int, default=4,
                    help="number of epochs for PPO (default: 4)")
parser.add_argument("--save-interval", type=int, default=50,
                    help="number of updates between two saves (default: 50, 0 means no saving)")
args = parser.parse_args()

utils.seed(args.seed)

# Generate environments, each with its own seed derived from --seed
envs = []
for i in range(args.procs):
    env = gym.make(args.env)
    env.seed(100 * args.seed + i)
    envs.append(env)

# Get the number of descriptive sentences
n_floor_colors = envs[0].n_floor_colors

# Define model name
suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
instr = args.instr_arch if not args.no_desc else "nodesc"
rand = "rand" if args.random_shuffle else "order"
if args.enable_instr:
    texts = "instrtexts" if args.instr_only else "alltexts"
else:
    texts = "destexts"
mem = "mem" if not args.no_mem else "nomem"
model_name_parts = {
    'env': args.env,
    'algo': args.algo,
    'arch': args.arch,
    'instr': instr,
    "rand": rand,
    "texts": texts,
    'mem': mem,
    'seed': args.seed,
    'info': '',
    'coef': '',
    'suffix': suffix}
default_model_name = "{env}_{algo}_{arch}_{instr}_{texts}_{rand}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
if args.pretrained_model:
    default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
# A user-supplied --model may itself contain the placeholders above.
args.model = args.model.format(**model_name_parts) if args.model else default_model_name

utils.configure_logging(args.model)
logger = logging.getLogger(__name__)

# Define obss preprocessor
if 'emb' in args.arch:
    obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
else:
    obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)

# Define actor-critic model: resume an existing one, else start from the
# pretrained model, else build a new one
acmodel = utils.load_model(args.model, raise_not_found=False)
if acmodel is None:
    if args.pretrained_model:
        acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
    else:
        acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
                          args.image_dim, args.memory_dim, args.instr_dim,
                          not args.no_desc, args.instr_arch, not args.no_mem, args.arch,
                          random_shuffled=args.random_shuffle, instr_sents=n_floor_colors,
                          enable_instr=args.enable_instr, instr_only=args.instr_only)

obss_preprocessor.vocab.save()
utils.save_model(acmodel, args.model)

if torch.cuda.is_available():
    acmodel.cuda()

# Define actor-critic algo

reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
if args.algo == "ppo":
    algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.beta1, args.beta2,
                             args.gae_lambda,
                             args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                             args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
                             reshape_reward)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# When using extra binary information, more tensors (model params) are initialized compared to when we don't use that.
# Thus, there starts to be a difference in the random state. If we want to avoid it, in order to make sure that
# the results of supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.

utils.seed(args.seed)

# Restore training status

status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
if os.path.exists(status_path):
    with open(status_path, 'r') as src:
        status = json.load(src)
else:
    status = {'i': 0,
              'num_episodes': 0,
              'num_frames': 0}

# Define logger and Tensorboard writer and CSV writer

header = (["update", "episodes", "frames", "FPS", "duration"]
          + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
          + ["success_rate_" + stat for stat in ['mean', 'std']]
          + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
          + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
if args.tb:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter(utils.get_log_dir(args.model))
csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
first_created = not os.path.exists(csv_path)
# Line-buffered: one update takes much longer than one write to the log.
# newline='' is what the csv module expects of the files it writes to; the
# handle is kept so it can be closed when training ends.
csv_file = open(csv_path, 'a', 1, newline='')
csv_writer = csv.writer(csv_file)
if first_created:
    csv_writer.writerow(header)

# Log code state, command, availability of CUDA and model

babyai_code = list(babyai.__path__)[0]


def _git_output(*git_args):
    """Return the decoded output of ``git <git_args>`` run inside the babyai package directory."""
    return subprocess.check_output(('git',) + git_args, cwd=babyai_code).decode('utf-8')


try:
    last_commit = _git_output('log', '-n1')
    logger.info('LAST COMMIT INFO:')
    logger.info(last_commit)
except (subprocess.CalledProcessError, OSError):
    # OSError covers a missing git executable or directory.
    logger.info('Could not figure out the last commit')
try:
    diff = _git_output('diff')
    if diff:
        logger.info('GIT DIFF:')
        logger.info(diff)
except (subprocess.CalledProcessError, OSError):
    logger.info('Could not figure out the git diff')
logger.info('COMMAND LINE ARGS:')
logger.info(args)
logger.info("CUDA available: {}".format(torch.cuda.is_available()))
logger.info(acmodel)

# Train model

total_start_time = time.time()
best_success_rate = 0
best_mean_return = 0
test_env_name = args.env
while status['num_frames'] < args.frames:
    # Update parameters

    update_start_time = time.time()
    logs = algo.update_parameters()
    update_end_time = time.time()

    status['num_frames'] += logs["num_frames"]
    status['num_episodes'] += logs['episodes_done']
    status['i'] += 1

    # Print logs

    if status['i'] % args.log_interval == 0:
        total_ellapsed_time = int(time.time() - total_start_time)
        fps = logs["num_frames"] / (update_end_time - update_start_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        success_per_episode = utils.synthesize(
            [1 if r > 0 else 0 for r in logs["return_per_episode"]])
        num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

        # One entry per column of ``header``; the success std is reported as
        # a standard error of the mean.
        data = [status['i'], status['num_episodes'], status['num_frames'],
                fps, total_ellapsed_time,
                *return_per_episode.values(),
                success_per_episode['mean'],
                success_per_episode['std'] / np.sqrt(len(logs["return_per_episode"])),
                *num_frames_per_episode.values(),
                logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"],
                logs["loss"], logs["grad_norm"]]

        format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                      "S:xs {:.2f} {:.4f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                      "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

        logger.info(format_str.format(*data))
        if args.tb:
            assert len(header) == len(data)
            for key, value in zip(header, data):
                writer.add_scalar(key, float(value), status['num_frames'])

        csv_writer.writerow(data)

    # Save obss preprocessor vocabulary and model

    if args.save_interval > 0 and status['i'] % args.save_interval == 0:
        obss_preprocessor.vocab.save()
        with open(status_path, 'w') as dst:
            json.dump(status, dst)
        utils.save_model(acmodel, args.model)

        # Evaluate the current model to decide whether it is the best so far
        agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
        agent.model = acmodel
        agent.model.eval()
        logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
        agent.model.train()
        mean_return = np.mean(logs["return_per_episode"])
        success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
        save_model = False
        if success_rate > best_success_rate:
            best_success_rate = success_rate
            # The tie-break reference must belong to the new best model;
            # otherwise a return recorded at a lower success rate would block
            # later improvements at this one.
            best_mean_return = mean_return
            save_model = True
        elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
            best_mean_return = mean_return
            save_model = True
        if save_model:
            utils.save_model(acmodel, args.model + '_best')
            obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
            logger.info("Return {: .2f}; best model is saved".format(mean_return))
        else:
            logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))

csv_file.close()

# /experiment/model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.distributions.categorical import Categorical
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import babyai.rl
from babyai.rl.utils.supervised_losses import required_heads


# Function from
# Adapted from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/model.py
def initialize_parameters(m):
    """Initialise ``Linear``-like modules with row-normalised Gaussian weights.

    Intended for ``nn.Module.apply``. A module whose class name contains
    ``"Linear"`` gets weights drawn from N(0, 1) and rescaled so each output
    row has unit L2 norm; its bias, when present, is set to zero. Every other
    module is left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        m.weight.data.normal_(0, 1)
        m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True))
        if m.bias is not None:
            m.bias.data.fill_(0)


# Inspired by FiLMedBlock from https://arxiv.org/abs/1709.07871
class ExpertControllerFiLM(nn.Module):
    """Two-convolution block whose second feature map is modulated by a vector.

    ``forward(x, y)`` applies conv -> batch-norm -> ReLU -> conv to ``x`` and
    then a per-channel affine transform whose scale and shift are linear
    functions of ``y``, followed by batch-norm and ReLU.

    Args:
        in_features: size of the conditioning vector ``y``.
        out_features: number of output channels.
        in_channels: number of channels of the input feature map ``x``.
        imm_channels: number of channels between the two convolutions.
    """

    def __init__(self, in_features, out_features, in_channels, imm_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=imm_channels, kernel_size=(3, 3), padding=1)
        self.bn1 = nn.BatchNorm2d(imm_channels)
        self.conv2 = nn.Conv2d(in_channels=imm_channels, out_channels=out_features, kernel_size=(3, 3), padding=1)
        self.bn2 = nn.BatchNorm2d(out_features)

        # Linear maps from the conditioning vector to per-channel scale / shift.
        self.weight = nn.Linear(in_features, out_features)
        self.bias = nn.Linear(in_features, out_features)

        self.apply(initialize_parameters)

    def forward(self, x, y):
        """Return the modulated feature map for input ``x`` conditioned on ``y``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.conv2(x)
        # Broadcast the (batch, channels) scale and shift over both spatial dims.
        out = x * self.weight(y).unsqueeze(2).unsqueeze(3) + self.bias(y).unsqueeze(2).unsqueeze(3)
        out = self.bn2(out)
        out = F.relu(out)
        return out


class ACModel(nn.Module, babyai.rl.RecurrentACModel):
    """Actor-critic network combining an image encoder with a text encoder.

    Args:
        obs_space: mapping; ``obs_space["instr"]`` is used as the vocabulary
            size of the word embedding.
        action_space: object whose ``n`` attribute is the number of actions.
        image_dim: output channels of the last FiLM controller and input size
            of the memory LSTM cell.
        memory_dim: hidden size of the memory LSTM cell.
        instr_dim: word-embedding size and text-embedding size.
        use_desc: whether text input is encoded and used at all.
        lang_model: ``'gru'``, ``'bigru'`` or ``'attgru'``.
        use_memory: whether an ``LSTMCell`` is applied to the image embedding.
        arch: ``'cnn'``, a name starting with ``'expert_filmcnn'``, or
            ``'fusion'``.
        aux_info: optional iterable of auxiliary-prediction names (keys of
            ``required_heads``); one extra head is created per name.
        random_shuffled: randomly permute the token columns before encoding.
        instr_sents: number of 4-token sentences at the end of the text.
        enable_instr: keep the leading part of the text (before the last
            ``4 * instr_sents`` tokens) instead of dropping it.
        instr_only: keep only that leading part; requires ``enable_instr``.

    Raises:
        ValueError: for an unknown ``arch``, or when a FiLM / fusion
            architecture is requested with ``use_desc=False``.
    """

    def __init__(self, obs_space, action_space,
                 image_dim=128, memory_dim=128, instr_dim=128,
                 use_desc=True, lang_model="gru", use_memory=False, arch="cnn",
                 aux_info=None, random_shuffled=False, instr_sents=2, enable_instr=False, instr_only=False):
        super().__init__()

        # Decide which components are enabled
        self.use_desc = use_desc
        self.use_memory = use_memory
        self.random_shuffled = random_shuffled
        self.enable_instr = enable_instr
        self.instr_only = instr_only
        self.arch = arch
        self.lang_model = lang_model
        self.aux_info = aux_info
        self.image_dim = image_dim
        self.memory_dim = memory_dim
        self.instr_dim = instr_dim
        self.instr_sents = instr_sents

        self.obs_space = obs_space

        # NOTE: sub-modules are created in a fixed order on purpose; changing
        # the order changes the random stream consumed by their initialisers.
        if arch == "cnn":
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2)),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
        elif arch.startswith("expert_filmcnn"):
            if not self.use_desc:
                raise ValueError("FiLM architecture can be used when instructions are enabled")

            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
            self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        elif arch == "fusion":
            if not self.use_desc:
                raise ValueError("fusion architecture can be used when instructions are enabled")

            # Feature extractor: resolution-preserving convolutions.
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
            )
            # Attention logits: one channel per sentence plus one extra
            # channel that is discarded after the softmax in forward().
            self.w_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=self.instr_sents+1, kernel_size=(3, 3), padding=1)
            )
            # Consumes image features concatenated with attended text features.
            self.combined_conv = nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU()
            )
        else:
            raise ValueError("Incorrect architecture name: {}".format(arch))

        # Define instruction embedding
        if self.use_desc:
            if self.lang_model in ['gru', 'bigru', 'attgru']:
                self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim)
                if self.lang_model in ['gru', 'bigru', 'attgru']:
                    gru_dim = self.instr_dim
                    if self.lang_model in ['bigru', 'attgru']:
                        # Two directions are concatenated back to instr_dim.
                        gru_dim //= 2
                    self.instr_rnn = nn.GRU(
                        self.instr_dim, gru_dim, batch_first=True,
                        bidirectional=(self.lang_model in ['bigru', 'attgru']))
                    self.final_instr_dim = self.instr_dim
                else:
                    # NOTE(review): as nested here this branch is unreachable
                    # (the inner test repeats the outer one) -- confirm the
                    # intended nesting before relying on a CNN text encoder.
                    kernel_dim = 64
                    kernel_sizes = [3, 4]
                    self.instr_convs = nn.ModuleList([
                        nn.Conv2d(1, kernel_dim, (K, self.instr_dim)) for K in kernel_sizes])
                    self.final_instr_dim = kernel_dim * len(kernel_sizes)

            if self.lang_model == 'attgru':
                self.memory2key = nn.Linear(self.memory_size, self.final_instr_dim)

        # Define memory
        if self.use_memory:
            self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)

        # Resize image embedding
        self.embedding_size = self.semi_memory_size
        if self.use_desc and not "filmcnn" in arch and not "fusion" in arch:
            # Only the plain CNN concatenates the text embedding at the end.
            self.embedding_size += self.final_instr_dim

        if arch.startswith("expert_filmcnn") or (self.arch == "fusion" and self.enable_instr):
            num_module = 2
            self.controllers = []
            for ni in range(num_module):
                if ni < num_module-1:
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim,
                        out_features=128, in_channels=128, imm_channels=128)
                else:
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim, out_features=self.image_dim,
                        in_channels=128, imm_channels=128)
                self.controllers.append(mod)
                # Registered by name so the parameters are tracked; the name is
                # part of the state-dict keys and must not change.
                self.add_module('FiLM_Controler_' + str(ni), mod)

        # Define actor's model
        self.actor = nn.Sequential(
            nn.Linear(self.embedding_size, 64),
            nn.Tanh(),
            nn.Linear(64, action_space.n)
        )

        # Define critic's model
        self.critic = nn.Sequential(
            nn.Linear(self.embedding_size, 64),
            nn.Tanh(),
            nn.Linear(64, 1)
        )

        # Initialize parameters correctly
        self.apply(initialize_parameters)

        # Define head for extra info
        if self.aux_info:
            self.extra_heads = None
            self.add_heads()

    def add_heads(self):
        '''
        When using auxiliary tasks, the environment yields at each step some binary, continuous, or multiclass
        information. The agent needs to predict this information. This function adds extra heads to the model
        that output the predictions. There is one head per extra piece of information (the head type depends
        on the information type).

        Raises ValueError for a head type other than 'binary', 'multiclass<N>' or 'continuous...01'.
        '''
        self.extra_heads = nn.ModuleDict()
        for info in self.aux_info:
            if required_heads[info] == 'binary':
                self.extra_heads[info] = nn.Linear(self.embedding_size, 1)
            elif required_heads[info].startswith('multiclass'):
                n_classes = int(required_heads[info].split('multiclass')[-1])
                self.extra_heads[info] = nn.Linear(self.embedding_size, n_classes)
            elif required_heads[info].startswith('continuous'):
                if required_heads[info].endswith('01'):
                    self.extra_heads[info] = nn.Sequential(nn.Linear(self.embedding_size, 1), nn.Sigmoid())
                else:
                    raise ValueError('Only continous01 is implemented')
            else:
                raise ValueError('Type not supported')
            # initializing these parameters independently is done in order to have consistency of results when using
            # supervised-loss-coef = 0 and when not using any extra binary information
            self.extra_heads[info].apply(initialize_parameters)

    def add_extra_heads_if_necessary(self, aux_info):
        '''
        This function allows taking a model pre-trained without aux_info, adding aux_info heads to it, and
        still being able to finetune it.

        Raises ValueError (chained to the underlying error) if the heads cannot be built.
        '''
        try:
            if not hasattr(self, 'aux_info') or not set(self.aux_info) == set(aux_info):
                self.aux_info = aux_info
                self.add_heads()
        except Exception as err:
            raise ValueError('Could not add extra heads') from err

    @property
    def memory_size(self):
        """Width of the flat memory vector: hidden and cell state side by side."""
        return 2 * self.semi_memory_size

    @property
    def semi_memory_size(self):
        """Width of one half (hidden or cell state) of the memory vector."""
        return self.memory_dim

    def forward(self, obs, memory, instr_embedding=None):
        """Compute the policy distribution, value estimate and next memory.

        Args:
            obs: object with an ``image`` tensor and, when text is used, an
                ``instr`` token tensor.
            memory: tensor whose columns are split in two halves of
                ``semi_memory_size`` (LSTM hidden and cell state).
            instr_embedding: optional precomputed result of
                ``_get_instr_embedding``; a ``(desc, instr)`` tuple is
                accepted for the fusion architecture with ``enable_instr``.

        Returns:
            dict with keys ``'dist'`` (``Categorical``), ``'value'``,
            ``'memory'`` and ``'extra_predictions'``.
        """
        instr_embedding2 = None
        if self.use_desc and instr_embedding is None:
            instr_embedding = self._get_instr_embedding(obs.instr)
        if isinstance(instr_embedding, tuple):
            # fusion + enable_instr: (per-sentence descriptions, instruction)
            instr_embedding, instr_embedding2 = instr_embedding

        if self.use_desc and self.lang_model == "attgru":
            # outputs: B x L x D
            # memory: B x M
            mask = (obs.instr != 0).float()
            instr_embedding = instr_embedding[:, :mask.shape[1]]
            keys = self.memory2key(memory)
            pre_softmax = (keys[:, None, :] * instr_embedding).sum(2) + 1000 * mask
            attention = F.softmax(pre_softmax, dim=1)
            instr_embedding = (instr_embedding * attention[:, :, None]).sum(1)

        # Move the last axis of the image to position 1 (channels first).
        x = torch.transpose(torch.transpose(obs.image, 1, 3), 2, 3)

        if self.arch.startswith("expert_filmcnn"):
            x = self.image_conv(x)
            for controler in self.controllers:
                x = controler(x, instr_embedding)
            x = F.relu(self.film_pool(x))
        elif self.arch == "fusion":
            # separate cnns for image extractor and attention module input
            x_feat = self.image_conv(x)
            w = self.w_conv(x)
            N, _, W, H = w.shape
            w = w.view([N, self.instr_sents + 1, -1])
            # Softmax over sentences (+1 extra slot) at every spatial location;
            # the extra slot is dropped below.
            w = F.softmax(w, dim=1)
            y = torch.matmul(instr_embedding, w[:, :-1]).view([N, 128, W, H])

            x = torch.cat([x_feat, y], dim=1)
            x = self.combined_conv(x)
            x = x.view(x.shape[0], x.shape[1], 1, 1)
            if self.enable_instr:
                for controler in self.controllers:
                    x = controler(x, instr_embedding2)
                x = F.relu(x)
        else:
            x = self.image_conv(x)

        x = x.reshape(x.shape[0], -1)

        if self.use_memory:
            hidden = (memory[:, :self.semi_memory_size], memory[:, self.semi_memory_size:])
            hidden = self.memory_rnn(x, hidden)
            embedding = hidden[0]
            memory = torch.cat(hidden, dim=1)
        else:
            embedding = x

        if self.use_desc and not "filmcnn" in self.arch and not "fusion" in self.arch:
            embedding = torch.cat((embedding, instr_embedding), dim=1)

        if hasattr(self, 'aux_info') and self.aux_info:
            extra_predictions = {info: self.extra_heads[info](embedding) for info in self.extra_heads}
        else:
            extra_predictions = dict()

        x = self.actor(embedding)
        dist = Categorical(logits=F.log_softmax(x, dim=1))

        x = self.critic(embedding)
        value = x.squeeze(1)

        return {'dist': dist, 'value': value, 'memory': memory, 'extra_predictions': extra_predictions}

    def _encode_sentences(self, tokens, lengths):
        """Encode ``instr_sents`` consecutive 4-token slices of ``tokens``.

        Each slice is run through the word embedding and the GRU; the output
        at position ``lengths - 1`` is kept for every row. The per-slice
        results are stacked along a new last dimension.
        """
        rows = range(len(lengths))
        hiddens = []
        for i in range(self.instr_sents):
            out, _ = self.instr_rnn(self.word_embedding(tokens[:, 4 * i:4 * (i + 1)]))
            hiddens.append(out[rows, lengths - 1, :])
        return torch.stack(hiddens, dim=-1)

    def _get_instr_embedding(self, instr):
        """Encode the token tensor ``instr`` with the configured text model.

        Returns:
            * ``'gru'``, non-fusion arch: one vector per row.
            * ``'gru'``, fusion arch: per-sentence vectors stacked on the last
              dimension; with ``enable_instr`` a tuple
              ``(sentence_vectors, instruction_vector)``.
            * ``'bigru'``: concatenated final states; ``'attgru'``: the
              per-token outputs.

        Raises:
            NotImplementedError: bi-directional models with the fusion arch.
            ValueError: unknown ``lang_model``.
        """
        tail = 4 * self.instr_sents
        # filtering the "go to the red ball"
        if not self.enable_instr:
            instr = instr[:, instr.size()[1] - tail:]
        if self.instr_only:
            assert (self.enable_instr)
            instr = instr[:, :instr.size()[1] - tail]

        if self.random_shuffled:
            instr = instr[:, torch.randperm(instr.size()[1])]
        lengths = (instr != 0).sum(1).long()

        if self.lang_model == 'gru':
            if self.arch != "fusion":
                out, _ = self.instr_rnn(self.word_embedding(instr))
                return out[range(len(lengths)), lengths - 1, :]

            assert (not self.instr_only)
            if not self.enable_instr:
                # Integer division: in-place true division (`/=`) cannot store
                # its float result in a LongTensor.
                return self._encode_sentences(instr, lengths // self.instr_sents)

            split = instr.size()[1] - tail
            desc = instr[:, split:]
            ins = instr[:, :split]
            desc_lengths = (desc != 0).sum(1).long() // self.instr_sents
            hidden_desc = self._encode_sentences(desc, desc_lengths)

            ins_lengths = (ins != 0).sum(1).long()
            out, _ = self.instr_rnn(self.word_embedding(ins))
            hidden_instr = out[range(len(ins_lengths)), ins_lengths - 1, :]
            return hidden_desc, hidden_instr

        elif self.lang_model in ['bigru', 'attgru']:
            if self.arch == "fusion":
                raise NotImplementedError("For early fusion model, only gru model is supported!")

            if lengths.shape[0] > 1:
                # Packing needs rows sorted by decreasing length; iperm_idx
                # (the inverse permutation) restores the original row order.
                seq_lengths, perm_idx = lengths.sort(0, descending=True)
                iperm_idx = torch.argsort(perm_idx)

                inputs = self.word_embedding(instr)
                inputs = inputs[perm_idx]

                inputs = pack_padded_sequence(inputs, seq_lengths.cpu(), batch_first=True)

                outputs, final_states = self.instr_rnn(inputs)
            else:
                instr = instr[:, 0:lengths[0]]
                outputs, final_states = self.instr_rnn(self.word_embedding(instr))
                iperm_idx = None
            # (directions, batch, hidden) -> (batch, directions * hidden)
            final_states = final_states.transpose(0, 1).contiguous()
            final_states = final_states.view(final_states.shape[0], -1)
            if iperm_idx is not None:
                outputs, _ = pad_packed_sequence(outputs, batch_first=True)
                outputs = outputs[iperm_idx]
                final_states = final_states[iperm_idx]

            return outputs if self.lang_model == 'attgru' else final_states

        else:
            raise ValueError("Undefined instruction architecture: {}".format(self.lang_model))
{}".format(len(train_demos), demos)) 44 | self.train_demos.extend(train_demos[:episodes]) 45 | logger.info('So far, {} demos loaded'.format(len(self.train_demos))) 46 | 47 | self.val_demos = [] 48 | for demos, episodes in zip(args.multi_demos, [args.val_episodes] * len(args.multi_demos)): 49 | demos_path_valid = utils.get_demos_path(demos, None, None, valid=True) 50 | logger.info('loading {} of {} valid demos'.format(episodes, demos)) 51 | valid_demos = utils.load_demos(demos_path_valid) 52 | logger.info('loaded demos') 53 | if episodes > len(valid_demos): 54 | logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(valid_demos))) 55 | self.val_demos.extend(valid_demos[:episodes]) 56 | logger.info('So far, {} valid demos loaded'.format(len(self.val_demos))) 57 | 58 | logger.info('Loaded all demos') 59 | 60 | observation_space = self.env[0].observation_space 61 | action_space = self.env[0].action_space 62 | 63 | else: 64 | self.env = gym.make(self.args.env) 65 | 66 | demos_path = utils.get_demos_path(args.demos, args.env, args.demos_origin, valid=False) 67 | demos_path_valid = utils.get_demos_path(args.demos, args.env, args.demos_origin, valid=True) 68 | 69 | logger.info('loading demos') 70 | self.train_demos = utils.load_demos(demos_path) 71 | logger.info('loaded demos') 72 | if args.episodes: 73 | if args.episodes > len(self.train_demos): 74 | raise ValueError("there are only {} train demos".format(len(self.train_demos))) 75 | self.train_demos = self.train_demos[:args.episodes] 76 | 77 | self.val_demos = utils.load_demos(demos_path_valid) 78 | if args.val_episodes > len(self.val_demos): 79 | logger.info('Using all the available {} demos to evaluate valid. 
@staticmethod
def default_model_name(args):
    """Build a default model name from the run arguments and the current time.

    The name has the form ``{envs}_IL_{arch}_{instr}_seed{seed}_{suffix}``
    where ``envs`` is ``args.env`` (or the ``-``-joined ``args.multi_env``),
    ``instr`` is ``args.instr_arch`` or ``"nodesc"`` when ``args.no_desc`` is
    set, and ``suffix`` is a ``yy-mm-dd-HH-MM-SS`` timestamp. When
    ``args.pretrained_model`` is set, it is prepended followed by
    ``_pretrained_``.
    """
    if getattr(args, 'multi_env', None):
        # It's better to specify one's own model name for this scenario
        named_envs = '-'.join(args.multi_env)
    else:
        named_envs = args.env

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if not args.no_desc else "nodesc"
    model_name_parts = {
        'envs': named_envs,
        'arch': args.arch,
        'instr': instr,
        'seed': args.seed,
        'suffix': suffix}
    default_model_name = "{envs}_IL_{arch}_{instr}_seed{seed}_{suffix}".format(**model_name_parts)
    if getattr(args, 'pretrained_model', None):
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    return default_model_name


def starting_indexes(self, num_frames):
    """Return the start offsets of the recurrence windows in a flat batch.

    Offsets are ``0, r, 2r, ...`` with ``r = self.args.recurrence``. When
    ``num_frames`` is not a multiple of ``r`` the last offset is dropped so
    that every window has ``r`` frames available.
    """
    if num_frames % self.args.recurrence == 0:
        return np.arange(0, num_frames, self.args.recurrence)
    else:
        return np.arange(0, num_frames, self.args.recurrence)[:-1]


def run_epoch_recurrence(self, demos, is_training=False):
    """Run one epoch over ``demos`` in mini-batches and gather statistics.

    Args:
        demos: sequence of demonstrations; ``len(demo[3])`` is counted as the
            number of frames of a demonstration.
        is_training: when true the demo order is shuffled and each batch is
            run with ``is_training=True``; otherwise the model is switched to
            eval mode for the duration of the epoch and back to train mode
            afterwards.

    Returns:
        dict with per-batch lists ``"entropy"``, ``"policy_loss"`` and
        ``"accuracy"`` and the total ``"frames"`` processed. ``"frames"`` is
        always present (``0`` when no batch was run).
    """
    if self.args.epoch_length == 0:
        indices = list(range(len(demos)))
    else:
        indices = np.random.choice(len(demos), self.args.epoch_length)
    if is_training:
        np.random.shuffle(indices)

    batch_size = min(self.args.batch_size, len(demos))
    # No demos means a zero batch size: run no batch instead of dividing by 0.
    num_batches = len(indices) // batch_size if batch_size else 0
    offset = 0

    if not is_training:
        self.acmodel.eval()

    # Log dictionary; "frames" starts at 0 so the key exists even if the loop
    # below never executes.
    log = {"entropy": [], "policy_loss": [], "accuracy": [], "frames": 0}

    start_time = time.time()
    frames = 0
    try:
        for batch_index in range(num_batches):
            logger.info("batch {}, FPS so far {}".format(
                batch_index, frames / (time.time() - start_time) if frames else 0))
            batch = [demos[i] for i in indices[offset: offset + batch_size]]
            frames += sum([len(demo[3]) for demo in batch])

            _log = self.run_epoch_recurrence_one_batch(batch, is_training=is_training)

            log["entropy"].append(_log["entropy"])
            log["policy_loss"].append(_log["policy_loss"])
            log["accuracy"].append(_log["accuracy"])
            log["frames"] = frames

            offset += batch_size
    finally:
        # Restore train mode even when a batch raises.
        if not is_training:
            self.acmodel.train()

    return log
def run_epoch_recurrence_one_batch(self, batch, is_training=False):
    """Process one batch of demonstrations with truncated back-propagation.

    The demonstrations are flattened into one sequence of frames. A first
    pass (without gradients) steps all demonstrations in lock-step to record
    the memory that is current at every frame. A second pass then starts from
    the frames returned by ``starting_indexes`` and unrolls the model for
    ``self.args.recurrence`` steps, accumulating the loss.

    Args:
        batch: list of demonstrations, converted with
            ``utils.demos.transform_demos``.
        is_training: when true an optimiser step is taken on the mean loss.

    Returns:
        dict with float entries ``"entropy"``, ``"policy_loss"`` and
        ``"accuracy"`` for the batch.
    """
    batch = utils.demos.transform_demos(batch)
    # Longest first, so the demos still running always form a prefix.
    batch.sort(key=len, reverse=True)
    # Constructing flat batch and indices pointing to start of each demonstration
    flat_batch = []
    inds = [0]

    for demo in batch:
        flat_batch += demo
        inds.append(inds[-1] + len(demo))

    flat_batch = np.array(flat_batch)
    inds = inds[:-1]
    num_frames = len(flat_batch)

    # 0 at the first frame of every demonstration (memory is reset there), 1 elsewhere.
    mask = np.ones([len(flat_batch)], dtype=np.float64)
    mask[inds] = 0
    mask = torch.tensor(mask, device=self.device, dtype=torch.float).unsqueeze(1)

    # Observations, true action, values and done for each of the stored demonstration
    obss, action_true, done = flat_batch[:, 0], flat_batch[:, 1], flat_batch[:, 2]
    action_true = torch.tensor(list(action_true), device=self.device, dtype=torch.long)

    # Memory to be stored
    memories = torch.zeros([len(flat_batch), self.acmodel.memory_size], device=self.device)
    # Integer dtype: these ids are used below to index a tensor, and a
    # float index array is rejected by tensor indexing.
    episode_ids = np.zeros(len(flat_batch), dtype=np.int64)
    memory = torch.zeros([len(batch), self.acmodel.memory_size], device=self.device)

    preprocessed_first_obs = self.obss_preprocessor(obss[inds], device=self.device)
    if not self.args.no_desc:
        # The text is encoded once per demonstration, from its first frame.
        instr_embedding = self.acmodel._get_instr_embedding(preprocessed_first_obs.instr)

    # Loop terminates when every observation in the flat_batch has been handled
    while True:
        # taking observations and done located at inds
        obs = obss[inds]
        done_step = done[inds]
        preprocessed_obs = self.obss_preprocessor(obs, device=self.device)
        with torch.no_grad():
            # taking the memory till len(inds), as demos beyond that have already finished
            if not self.args.no_desc:
                new_memory = self.acmodel(
                    preprocessed_obs,
                    memory[:len(inds), :], instr_embedding[:len(inds)])['memory']
            else:
                new_memory = self.acmodel(
                    preprocessed_obs,
                    memory[:len(inds), :])['memory']

        memories[inds, :] = memory[:len(inds), :]
        memory[:len(inds), :] = new_memory
        episode_ids[inds] = range(len(inds))

        # Updating inds, by removing those indices corresponding to which the demonstrations have finished
        inds = inds[:len(inds) - sum(done_step)]
        if len(inds) == 0:
            break

        # Incrementing the remaining indices
        inds = [index + 1 for index in inds]

    # Here, actual backprop upto args.recurrence happens
    final_loss = 0
    final_entropy, final_policy_loss = 0, 0

    indexes = self.starting_indexes(num_frames)
    memory = memories[indexes]
    accuracy = 0
    total_frames = len(indexes) * self.args.recurrence
    for _ in range(self.args.recurrence):
        obs = obss[indexes]
        preprocessed_obs = self.obss_preprocessor(obs, device=self.device)
        action_step = action_true[indexes]
        mask_step = mask[indexes]
        if not self.args.no_desc:
            model_results = self.acmodel(
                preprocessed_obs, memory * mask_step,
                instr_embedding[episode_ids[indexes]])
        else:
            model_results = self.acmodel(
                preprocessed_obs, memory * mask_step)
        dist = model_results['dist']
        memory = model_results['memory']

        entropy = dist.entropy().mean()
        policy_loss = -dist.log_prob(action_step).mean()
        loss = policy_loss - self.args.entropy_coef * entropy
        action_pred = dist.probs.max(1, keepdim=True)[1]
        accuracy += float((action_pred == action_step.unsqueeze(1)).sum()) / total_frames
        final_loss += loss
        final_entropy += entropy
        final_policy_loss += policy_loss
        # Advance every window by one frame.
        indexes += 1

    final_loss /= self.args.recurrence

    if is_training:
        self.optimizer.zero_grad()
        final_loss.backward()
        self.optimizer.step()

    log = {}
    log["entropy"] = float(final_entropy / self.args.recurrence)
    log["policy_loss"] = float(final_policy_loss / self.args.recurrence)
    log["accuracy"] = float(accuracy)

    return log


def validate(self, episodes, verbose=True):
    """Evaluate the current model on every training environment.

    Args:
        episodes: number of episodes passed to ``batch_evaluate`` per
            environment.
        verbose: log a message before validating.

    Returns:
        list with one ``batch_evaluate`` result per environment name
        (``args.multi_env`` when set, otherwise ``[args.env]``).
    """
    # Seed needs to be reset for each validation, to ensure consistency
    utils.seed(self.args.val_seed)

    if verbose:
        logger.info("Validating the model")

    multi_env = getattr(self.args, 'multi_env', None)
    env = self.env[0] if multi_env else self.env
    agent = utils.load_agent(env, model_name=self.args.model, argmax=True)

    # Setting the agent model to the current model
    agent.model = self.acmodel

    env_names = multi_env if multi_env else [self.args.env]
    agent.model.eval()
    try:
        logs = [batch_evaluate(agent, env_name, self.args.val_seed, episodes)
                for env_name in env_names]
    finally:
        # Always hand the model back in train mode, even if evaluation fails.
        agent.model.train()

    return logs


def collect_returns(self):
    """Return ``{env_index: mean return}`` over ``args.eval_episodes`` episodes."""
    logs = self.validate(episodes=self.args.eval_episodes, verbose=False)
    mean_return = {tid: np.mean(log["return_per_episode"]) for tid, log in enumerate(logs)}
    return mean_return
len(train_demos) 337 | logger.info("Batch size too high. Setting it to the number of train demos ({})".format(len(train_demos))) 338 | 339 | # Model saved initially to avoid "Model not found Exception" during first validation step 340 | utils.save_model(self.acmodel, self.args.model) 341 | 342 | # best mean return to keep track of performance on validation set 343 | best_success_rate, patience, i = 0, 0, 0 344 | total_start_time = time.time() 345 | 346 | while status['i'] < getattr(self.args, 'epochs', int(1e9)): 347 | if 'patience' not in status: # if for some reason you're finetuining with IL an RL pretrained agent 348 | status['patience'] = 0 349 | # Do not learn if using a pre-trained model that already lost patience 350 | if status['patience'] > self.args.patience: 351 | break 352 | if status['num_frames'] > self.args.frames: 353 | break 354 | 355 | status['i'] += 1 356 | i = status['i'] 357 | update_start_time = time.time() 358 | 359 | log = self.run_epoch_recurrence(train_demos, is_training=True) 360 | status['num_frames'] += log['frames'] 361 | 362 | # Learning rate scheduler 363 | self.scheduler.step() 364 | 365 | update_end_time = time.time() 366 | 367 | # Print logs 368 | if status['i'] % self.args.log_interval == 0: 369 | total_ellapsed_time = int(time.time() - total_start_time) 370 | 371 | fps = log['frames'] / (update_end_time - update_start_time) 372 | duration = datetime.timedelta(seconds=total_ellapsed_time) 373 | 374 | for key in log: 375 | log[key] = np.mean(log[key]) 376 | 377 | train_data = [status['i'], status['num_frames'], fps, total_ellapsed_time, 378 | log["entropy"], log["policy_loss"], log["accuracy"]] 379 | 380 | logger.info( 381 | "U {} | F {:06} | FPS {:04.0f} | D {} | H {:.3f} | pL {: .3f} | A {: .3f}".format(*train_data)) 382 | 383 | # Log the gathered data only when we don't evaluate the validation metrics. 
It will be logged anyways 384 | # afterwards when status['i'] % self.args.val_interval == 0 385 | if status['i'] % self.args.val_interval != 0: 386 | # instantiate a validation_log with empty strings when no validation is done 387 | validation_data = [''] * len([key for key in header if 'valid' in key]) 388 | assert len(header) == len(train_data + validation_data) 389 | if self.args.tb: 390 | for key, value in zip(header, train_data): 391 | writer.add_scalar(key, float(value), status['num_frames']) 392 | csv_writer.writerow(train_data + validation_data) 393 | 394 | if status['i'] % self.args.val_interval == 0: 395 | 396 | valid_log = self.validate(self.args.val_episodes) 397 | mean_return = [np.mean(log['return_per_episode']) for log in valid_log] 398 | success_rate = [np.mean([1 if r > 0 else 0 for r in log['return_per_episode']]) for log in 399 | valid_log] 400 | 401 | val_log = self.run_epoch_recurrence(self.val_demos) 402 | validation_accuracy = np.mean(val_log["accuracy"]) 403 | 404 | if status['i'] % self.args.log_interval == 0: 405 | validation_data = [validation_accuracy] + mean_return + success_rate 406 | logger.info(("Validation: A {: .3f} " + ("| R {: .3f} " * len(mean_return) + 407 | "| S {: .3f} " * len(success_rate)) 408 | ).format(*validation_data)) 409 | 410 | assert len(header) == len(train_data + validation_data) 411 | if self.args.tb: 412 | for key, value in zip(header, train_data + validation_data): 413 | writer.add_scalar(key, float(value), status['num_frames']) 414 | csv_writer.writerow(train_data + validation_data) 415 | 416 | # In case of a multi-env, the update condition would be "better mean success rate" ! 
417 | if np.mean(success_rate) > best_success_rate: 418 | best_success_rate = np.mean(success_rate) 419 | status['patience'] = 0 420 | with open(status_path, 'w') as dst: 421 | json.dump(status, dst) 422 | # Saving the model 423 | logger.info("Saving best model") 424 | 425 | if torch.cuda.is_available(): 426 | self.acmodel.cpu() 427 | utils.save_model(self.acmodel, self.args.model + "_best") 428 | self.obss_preprocessor.vocab.save(utils.get_vocab_path(self.args.model + "_best")) 429 | if torch.cuda.is_available(): 430 | self.acmodel.cuda() 431 | else: 432 | status['patience'] += 1 433 | logger.info( 434 | "Losing patience, new value={}, limit={}".format(status['patience'], self.args.patience)) 435 | 436 | 437 | if status['i'] % self.args.save_interval == 0: 438 | logger.info("Saving current model") 439 | if torch.cuda.is_available(): 440 | self.acmodel.cpu() 441 | utils.save_model(self.acmodel, self.args.model) 442 | self.obss_preprocessor.vocab.save() 443 | if torch.cuda.is_available(): 444 | self.acmodel.cuda() 445 | with open(status_path, 'w') as dst: 446 | json.dump(status, dst) 447 | -------------------------------------------------------------------------------- /babyaiPP/dynamics_levels.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from babyai.levels.verifier import * 3 | from babyai.levels.levelgen import * 4 | from babyai.levels.iclr19_levels import * 5 | from gym_minigrid.minigrid import COLOR_NAMES, Floor, DIR_TO_VEC 6 | from lorem.text import TextLorem 7 | 8 | PROPERTY_TO_IDX = { 9 | 'trap': 0, # Agent dies, episode end 10 | 'sticky': 1, # Agent must stay on block for at least 3 time steps. 11 | 'flipud': 2, # causes agent to turn 180 and move one block, requires agent to spin and then getting backed out. 12 | 'fliplr': 3, # Flips rotational actions. 13 | 'slippery': 4, # time warp, increase reward at end 14 | # Agent will fall down 1 block every 2 timesteps on this color. 
PROPERTY_TO_IDX = {
    'trap': 0,  # Agent dies, episode end
    'sticky': 1,  # Agent must stay on block for at least 3 time steps.
    'flipud': 2,  # causes agent to turn 180 and move one block, requires agent to spin and then getting backed out.
    'fliplr': 3,  # Flips rotational actions.
    'slippery': 4,  # time warp, increase reward at end
    # Agent will fall down 1 block every 2 timesteps on this color.
    'magic': 5,
}

# Properties grouped by how game-breaking they are, most restrictive first:
#   tier 0 - insta-death: path-blocking + must not overlap with object.
#   tier 1 - path blocking: creates uncrossable areas, must not be placed in front of doors,
#            must not be contiguous with other path blockers by more than 2.
#   tier 2 - unconstrained: can be placed anywhere without breaking the game.
PROPERTY_ORDER = [['trap'], ['slippery', 'magic'], ['none', 'sticky', 'flipud', 'fliplr']]
# Per-tier spawn chance: insta-death 5%, path blocking 15%, unconstrained 30%.
Spawn_rates = [0.05, 0.15, 0.30]
# Placement attempts for a constrained (tier 0/1) tile before giving up.
N_tries = 20

IDX_TO_PROPERTY = dict(zip(PROPERTY_TO_IDX.values(), PROPERTY_TO_IDX.keys()))


class DynamicsLevel(RoomGridLevel):
    """Room-grid level whose coloured floor tiles change how the agent moves.

    On every ``reset`` a colour -> property map is sampled, coloured ``Floor``
    tiles are scattered while the grid is generated, and a textual description
    of the map is appended to (or replaces) the mission string.
    """
    # TODO(lts): Adapted floors to be containers. Need to make sure certain objectives will continue
    # to work. (Current goto objective should work b/c colored floors never spawn under objects.)

    # The eight cells surrounding a tile.
    _NEIGHBOR_OFFSETS = ((-1, -1), (0, -1), (1, -1), (-1, 0), (1, 0), (-1, 1), (0, 1), (1, 1))
    # Number of leading PROPERTY_ORDER tiers placed with the contiguity/door check.
    _N_GUARDED_TIERS = 2
    # Small vocabulary used for meaningless descriptions when ``total_rand`` is off.
    _LOREM_WORDS = ('put', 'the', 'ball', 'in', 'lorem', 'ipsum', 'forty-two', 'sentence',
                    'length', 'agent', 'dir', 'gen', 'grid', 'word', 'description', 'choose',
                    'previous')

    def __init__(self, enabled_properties=(0, 1, 2, 3, 4, 5), n_floor_colors=2, fixed_color_prop_map=False,
                 color_property_map=None, held_out_cp_pairs=None, held_description=0.0, with_instruction=True,
                 rand_text=False, total_rand=False, instr_words=5,
                 *args, **kwargs):
        """
        Configure the floor dynamics, then initialise the underlying level.

        :param enabled_properties: property idxs (see PROPERTY_TO_IDX) that may be sampled.
        :param n_floor_colors: number of colors for special floors.
        :param fixed_color_prop_map: always use the same color prop map, default False
        :param color_property_map: dict color -> list of candidate property names; when given,
            one candidate per color is drawn on each reset and ``fixed_color_prop_map`` is forced on.
            Default None (generate random color prop map)
        :param held_out_cp_pairs: (color, property idx) pairs that must never be sampled; the color
            may be a name or an index into COLOR_NAMES
        :param held_description: how many color/property pairs are left out of the description
            (0 describes all of them)
        :param with_instruction: keep the task instruction and append the description to it;
            otherwise the mission is the description only
        :param rand_text: if 'rand_attribute', describe random color/property pairs; any other
            truthy value replaces the description with meaningless text
        :param total_rand: draw the meaningless text from TextLorem's default dictionary
        :param instr_words: number of words in the first meaningless sentence
        :param args: forwarded to the next ``__init__`` in the MRO
        :param kwargs: forwarded to the next ``__init__`` in the MRO
        """
        # Work on a private list: the default is a tuple and callers may pass any sequence.
        enabled_properties = list(enabled_properties)
        assert n_floor_colors <= len(COLOR_NAMES)
        assert len(enabled_properties) > 0
        assert max(enabled_properties) < len(PROPERTY_TO_IDX)
        assert min(enabled_properties) >= 0

        self.enabled_properties = enabled_properties
        self.n_floor_colors = n_floor_colors
        self.held_out_cp_pairs = held_out_cp_pairs
        self.desc = ''
        self.fixed_color_prop_map = fixed_color_prop_map
        self.color_property_map = {}
        if color_property_map is None:
            self.color_property_map_fixed = {}
        else:
            self.color_property_map_fixed = color_property_map.copy()
            self.fixed_color_prop_map = True
        self.held_description = held_description
        # Properties for tile effects.
        self.tile_time = 0
        self.last_color = None
        self.color_time = 0
        self.agent_prev_pos = None
        self.with_instruction = with_instruction
        self.rand_text = rand_text
        self.total_rand = total_rand
        self.instr_words = instr_words
        super().__init__(*args, **kwargs)

    def gen_mission(self):
        # TODO(lts)
        super().gen_mission()

        return

    def _sample_color_property_map(self):
        """Draw the color -> property-name map used for the next episode."""
        cmap = {}
        if len(self.color_property_map_fixed) > 0 and self.fixed_color_prop_map:
            # Pick n_floor_colors of the configured colors, then one candidate property each.
            color_keys = list(self.color_property_map_fixed)
            inds = np.arange(len(color_keys))
            self.np_random.shuffle(inds)
            assert len(inds) >= self.n_floor_colors
            for idx in inds[:self.n_floor_colors]:
                color = color_keys[idx]
                cmap[color] = random.choice(self.color_property_map_fixed[color])
            return cmap

        n_enabled = len(self.enabled_properties)
        for color_idx in range(self.n_floor_colors):
            color = COLOR_NAMES[color_idx]
            # Random property per color. Can have duplicates.
            if self.held_out_cp_pairs is not None:
                held_out = [p for c, p in self.held_out_cp_pairs if c == color or c == color_idx]
                candidates = [p for p in self.enabled_properties if p not in held_out]
                if not candidates:
                    raise ValueError("every enabled property is held out for color %r" % (color,))
                prop_idx = candidates[self._rand_int(0, len(candidates))]
                if not held_out:
                    # The original code redraws here; kept so seeded runs consume the
                    # same number of random values as before.
                    prop_idx = self.enabled_properties[self._rand_int(0, n_enabled)]
            else:
                prop_idx = self.enabled_properties[self._rand_int(0, n_enabled)]
            cmap[color] = IDX_TO_PROPERTY[prop_idx]
        return cmap

    def _build_description(self):
        """Return the description text for the current color/property map."""
        items = list(self.color_property_map.items())
        if self.held_description != 0:
            n_pairs = len(items)
            assert self.held_description <= n_pairs
            random.shuffle(items)
            # int(): the default is the float 0.0 and a float cannot be a slice bound.
            items = items[:int(n_pairs - self.held_description)]

        desc = '. '
        if not self.rand_text:
            for color, prop in items:
                desc += '%s floors are %s. ' % (color, prop)
        elif self.rand_text == "rand_attribute":
            props = list(PROPERTY_TO_IDX.keys())
            for _ in items:
                desc += '%s floors are %s. ' % (self._rand_color(), props[self._rand_int(0, len(props))])
        else:
            lorem_kwargs = {} if self.total_rand else {'words': list(self._LOREM_WORDS)}
            lorem = TextLorem(srange=(self.instr_words, self.instr_words), **lorem_kwargs)
            desc += lorem.sentence()
            lorem = TextLorem(srange=(4, 4), **lorem_kwargs)
            for _ in items:
                desc += ' ' + lorem.sentence()
        return desc

    def reset(self, **kwargs):
        """Resample the floor dynamics, regenerate the level and return the first observation.

        ``kwargs`` are accepted but not forwarded to the parent ``reset``.
        """
        self.tile_time = 0
        self.last_color = None
        self.color_time = 0
        self.agent_prev_pos = None

        # Rescramble floor property mappings (read by _gen_grid during super().reset()).
        # TODO(lts): Hold some out for test.
        self.color_property_map = self._sample_color_property_map()

        obs = super().reset()
        self.desc = self._build_description()

        if self.with_instruction:
            obs['mission'] += self.desc
        else:
            # Drop the leading '. ' that joins the description to an instruction.
            obs['mission'] = self.desc[2:]
        return obs

    def _place_guarded_floor(self, col, row, color, contig_colors):
        """Try up to N_tries times to place a floor tile away from doors and other blockers.

        A candidate is rejected when it touches a door, or two or more tiles whose
        color is in ``contig_colors``. Returns True when a tile was kept.
        """
        room = self.get_room(col, row)
        for _ in range(N_tries):
            _, pose = self.place_in_room(col, row, Floor(color))
            penalty = 0
            for dx, dy in self._NEIGHBOR_OFFSETS:
                tar = self.grid.get(pose[0] + dx, pose[1] + dy)
                if tar is None:
                    continue
                # check contiguous
                if tar.type == 'floor' and tar.color in contig_colors:
                    penalty += 1
                # check door
                elif tar.type == 'door':
                    penalty += 2
            if penalty <= 1:
                # successfully placed floor
                return True
            # revert
            self.grid.set(pose[0], pose[1], None)
            room.objs.pop(-1)
        return False

    def _gen_grid(self, width, height):
        """Generate the base level, then randomly scatter colored floor tiles."""
        super()._gen_grid(width, height)

        self.previous_direction = self.agent_dir
        cmap = self.color_property_map

        # Colors of the current map, grouped by the tier of their property.
        tier_colors = [[c for c, p in cmap.items() if p in tier] for tier in PROPERTY_ORDER]
        contig_colors = [c for colors in tier_colors[:self._N_GUARDED_TIERS] for c in colors]
        # A draw f in [0, 1) selects the first tier with f >= threshold that has a color.
        thresholds = [1 - sum(Spawn_rates[:k + 1]) for k in range(len(tier_colors))]

        n_draws = self.num_cols * self.num_rows * pow(self.room_size - 2, 2)
        for _ in range(n_draws):
            f = self._rand_float(0, 1)
            for tier, colors in enumerate(tier_colors):
                if f < thresholds[tier] or not colors:
                    continue
                color = colors[self._rand_int(0, len(colors))]
                col = self._rand_int(0, self.num_cols)
                row = self._rand_int(0, self.num_rows)
                try:
                    if tier < self._N_GUARDED_TIERS:
                        self._place_guarded_floor(col, row, color, contig_colors)
                    else:
                        self.place_in_room(col, row, Floor(color))
                except RecursionError:
                    # Room too full for rejection sampling; skip this tile.
                    pass
                break

    def get_floor_color(self, i, j):
        """Return the color of the floor tile at (i, j), or None if there is none."""
        o = self.grid.get(i, j)
        if o and o.type == 'floor':
            return o.color
        return None

    @property
    def down_pos(self):
        """
        Get the position of the cell that is one cell below agent.
        """

        return self.agent_pos + DIR_TO_VEC[1]

    def step(self, action):
        """Apply the effect of the tile under the agent, then take ``action``."""
        c = self.get_floor_color(*self.agent_pos)
        # Deal with different floor tiles. Colors without a mapped property have no effect.
        floor_property = self.color_property_map.get(c) if c else None

        if floor_property == 'fliplr':
            if action == self.actions.left:
                action = self.actions.right
            elif action == self.actions.right:
                action = self.actions.left
        elif floor_property == 'flipud':
            if action == self.actions.forward:
                self.agent_dir = (self.agent_dir + 2) % 4
        elif floor_property == 'sticky':
            if self.tile_time < 2 and action == self.actions.forward:
                action = self.actions.done  # Wait action.
        elif floor_property == "slippery":
            self.step_count -= 0.5
        elif floor_property == 'magic':
            if self.color_time > 0 and self.color_time % 2:
                down_cell = self.grid.get(*self.down_pos)
                # Move Agent down.
                if down_cell is None or down_cell.can_overlap():
                    self.agent_pos = self.down_pos
                    # TODO(lts): Allow instructions to be finished via gravity.

        self.agent_prev_pos = self.agent_pos

        # Actually take action
        obs, reward, done, info = super().step(action)
        c = self.get_floor_color(*self.agent_pos)
        if c and self.color_property_map.get(c) == "trap":
            reward = 0
            done = True

        # Keep track of some internal variables.
        # Previous Location
        if not np.array_equal(self.agent_prev_pos, self.agent_pos):
            self.tile_time = 0
        else:
            self.tile_time += 1

        # Previous color (tracked by position).
        # NOTE(review): as in the original, the counter restarts only when BOTH
        # coordinates differ from the stored position - confirm this is intended.
        if self.last_color is None or bool(
                np.all(np.asarray(self.last_color) != np.asarray(self.agent_pos))):
            self.last_color = self.agent_pos
            self.color_time = 0
        else:
            self.color_time += 1
        if self.with_instruction:
            obs['mission'] += self.desc
        else:
            obs['mission'] = self.desc[2:]

        return obs, reward, done, info


# Goto Red ball Dynamic
class Level_GoTo_RedBallDynamics_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball; green/trap and blue/slippery are held out of training."""

    def __init__(self,
                 seed=None,
                 with_instruction=True,
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)], with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)
def _all_props():
    """Fresh list with every property idx (0-5)."""
    return [0, 1, 2, 3, 4, 5]


def _easy_held_out():
    """Held-out (color, property idx) pairs of the 2-color levels."""
    return [('green', 0), ('blue', 4)]


def _easy_target_map():
    """Color -> candidate properties containing only the 2-color held-out pairs."""
    return {'green': ['trap'], 'blue': ['slippery']}


def _hard_held_out():
    """Held-out (color, property idx) pairs of the 3-color levels."""
    return [('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5)]


def _hard_target_map():
    """Color -> candidate properties containing only the 3-color held-out pairs."""
    return {'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic']}


def _six_color_fixed_map():
    """One fixed property for each of six colors."""
    return {'green': ['slippery'],
            'grey': ['flipud'],
            'blue': ['fliplr'],
            'red': ['trap'],
            'purple': ['magic'],
            'yellow': ['sticky']}


class Level_GoTo_RedBallDynamics_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball with only the held-out color/property pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map=_easy_target_map(),
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball with every enabled pair allowed."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


register_levels(__name__, {
    'Level_GoTo_RedBallDynamics_Train': Level_GoTo_RedBallDynamics_Train,
    'Level_GoTo_RedBallDynamics_TargetPairOnly': Level_GoTo_RedBallDynamics_TargetPairOnly,
    'Level_GoTo_RedBallDynamics_Test': Level_GoTo_RedBallDynamics_Test,
})


# Goto Red ball dynamic hard
class Level_GoTo_RedBallDynamics_Hard_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball, six properties on three colors, held-out pairs excluded."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               held_out_cp_pairs=_hard_held_out(),
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Hard_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball, only the held-out pairs of the hard setting."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               color_property_map=_hard_target_map(),
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Hard_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball, six properties on three colors, nothing held out."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Hard_Fixed(DynamicsLevel, Level_GoToRedBallNoDists):
    """Go to the red ball with a fixed property per color (three colors drawn per reset)."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               color_property_map=_six_color_fixed_map(),
                               with_instruction=with_instruction)
        Level_GoToRedBallNoDists.__init__(self, seed)


register_levels(__name__, {
    'Level_GoTo_RedBallDynamics_Hard_Train': Level_GoTo_RedBallDynamics_Hard_Train,
    'Level_GoTo_RedBallDynamics_Hard_TargetPairOnly': Level_GoTo_RedBallDynamics_Hard_TargetPairOnly,
    'Level_GoTo_RedBallDynamics_Hard_Test': Level_GoTo_RedBallDynamics_Hard_Test,
    'Level_GoTo_RedBallDynamics_Hard_Fixed': Level_GoTo_RedBallDynamics_Hard_Fixed,
})


# Goto Redball dynamic maze
class Level_GoToRedBall_Maze(RoomGridLevel):
    """
    Go to the red ball, 3x3 rooms, without distractors.
    """

    def __init__(self, seed=None):
        super().__init__(num_rows=3, num_cols=3, room_size=8, seed=seed)

    def gen_mission(self):
        self.place_agent()
        self.connect_all()
        # Drop the ball into a random room of the 3x3 layout.
        room_i = self.np_random.randint(3)
        room_j = self.np_random.randint(3)
        ball, _ = self.add_object(room_i, room_j, 'ball', 'red')

        # Make sure no unblocking is required
        self.check_objs_reachable()

        self.instrs = GoToInstr(ObjDesc(ball.type, ball.color))


class Level_GoTo_RedBallDynamics_Maze_Train(DynamicsLevel, Level_GoToRedBall_Maze):
    """Red-ball maze, six properties on three colors, held-out pairs excluded."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               held_out_cp_pairs=_hard_held_out(),
                               with_instruction=with_instruction)
        Level_GoToRedBall_Maze.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Maze_TargetPairOnly(DynamicsLevel, Level_GoToRedBall_Maze):
    """Red-ball maze with only the held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               color_property_map=_hard_target_map(),
                               with_instruction=with_instruction)
        Level_GoToRedBall_Maze.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Maze_Test(DynamicsLevel, Level_GoToRedBall_Maze):
    """Red-ball maze, nothing held out."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               with_instruction=with_instruction)
        Level_GoToRedBall_Maze.__init__(self, seed)


class Level_GoTo_RedBallDynamics_Maze_Fixed(DynamicsLevel, Level_GoToRedBall_Maze):
    """Red-ball maze with a fixed property per color (three colors drawn per reset)."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=_all_props(), n_floor_colors=3,
                               color_property_map=_six_color_fixed_map(),
                               with_instruction=with_instruction)
        Level_GoToRedBall_Maze.__init__(self, seed)


register_levels(__name__, {
    'Level_GoTo_RedBallDynamics_Maze_Train': Level_GoTo_RedBallDynamics_Maze_Train,
    'Level_GoTo_RedBallDynamics_Maze_TargetPairOnly': Level_GoTo_RedBallDynamics_Maze_TargetPairOnly,
    'Level_GoTo_RedBallDynamics_Maze_Test': Level_GoTo_RedBallDynamics_Maze_Test,
    'Level_GoTo_RedBallDynamics_Maze_Fixed': Level_GoTo_RedBallDynamics_Maze_Fixed,
})


# Put Next Local Dynamic
class Level_PutNextLocalDynamics_Train(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room size 8, 4 objects) with the 2-color held-out pairs excluded."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=_easy_held_out(),
                               with_instruction=with_instruction)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_TargetPairOnly(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal with only the held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map=_easy_target_map(),
                               with_instruction=with_instruction)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_Test(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal with every enabled pair allowed."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               with_instruction=with_instruction)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


class Level_PutNextLocalDynamics_Fixed(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal with a fixed property per color (two of three colors drawn per reset)."""

    def __init__(self, seed=None, with_instruction=True):
        fixed_map = {'green': ['slippery'],
                     'red': ['trap'],
                     'yellow': ['sticky']}
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map=fixed_map,
                               with_instruction=with_instruction)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)


register_levels(__name__, {
    'Level_PutNextLocalDynamics_Train': Level_PutNextLocalDynamics_Train,
    'Level_PutNextLocalDynamics_TargetPairOnly': Level_PutNextLocalDynamics_TargetPairOnly,
    'Level_PutNextLocalDynamics_Test': Level_PutNextLocalDynamics_Test,
    'Level_PutNextLocalDynamics_Fixed': Level_PutNextLocalDynamics_Fixed,
})


# Put Next to Dynamic
class Level_PutNextDynamics_Train(DynamicsLevel, Level_PutNext):
    """PutNext (room size 8) with the 2-color held-out pairs excluded."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=_easy_held_out(),
                               with_instruction=with_instruction)
        Level_PutNext.__init__(self, room_size=8, seed=seed)


class Level_PutNextDynamics_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """PutNext with only the held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map=_easy_target_map(),
                               with_instruction=with_instruction)
        Level_PutNext.__init__(self, room_size=8, seed=seed)


class Level_PutNextDynamics_Test(DynamicsLevel, Level_PutNext):
    """PutNext with every enabled pair allowed."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               with_instruction=with_instruction)
        Level_PutNext.__init__(self, room_size=8, seed=seed)

register_levels(
    __name__,
    {
        'Level_PutNextDynamics_Train': Level_PutNextDynamics_Train,
        'Level_PutNextDynamics_TargetPairOnly': Level_PutNextDynamics_TargetPairOnly,
        'Level_PutNextDynamics_Test': Level_PutNextDynamics_Test,
    },
)


# ---------------------------------------------------------------------------
# Shared configuration builders
#
# Every builder returns a brand-new object on each call, so each level
# instance receives its own list/dict exactly as an inline literal would.
# ---------------------------------------------------------------------------
def _held_out_pairs_six_props():
    """Return the (color, property index) pairs passed as ``held_out_cp_pairs``
    by the ``*_Train`` levels that enable property indices 0-5.

    Presumably the indices map onto the property names used in
    ``_target_map_six_props`` (e.g. 0 -> 'trap', 4 -> 'slippery') -- TODO
    confirm against DynamicsLevel.
    """
    return [
        ('green', 0), ('green', 2),
        ('grey', 3), ('grey', 4),
        ('blue', 1), ('blue', 5),
    ]


def _target_map_six_props():
    """Return the color -> property-name map passed as ``color_property_map``
    by the ``*_TargetPairOnly`` levels that enable property indices 0-5."""
    return {
        'green': ['trap', 'flipud'],
        'grey': ['fliplr', 'slippery'],
        'blue': ['sticky', 'magic'],
    }


def _held_out_pairs_three_props():
    """Return the (color, property index) pairs passed as ``held_out_cp_pairs``
    by the ``*_Train`` levels that enable property indices 0, 3 and 4."""
    return [('green', 0), ('blue', 4)]


def _target_map_three_props():
    """Return the color -> property-name map passed as ``color_property_map``
    by the ``*_TargetPairOnly`` levels that enable property indices 0, 3 and 4."""
    return {
        'green': ['trap'],
        'blue': ['slippery'],
    }


def _fixed_map_six_colors():
    """Return the one-property-per-color map passed as ``color_property_map``
    by the six-property ``*_Fixed`` levels.

    NOTE(review): the map names six colors while its callers pass
    ``n_floor_colors=3`` -- confirm how DynamicsLevel reconciles the two.
    """
    return {
        'green': ['slippery'],
        'grey': ['flipud'],
        'blue': ['fliplr'],
        'red': ['trap'],
        'purple': ['magic'],
        'yellow': ['sticky'],
    }


# ---------------------------------------------------------------------------
# PutNext + dynamics (hard)
# ---------------------------------------------------------------------------
class Level_PutNextDynamics_Hard_Train(DynamicsLevel, Level_PutNext):
    """Level_PutNext (room_size=8) with DynamicsLevel set up for property
    indices 0-5, three floor colors and six held-out (color, property) pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            held_out_cp_pairs=_held_out_pairs_six_props(),
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)


class Level_PutNextDynamics_Hard_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """Level_PutNext (room_size=8) with DynamicsLevel set up for property
    indices 0-5, three floor colors and an explicit color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_target_map_six_props(),
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)


class Level_PutNextDynamics_Hard_Test(DynamicsLevel, Level_PutNext):
    """Level_PutNext (room_size=8) with DynamicsLevel set up for property
    indices 0-5 and three floor colors; no map or held-out pairs are passed."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)


register_levels(
    __name__,
    {
        'Level_PutNextDynamics_Hard_Train': Level_PutNextDynamics_Hard_Train,
        'Level_PutNextDynamics_Hard_TargetPairOnly': Level_PutNextDynamics_Hard_TargetPairOnly,
        'Level_PutNextDynamics_Hard_Test': Level_PutNextDynamics_Hard_Test,
    },
)


# ---------------------------------------------------------------------------
# GoTo object in a maze + dynamics
# ---------------------------------------------------------------------------
class Level_GoToObjMaze_Dynamics_Train(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with one distractor and closed
    doors; DynamicsLevel gets property indices 0-5, three floor colors and six
    held-out (color, property) pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            held_out_cp_pairs=_held_out_pairs_six_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoToObjMaze_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with one distractor and closed
    doors; DynamicsLevel gets property indices 0-5, three floor colors and an
    explicit color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_target_map_six_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoToObjMaze_Dynamics_Test(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with one distractor and closed
    doors; DynamicsLevel gets property indices 0-5 and three floor colors, with
    no map or held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


register_levels(
    __name__,
    {
        'Level_GoToObjMaze_Dynamics_Train': Level_GoToObjMaze_Dynamics_Train,
        'Level_GoToObjMaze_Dynamics_TargetPairOnly': Level_GoToObjMaze_Dynamics_TargetPairOnly,
        'Level_GoToObjMaze_Dynamics_Test': Level_GoToObjMaze_Dynamics_Test,
    },
)


# ---------------------------------------------------------------------------
# GoToLocal + dynamics
# ---------------------------------------------------------------------------
class Level_GoToLocal_Dynamics_Train(DynamicsLevel, Level_GoToLocal):
    """Level_GoToLocal (room_size=11, eight distractors) with DynamicsLevel set
    up for property indices 0, 3, 4, two floor colors and two held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=_held_out_pairs_three_props(),
            with_instruction=with_instruction,
        )
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)


class Level_GoToLocal_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoToLocal):
    """Level_GoToLocal (room_size=11, eight distractors) with DynamicsLevel set
    up for property indices 0, 3, 4, two floor colors and an explicit
    color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=_target_map_three_props(),
            with_instruction=with_instruction,
        )
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)


class Level_GoToLocal_Dynamics_Test(DynamicsLevel, Level_GoToLocal):
    """Level_GoToLocal (room_size=11, eight distractors) with DynamicsLevel set
    up for property indices 0, 3, 4 and two floor colors; no map or held-out
    pairs are passed."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)


register_levels(
    __name__,
    {
        'Level_GoToLocal_Dynamics_Train': Level_GoToLocal_Dynamics_Train,
        'Level_GoToLocal_Dynamics_TargetPairOnly': Level_GoToLocal_Dynamics_TargetPairOnly,
        'Level_GoToLocal_Dynamics_Test': Level_GoToLocal_Dynamics_Test,
    },
)


# ---------------------------------------------------------------------------
# GoTo + dynamics
# ---------------------------------------------------------------------------
class Level_GoTo_Dynamics_Train(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0, 3, 4, two floor colors
    and two held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=_held_out_pairs_three_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoTo_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0, 3, 4, two floor colors
    and an explicit color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=_target_map_three_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoTo_Dynamics_Test(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0, 3, 4 and two floor
    colors, with no map or held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


register_levels(
    __name__,
    {
        'Level_GoTo_Dynamics_Train': Level_GoTo_Dynamics_Train,
        'Level_GoTo_Dynamics_TargetPairOnly': Level_GoTo_Dynamics_TargetPairOnly,
        'Level_GoTo_Dynamics_Test': Level_GoTo_Dynamics_Test,
    },
)


# ---------------------------------------------------------------------------
# GoTo + dynamics (hard)
# ---------------------------------------------------------------------------
class Level_GoTo_Dynamics_Hard_Train(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0-5, three floor colors
    and six held-out (color, property) pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            held_out_cp_pairs=_held_out_pairs_six_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoTo_Dynamics_Hard_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0-5, three floor colors
    and an explicit color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_target_map_six_props(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoTo_Dynamics_Hard_Test(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0-5 and three floor
    colors, with no map or held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


class Level_GoTo_Dynamics_Hard_Fixed(DynamicsLevel, Level_GoTo):
    """Level_GoTo on a 3x3 grid of size-11 rooms with eleven distractors and
    closed doors; DynamicsLevel gets property indices 0-5, three floor colors
    and a one-property-per-color map covering six colors."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_fixed_map_six_colors(),
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )


register_levels(
    __name__,
    {
        'Level_GoTo_Dynamics_Hard_Train': Level_GoTo_Dynamics_Hard_Train,
        'Level_GoTo_Dynamics_Hard_TargetPairOnly': Level_GoTo_Dynamics_Hard_TargetPairOnly,
        'Level_GoTo_Dynamics_Hard_Test': Level_GoTo_Dynamics_Hard_Test,
        'Level_GoTo_Dynamics_Hard_Fixed': Level_GoTo_Dynamics_Hard_Fixed,
    },
)


# ---------------------------------------------------------------------------
# Unlock + dynamics
# ---------------------------------------------------------------------------
class Level_Unlock_Dynamic_Train(DynamicsLevel, Level_Unlock):
    """Level_Unlock on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and six held-out (color, property) pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            held_out_cp_pairs=_held_out_pairs_six_props(),
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_Unlock_Dynamic_TargetPairOnly(DynamicsLevel, Level_Unlock):
    """Level_Unlock on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and an explicit color_property_map."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_target_map_six_props(),
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_Unlock_Dynamic_Test(DynamicsLevel, Level_Unlock):
    """Level_Unlock on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5 and three floor colors, with no map or held-out pairs."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_Unlock_Dynamic_Fixed(DynamicsLevel, Level_Unlock):
    """Level_Unlock on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and a one-property-per-color map covering
    six colors."""

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_fixed_map_six_colors(),
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


register_levels(
    __name__,
    {
        'Level_Unlock_Dynamic_Train': Level_Unlock_Dynamic_Train,
        'Level_Unlock_Dynamic_TargetPairOnly': Level_Unlock_Dynamic_TargetPairOnly,
        'Level_Unlock_Dynamic_Test': Level_Unlock_Dynamic_Test,
        'Level_Unlock_Dynamic_Fixed': Level_Unlock_Dynamic_Fixed,
    },
)


# ---------------------------------------------------------------------------
# Pickup by location + dynamics
#
# NOTE(review): unlike the level families above, these constructors take only
# ``seed`` and do not forward ``with_instruction`` to DynamicsLevel -- confirm
# whether that is intentional.
# ---------------------------------------------------------------------------
class Level_PickupLoc_Dynamic_Train(DynamicsLevel, Level_PickupLoc):
    """Level_PickupLoc with DynamicsLevel set up for property indices 0, 3, 4,
    two floor colors and two held-out (color, property) pairs."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=_held_out_pairs_three_props(),
        )
        Level_PickupLoc.__init__(self, seed=seed)


class Level_PickupLoc_Dynamic_TargetPairOnly(DynamicsLevel, Level_PickupLoc):
    """Level_PickupLoc with DynamicsLevel set up for property indices 0, 3, 4,
    two floor colors and an explicit color_property_map."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=_target_map_three_props(),
        )
        Level_PickupLoc.__init__(self, seed=seed)


class Level_PickupLoc_Dynamic_Test(DynamicsLevel, Level_PickupLoc):
    """Level_PickupLoc with DynamicsLevel set up for property indices 0, 3, 4
    and two floor colors; no map or held-out pairs are passed."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
        )
        Level_PickupLoc.__init__(self, seed=seed)


class Level_PickupLoc_Dynamic_Fixed(DynamicsLevel, Level_PickupLoc):
    """Level_PickupLoc with DynamicsLevel set up for property indices 0, 3, 4,
    two floor colors and a one-property-per-color map for three colors."""

    def __init__(self, seed=None):
        # NOTE(review): the map names three colors (n_floor_colors is 2) and
        # includes 'sticky' -- confirm both are compatible with
        # enabled_properties=[0, 3, 4] in DynamicsLevel.
        fixed_map = {
            'green': ['slippery'],
            'red': ['trap'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=fixed_map,
        )
        Level_PickupLoc.__init__(self, seed=seed)


register_levels(
    __name__,
    {
        'Level_PickupLoc_Dynamic_Train': Level_PickupLoc_Dynamic_Train,
        'Level_PickupLoc_Dynamic_TargetPairOnly': Level_PickupLoc_Dynamic_TargetPairOnly,
        'Level_PickupLoc_Dynamic_Test': Level_PickupLoc_Dynamic_Test,
        'Level_PickupLoc_Dynamic_Fixed': Level_PickupLoc_Dynamic_Fixed,
    },
)


# ---------------------------------------------------------------------------
# GoTo sequence + dynamics
#
# NOTE(review): these constructors also take only ``seed`` and do not forward
# ``with_instruction`` to DynamicsLevel -- confirm whether that is intentional.
# ---------------------------------------------------------------------------
class Level_GotoSeq_Dynamic_Train(DynamicsLevel, Level_GoToSeq):
    """Level_GoToSeq on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and six held-out (color, property) pairs."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            held_out_cp_pairs=_held_out_pairs_six_props(),
        )
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_GotoSeq_Dynamic_TargetPairOnly(DynamicsLevel, Level_GoToSeq):
    """Level_GoToSeq on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and an explicit color_property_map."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_target_map_six_props(),
        )
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_GotoSeq_Dynamic_Test(DynamicsLevel, Level_GoToSeq):
    """Level_GoToSeq on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5 and three floor colors, with no map or held-out pairs."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
        )
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


class Level_GotoSeq_Dynamic_Fixed(DynamicsLevel, Level_GoToSeq):
    """Level_GoToSeq on a 3x3 grid of size-11 rooms; DynamicsLevel gets property
    indices 0-5, three floor colors and a one-property-per-color map covering
    six colors."""

    def __init__(self, seed=None):
        DynamicsLevel.__init__(
            self,
            enabled_properties=list(range(6)),
            n_floor_colors=3,
            color_property_map=_fixed_map_six_colors(),
        )
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)


register_levels(
    __name__,
    {
        'Level_GotoSeq_Dynamic_Train': Level_GotoSeq_Dynamic_Train,
        'Level_GotoSeq_Dynamic_TargetPairOnly': Level_GotoSeq_Dynamic_TargetPairOnly,
        'Level_GotoSeq_Dynamic_Test': Level_GotoSeq_Dynamic_Test,
        'Level_GotoSeq_Dynamic_Fixed': Level_GotoSeq_Dynamic_Fixed,
    },
)