├── babyai_levels.png
├── babyaiPP
    ├── __init__.py
    ├── descriptive_levels.py
    ├── decriptive_level_base.py
    ├── additional_levels.py
    └── dynamics_levels.py
├── .gitmodules
├── scripts
    ├── train_att_fusion_agent_redball.sh
    ├── train_concat_fusion_agent_redball.sh
    ├── train_film_agent_redball.sh
    └── train_image_only_agent_redball.sh
├── experiment
    ├── bAIPlusgui.py
    ├── train_il.py
    ├── batch_evaluate.py
    ├── evaluate.py
    ├── arguments.py
    ├── make_agent_demos.py
    ├── train_rl.py
    ├── model.py
    └── imitation.py
├── LICENSE
├── .gitignore
└── README.md


/babyai_levels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caotians1/BabyAIPlusPlus/HEAD/babyai_levels.png


--------------------------------------------------------------------------------
/babyaiPP/__init__.py:
--------------------------------------------------------------------------------
1 | from . import descriptive_levels
2 | from . import dynamics_levels
3 | from . import decriptive_level_base
4 | from . import additional_levels


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "gym-minigrid"]
 2 | 	path = gym-minigrid
 3 | 	url = https://github.com/caotians1/gym-minigrid.git
 4 | 	branch = master
 5 | 
 6 | [submodule "babyai"]
 7 | 	path = babyai
 8 | 	url = https://github.com/caotians1/babyai.git
 9 | 	branch = master
10 | 


--------------------------------------------------------------------------------
/scripts/train_att_fusion_agent_redball.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | frames=5000000        # forwarded as --frames
3 | batch_size=12800      # forwarded as --batch-size
4 | procs=256             # forwarded as --procs
5 | log_interval=2        # forwarded as --log-interval
6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0
7 | # Launch RL training with --arch fusion (relative script path: run from the repository root); expansions are quoted to prevent word splitting/globbing.
8 | python experiment/train_rl.py --env "$env" --arch fusion --tb --frames "$frames" --batch-size "$batch_size" --procs "$procs" --log-interval "$log_interval"
9 | 


--------------------------------------------------------------------------------
/scripts/train_concat_fusion_agent_redball.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | frames=5000000        # forwarded as --frames
3 | batch_size=12800      # forwarded as --batch-size
4 | procs=256             # forwarded as --procs
5 | log_interval=2        # forwarded as --log-interval
6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0
7 | # Launch RL training with --arch cnn (relative script path: run from the repository root); expansions are quoted to prevent word splitting/globbing.
8 | python experiment/train_rl.py --env "$env" --arch cnn --tb --frames "$frames" --batch-size "$batch_size" --procs "$procs" --log-interval "$log_interval"
9 | 


--------------------------------------------------------------------------------
/scripts/train_film_agent_redball.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | frames=5000000        # forwarded as --frames
3 | batch_size=12800      # forwarded as --batch-size
4 | procs=256             # forwarded as --procs
5 | log_interval=2        # forwarded as --log-interval
6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0
7 | # Launch RL training with --arch expert_filmcnn (relative script path: run from the repository root); expansions are quoted to prevent word splitting/globbing.
8 | python experiment/train_rl.py --env "$env" --arch expert_filmcnn --tb --frames "$frames" --batch-size "$batch_size" --procs "$procs" --log-interval "$log_interval"
9 | 


--------------------------------------------------------------------------------
/scripts/train_image_only_agent_redball.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | frames=5000000        # forwarded as --frames
3 | batch_size=12800      # forwarded as --batch-size
4 | procs=256             # forwarded as --procs
5 | log_interval=2        # forwarded as --log-interval
6 | env=BabyAI-GoTo_RedBallDynamics_Train-v0
7 | # Launch RL training with --arch cnn and --no-desc (relative script path: run from the repository root); expansions are quoted to prevent word splitting/globbing.
8 | python experiment/train_rl.py --env "$env" --arch cnn --no-desc --tb --frames "$frames" --batch-size "$batch_size" --procs "$procs" --log-interval "$log_interval"
9 | 


--------------------------------------------------------------------------------
/experiment/bAIPlusgui.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Quick script to register babyAI++ levels before passing to babyAI gui.
 3 | """
 4 | 
 5 | import sys
 6 | import os
 7 | sys.path.append(os.getcwd())
 8 | sys.path.append(os.path.join(os.getcwd(),"babyai/scripts"))
 9 | import babyaiPP
10 | import gui
11 | 
12 | if __name__ == '__main__':  # only when executed as a script, after babyaiPP has been imported above
13 |     gui.main(sys.argv)  # hand the unmodified command line to the gui module found under babyai/scripts
14 | 


--------------------------------------------------------------------------------
/babyaiPP/descriptive_levels.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | from babyai.levels.verifier import *
 3 | from babyai.levels.levelgen import register_levels
 4 | from babyai.levels.iclr19_levels import Level_GoTo
 5 | from .decriptive_level_base import DescriptiveLevel
 6 | from .dynamics_levels import DynamicsLevel
 7 | 
 8 | class Level_GoTo_Desc(DescriptiveLevel, Level_GoTo):  # Level_GoTo combined with the DescriptiveLevel base
 9 |     def __init__(self,
10 |                  room_size=8,
11 |                  num_rows=3,
12 |                  num_cols=3,
13 |                  num_dists=18,
14 |                  doors_open=False,
15 |                  seed=None
16 |                  ):
17 |         """Initialise both bases explicitly; the level arguments are forwarded unchanged to ``Level_GoTo``."""
18 |         DescriptiveLevel.__init__(self, 1, 1.0)  # NOTE(review): meaning of (1, 1.0) is defined in decriptive_level_base -- confirm
19 |         Level_GoTo.__init__(self, room_size, num_rows, num_cols, num_dists, doors_open, seed)
20 | 
21 | class Level_GoTo_DescDynamic(DescriptiveLevel, DynamicsLevel, Level_GoTo):  # Level_GoTo with both DescriptiveLevel and DynamicsLevel bases
22 |     def __init__(self,
23 |                  room_size=8,
24 |                  num_rows=3,
25 |                  num_cols=3,
26 |                  num_dists=18,
27 |                  doors_open=False,
28 |                  seed=None):
29 |         DynamicsLevel.__init__(self, [1, 3, 5], 3, seed, 1, 1.0, room_size, num_rows, num_cols, num_dists, doors_open, seed)  # NOTE(review): only DynamicsLevel.__init__ is invoked here and `seed` is passed twice positionally -- confirm against its signature
30 | 
31 | 
32 | register_levels(__name__, {'Level_GoTo_Desc':Level_GoTo_Desc, 'Level_GoTo_DescDynamic':Level_GoTo_DescDynamic})  # presumably exposes both classes as gym environments via babyai.levels.levelgen -- confirm
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, caotians1
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | logs/
  2 | models/
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | pip-wheel-metadata/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 97 | __pypackages__/
 98 | 
 99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | .idea/
133 | .idea/modules.xml
134 | .idea/workspace.xml
135 | 


--------------------------------------------------------------------------------
/experiment/train_il.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | """
 4 | Script to train agent through imitation learning using demonstrations.
 5 | """
 6 | 
 7 | import os
 8 | import sys	
 9 | sys.path.append(os.getcwd())
10 | 
11 | import csv
12 | import copy
13 | import gym
14 | import time
15 | import datetime
16 | import numpy as np
17 | import sys
18 | import logging
19 | import torch
20 | import babyaiPP
21 | import babyai.utils as utils
22 | 
23 | from experiment.arguments import ArgumentParser
24 | from experiment.imitation import ImitationLearning
25 | 
26 | # Parse arguments
27 | parser = ArgumentParser()
28 | parser.add_argument("--demos", default=None,
29 |                     help="demos filename (REQUIRED or demos-origin or multi-demos required)")
30 | parser.add_argument("--demos-origin", required=False,
31 |                     help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)")
32 | parser.add_argument("--episodes", type=int, default=0,
33 |                     help="number of episodes of demonstrations to use"
34 |                          "(default: 0, meaning all demos)")
35 | parser.add_argument("--multi-env", nargs='*', default=None,
36 |                   help="name of the environments used for validation/model loading")
37 | parser.add_argument("--multi-demos", nargs='*', default=None,
38 |                     help="demos filenames for envs to train on (REQUIRED when multi-env is specified)")
39 | parser.add_argument("--multi-episodes", type=int, nargs='*', default=None,
40 |                     help="number of episodes of demos to use from each file (REQUIRED when multi-env is specified)")
41 | parser.add_argument("--save-interval", type=int, default=1,
42 |                     help="number of epochs between two saves (default: 1, 0 means no saving)")
43 | 
44 | 
45 | def main(args):
46 |     # Verify the arguments when we train on multiple environments
47 |     # No need to check for the length of len(args.multi_env) in case, for some reason, we need to validate on other envs
48 |     if args.multi_env is not None:
49 |         assert len(args.multi_demos) == len(args.multi_episodes)
50 | 
51 |     args.model = args.model or ImitationLearning.default_model_name(args)
52 |     utils.configure_logging(args.model)
53 |     logger = logging.getLogger(__name__)
54 | 
55 |     print (args)
56 |     il_learn = ImitationLearning(args)
57 | 
58 |     # Define logger and Tensorboard writer
59 |     header = (["update", "frames", "FPS", "duration", "entropy", "policy_loss", "train_accuracy"]
60 |               + ["validation_accuracy"])
61 |     if args.multi_env is None:
62 |         header.extend(["validation_return", "validation_success_rate"])
63 |     else:
64 |         header.extend(["validation_return_{}".format(env) for env in args.multi_env])
65 |         header.extend(["validation_success_rate_{}".format(env) for env in args.multi_env])
66 |     writer = None
67 |     if args.tb:
68 |         from tensorboardX import SummaryWriter
69 |         writer = SummaryWriter(utils.get_log_dir(args.model))
70 | 
71 |     # Define csv writer
72 |     csv_writer = None
73 |     csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
74 |     first_created = not os.path.exists(csv_path)
75 |     # we don't buffer data going in the csv log, cause we assume
76 |     # that one update will take much longer that one write to the log
77 |     csv_writer = csv.writer(open(csv_path, 'a', 1))
78 |     if first_created:
79 |         csv_writer.writerow(header)
80 | 
81 |     # Get the status path
82 |     status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
83 | 
84 |     # Log command, availability of CUDA, and model
85 |     logger.info(args)
86 |     logger.info("CUDA available: {}".format(torch.cuda.is_available()))
87 |     logger.info(il_learn.acmodel)
88 | 
89 |     il_learn.train(il_learn.train_demos, writer, csv_writer, status_path, header)
90 | 
91 | 
92 | if __name__ == "__main__":  # script entry point
93 |     args = parser.parse_args()  # parse the command line with the module-level parser
94 |     main(args)


--------------------------------------------------------------------------------
/experiment/batch_evaluate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
  4 | Evaluate a trained model or bot
  5 | """
  6 | 
  7 | import os
  8 | import sys
  9 | sys.path.append(os.getcwd())
 10 | 
 11 | import argparse
 12 | import gym
 13 | import time
 14 | import datetime
 15 | import math
 16 | import babyaiPP
 17 | import babyai.utils as utils
 18 | from babyai.evaluate import evaluate_demo_agent, batch_evaluate, evaluate
 19 | # Command-line options; --exp_name is only used to label the printed results row
 20 | 
 21 | parser = argparse.ArgumentParser()
 22 | parser.add_argument("--exp_name", required=True, type=str)
 23 | parser.add_argument("--env", required=True,
 24 |                     help="name of the environment to be run (REQUIRED)")
 25 | parser.add_argument("--model", default=None,
 26 |                     help="name of the trained model (REQUIRED or --demos-origin or --demos REQUIRED)")
 27 | parser.add_argument("--episodes", type=int, default=1000,
 28 |                     help="number of episodes of evaluation (default: 1000)")
 29 | parser.add_argument("--seed", type=int, default=int(1e9),
 30 |                     help="random seed")
 31 | parser.add_argument("--argmax", action="store_true", default=False,
 32 |                     help="action with highest probability is selected for model agent")
 33 | 
 34 | def main_train(args, seed, episodes):
 35 |     # Set seed for all randomness sources
 36 |     utils.seed(seed)
 37 | 
 38 |     # Define agent
 39 |     # do train environment
 40 |     env_name = args.env + "_Train-v0"
 41 |     env = gym.make(env_name)
 42 |     env.seed(seed)
 43 |     agent = utils.load_agent(env, args.model, argmax=args.argmax, env_name=env_name)
 44 |     if args.model is None and args.episodes > len(agent.demos):
 45 |         # Set the number of episodes to be the number of demos
 46 |         episodes = len(agent.demos)
 47 | 
 48 |     # Evaluate
 49 |     if isinstance(agent, utils.DemoAgent):
 50 |         logs = evaluate_demo_agent(agent, episodes)
 51 |     elif isinstance(agent, utils.BotAgent):
 52 |         logs = evaluate(agent, env, episodes, False)
 53 |     else:
 54 |         logs = batch_evaluate(agent, env_name, seed, episodes)
 55 | 
 56 |     return logs
 57 | 
 58 | 
 59 | def main_test(args, seed, episodes):
 60 |     # Set seed for all randomness sources
 61 |     utils.seed(seed)
 62 | 
 63 |     # Define agent
 64 |     # do test environment
 65 |     env_name = args.env + "_Test-v0"
 66 |     env = gym.make(env_name)
 67 |     env.seed(seed)
 68 |     agent = utils.load_agent(env, args.model, argmax = args.argmax, env_name=env_name)
 69 |     if args.model is None and args.episodes > len(agent.demos):
 70 |         # Set the number of episodes to be the number of demos
 71 |         episodes = len(agent.demos)
 72 | 
 73 |     # Evaluate
 74 |     if isinstance(agent, utils.DemoAgent):
 75 |         logs = evaluate_demo_agent(agent, episodes)
 76 |     elif isinstance(agent, utils.BotAgent):
 77 |         logs = evaluate(agent, env, episodes, False)
 78 |     else:
 79 |         logs = batch_evaluate(agent, env_name, seed, episodes)
 80 | 
 81 |     return logs
 82 | 
 83 | if __name__ == "__main__":
 84 |     args = parser.parse_args()
 85 | 
 86 |     start_time = time.time()
 87 |     logs = main_train(args, args.seed, args.episodes)
 88 |     logs_ts = main_test(args, args.seed, args.episodes)
 89 |     end_time = time.time()
 90 | 
 91 |     # Print logs
 92 |     return_per_episode_tr = utils.synthesize(logs["return_per_episode"])
 93 |     success_per_episode_tr = utils.synthesize(
 94 |         [1 if r > 0 else 0 for r in logs["return_per_episode"]])
 95 | 
 96 |     num_frames_per_episode_tr = utils.synthesize(logs["num_frames_per_episode"])
 97 |     succ_se_tr = math.sqrt(success_per_episode_tr['mean'] * (1 - success_per_episode_tr['mean']) / args.episodes)
 98 |     R_se_tr = return_per_episode_tr['std']/math.sqrt(args.episodes)
 99 |     N_se_tr = num_frames_per_episode_tr['std']/math.sqrt(args.episodes)
100 | 
101 |     return_per_episode_ts = utils.synthesize(logs_ts["return_per_episode"])
102 |     success_per_episode_ts = utils.synthesize(
103 |         [1 if r > 0 else 0 for r in logs_ts["return_per_episode"]])
104 | 
105 |     num_frames_per_episode_ts = utils.synthesize(logs_ts["num_frames_per_episode"])
106 |     succ_se_ts = math.sqrt(success_per_episode_ts['mean'] * (1 - success_per_episode_ts['mean']) / args.episodes)
107 |     R_se_ts = return_per_episode_ts['std'] / math.sqrt(args.episodes)
108 |     N_se_ts = num_frames_per_episode_ts['std'] / math.sqrt(args.episodes)
109 | 
110 |     print(
111 |         "{} & ${:.3f}\pm{:.3f}$ & ${:.3f}\pm{:.3f}$ & ${:.3f}\pm{:.3f}$ & ${:.3f}\pm{:.3f}$ & ${:.3f}\pm{:.3f}$ & ${:.3f}\pm{:.3f}$ \\\\"
112 |         .format(args.exp_name, success_per_episode_tr['mean'], succ_se_tr, return_per_episode_tr['mean'], R_se_tr,
113 |                 num_frames_per_episode_tr['mean'], N_se_tr,
114 |                 success_per_episode_ts['mean'], succ_se_ts, return_per_episode_ts['mean'], R_se_ts,
115 |                 num_frames_per_episode_ts['mean'], N_se_ts))
116 | 


--------------------------------------------------------------------------------
/experiment/evaluate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
  4 | Evaluate a trained model or bot
  5 | """
  6 | 
  7 | import os
  8 | import sys
  9 | sys.path.append(os.getcwd())
 10 | 
 11 | import argparse
 12 | import gym
 13 | import time
 14 | import datetime
 15 | import math
 16 | import babyaiPP
 17 | import babyai.utils as utils
 18 | from babyai.evaluate import evaluate_demo_agent, batch_evaluate, evaluate
 19 | # Command-line options; the agent is selected through --model, --demos-origin or --demos
 20 | 
 21 | parser = argparse.ArgumentParser()
 22 | parser.add_argument("--env", required=True,
 23 |                     help="name of the environment to be run (REQUIRED)")
 24 | parser.add_argument("--model", default=None,
 25 |                     help="name of the trained model (REQUIRED or --demos-origin or --demos REQUIRED)")
 26 | parser.add_argument("--demos-origin", default=None,
 27 |                     help="origin of the demonstrations: human | agent (REQUIRED or --model or --demos REQUIRED)")
 28 | parser.add_argument("--demos", default=None,
 29 |                     help="name of the demos file (REQUIRED or --demos-origin or --model REQUIRED)")
 30 | parser.add_argument("--episodes", type=int, default=1000,
 31 |                     help="number of episodes of evaluation (default: 1000)")
 32 | parser.add_argument("--seed", type=int, default=int(1e9),
 33 |                     help="random seed")
 34 | parser.add_argument("--argmax", action="store_true", default=False,
 35 |                     help="action with highest probability is selected for model agent")
 36 | parser.add_argument("--contiguous-episodes", action="store_true", default=False,
 37 |                     help="Make sure episodes on which evaluation is done are contiguous")
 38 | parser.add_argument("--worst-episodes-to-show", type=int, default=10,
 39 |                     help="The number of worse episodes to show")
 40 | 
 41 | 
 42 | def main(args, seed, episodes):
 43 |     # Set seed for all randomness sources
 44 |     utils.seed(seed)
 45 | 
 46 |     # Define agent
 47 | 
 48 |     env = gym.make(args.env)
 49 |     env.seed(seed)
 50 |     agent = utils.load_agent(env, args.model, args.demos, args.demos_origin, args.argmax, args.env)
 51 |     if args.model is None and args.episodes > len(agent.demos):
 52 |         # Set the number of episodes to be the number of demos
 53 |         episodes = len(agent.demos)
 54 | 
 55 |     # Evaluate
 56 |     if isinstance(agent, utils.DemoAgent):
 57 |         logs = evaluate_demo_agent(agent, episodes)
 58 |     elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
 59 |         logs = evaluate(agent, env, episodes, False)
 60 |     else:
 61 |         logs = batch_evaluate(agent, args.env, seed, episodes)
 62 | 
 63 | 
 64 |     return logs
 65 | 
 66 | 
 67 | if __name__ == "__main__":
 68 |     args = parser.parse_args()
 69 |     assert_text = "ONE of --model or --demos-origin or --demos must be specified."
 70 |     assert int(args.model is None) + int(args.demos_origin is None) + int(args.demos is None) == 2, assert_text
 71 | 
 72 |     start_time = time.time()
 73 |     logs = main(args, args.seed, args.episodes)
 74 |     end_time = time.time()
 75 | 
 76 |     # Print logs
 77 |     num_frames = sum(logs["num_frames_per_episode"])
 78 |     fps = num_frames/(end_time - start_time)
 79 |     ellapsed_time = int(end_time - start_time)
 80 |     duration = datetime.timedelta(seconds=ellapsed_time)
 81 | 
 82 |     if args.model is not None:
 83 |         return_per_episode = utils.synthesize(logs["return_per_episode"])
 84 |         success_per_episode = utils.synthesize(
 85 |             [1 if r > 0 else 0 for r in logs["return_per_episode"]])
 86 | 
 87 |     num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])
 88 | 
 89 |     if args.model is not None:
 90 |         print("F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} {:.4f} | F:xsmM {:.1f} {:.1f} {} {}"
 91 |               .format(num_frames, fps, duration,
 92 |                       *return_per_episode.values(),
 93 |                       success_per_episode['mean'],
 94 |                       math.sqrt(success_per_episode['mean'] * (1 - success_per_episode['mean']) / args.episodes),
 95 |                       *num_frames_per_episode.values()))
 96 |     else:
 97 |         print("F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}"
 98 |               .format(num_frames, fps, duration, *num_frames_per_episode.values()))
 99 | 
100 | 
101 |     indexes = sorted(range(len(logs["num_frames_per_episode"])), key=lambda k: - logs["num_frames_per_episode"][k])
102 | 
103 |     n = args.worst_episodes_to_show
104 |     if n > 0:
105 |         print("{} worst episodes:".format(n))
106 |         for i in indexes[:n]:
107 |             if 'seed_per_episode' in logs:
108 |                 print(logs['seed_per_episode'][i])
109 |             if args.model is not None:
110 |                 print("- episode {}: R={}, F={}".format(i, logs["return_per_episode"][i], logs["num_frames_per_episode"][i]))
111 |             else:
112 |                 print("- episode {}: F={}".format(i, logs["num_frames_per_episode"][i]))
113 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # BabyAI++
 2 | This is the implementation of [BabyAI++ : Towards Grounded-Language Learning beyond Memorization](https://arxiv.org/pdf/2004.07200.pdf), as described in the following ICLR2020 [BeTR-RL](http://www.betr-rl.ml/2020/) workshop paper. 
 3 | 
 4 | ```
 5 | @inproceedings{cao2020babiai++,
 6 |   title={BabyAI++ : Towards Grounded-Language Learning beyond Memorization},
 7 |   author={Cao, Tianshi and Wang, Jingkang and Zhang, Yining and Manivasagam, Sivabalan},
 8 |   booktitle={ICLR},
 9 |   year={2020}
10 | }
11 | ```
12 | 
13 | ## Introduction
14 | Although recent works have shown the benefits of instructive texts in goal-conditioned RL, few have studied whether descriptive texts help agents to generalize across dynamic environments. To promote research in this direction, we introduce a new platform BabyAI++, to generate various dynamic environments along with corresponding descriptive texts (see following Table). Experiments on BabyAI++ show strong evidence that using descriptive texts improves the generalization of RL agents across environments with varied dynamics.
15 | 
16 | | <img width=290/>Environments                   | Instructive Text   | Descriptive Text   | State Manipulation | Variable Dynamics  | Procedural Envs    | Multi-task         |
17 | |----------------------------------|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
18 | | [Kolve et al. (2017)](https://arxiv.org/abs/1712.05474)              | :x:                | :x:                | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x:                |
19 | | [Narasimhan et al. (2017)](https://arxiv.org/abs/1708.00133)         | :heavy_check_mark: | :heavy_check_mark: | :x:                | :x:                | :heavy_check_mark: | :x:                |
20 | | [Wu et al. (2018)](https://arxiv.org/abs/1801.02209)                 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :x:                | :x:                | :x:                |
21 | | [Chaplot et al. (2018)](https://arxiv.org/abs/1706.07230)            | :heavy_check_mark: | :x:                | :x:                | :x:                | :heavy_check_mark: | :x:                |
22 | | [Chevalier-Boisvert et al. (2019)](https://arxiv.org/abs/1810.08272) | :heavy_check_mark: | :x:                | :heavy_check_mark: | :x:                | :heavy_check_mark: | :heavy_check_mark: |
23 | | __BabyAI++ (Ours, 2020)__        | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
24 | 
25 | 
26 | ## Getting started
27 | First, clone this repository to your local device recursively:
28 | ```bash
29 | git clone --recursive https://github.com/caotians1/BabyAIPlusPlus.git
30 | ```
31 | Install the prerequisites (Anaconda):
32 | ```bash
33 | conda install pytorch=1.2.0 torchvision -c pytorch
34 | conda install pyqt
35 | pip install lorem tensorboardX blosc gym
36 | ```
37 | Install `gym-minigrid` and `babyai`:
38 | ```bash
39 | cd gym-minigrid
40 | pip install --editable .
41 | cd ../babyai
42 | pip install --no-deps --editable .
43 | ```
44 | 
45 | ## Using BabyAI++
46 | Play a level in BabyAI++:
47 | ```
48 | python experiment/bAIPlusgui.py --env="BabyAI-GoTo_Dynamics_Train-v0"
49 | ```
50 | To train a model in BabyAI++, see `scripts/train_film_agent_redball.sh` and `experiment/train_rl.py`. 
51 | 
52 | ## BabyAI++ Levels
53 | Please refer to [`babyaiPP/dynamics_levels.py`](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyaiPP/dynamics_levels.py) and [`babyaiPP/additional_levels.py`](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyaiPP/additional_levels.py) for the definition of supported levels. The following table lists the available environments of BabyAI++ currently.
54 | 
55 | ![babyai_levels](https://github.com/caotians1/BabyAIPlusPlus/blob/master/babyai_levels.png )
56 | 
57 | 
58 | ## Customize BabyAI++ Levels
59 | You can also define your own environments with descriptive texts and varying dynamics. Here is an example that creates the `PutNextDynamics_Medium` train and test levels:
60 | ```
61 | # define dynamics setting
62 | class Level_PutNextDynamics_Medium_Train(DynamicsLevel, Level_PutNext):
63 |     def __init__(self, seed=None, with_instruction=True):
64 |         DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=2,
65 |                                held_out_cp_pairs=[('green', 0), ('green', 2), ('green', 4),
66 |                                                   ('blue', 1), ('blue', 3), ('blue', 5)],
67 |                                with_instruction=with_instruction)
68 |         Level_PutNext.__init__(self, room_size=11, seed=seed)
69 | 
70 | class Level_PutNextDynamics_Medium_Test(DynamicsLevel, Level_PutNext):
71 |     def __init__(self, seed=None, with_instruction=True):
72 |         DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=2,
73 |                                with_instruction=with_instruction)
74 |         Level_PutNext.__init__(self, room_size=11, seed=seed)
75 |         
76 | # register your environment
77 | register_levels(__name__, {'Level_PutNextDynamics_Medium_Train': Level_PutNextDynamics_Medium_Train})
78 | ```
79 | Note that you can augment any level supported by the [BabyAI platform](https://github.com/mila-iqia/babyai) with varying dynamics and descriptive texts by inheriting from `DynamicsLevel`.
80 | 
81 | ## Questions/Bugs
82 | Please submit a GitHub issue or contact jcao@cs.toronto.edu or wangjk@cs.toronto.edu if you have any questions or find any bugs. Contributions to this repository (e.g., pull requests for other baselines) are also very welcome.
83 | 


--------------------------------------------------------------------------------
/experiment/arguments.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Common arguments for BabyAI training scripts
  3 | """
  4 | 
  5 | import os
  6 | import argparse
  7 | import numpy as np
  8 | 
  9 | 
 10 | class ArgumentParser(argparse.ArgumentParser):
 11 | 
 12 |     def __init__(self):
 13 |         super().__init__()
 14 | 
 15 |         # Base arguments
 16 |         self.add_argument("--env", default=None,
 17 |                             help="name of the environment to train on (REQUIRED)")
 18 |         self.add_argument("--model", default=None,
 19 |                             help="name of the model (default: ENV_ALGO_TIME)")
 20 |         self.add_argument("--pretrained-model", default=None,
 21 |                             help='If you\'re using a pre-trained model and want the fine-tuned one to have a new name')
 22 |         self.add_argument("--seed", type=int, default=1,
 23 |                             help="random seed; if 0, a random random seed will be used  (default: 1)")
 24 |         self.add_argument("--task-id-seed", action='store_true',
 25 |                             help="use the task id within a Slurm job array as the seed")
 26 |         self.add_argument("--procs", type=int, default=64,
 27 |                             help="number of processes (default: 64)")
 28 |         self.add_argument("--tb", action="store_true", default=False,
 29 |                             help="log into Tensorboard")
 30 | 
 31 |         # Training arguments
 32 |         self.add_argument("--log-interval", type=int, default=10,
 33 |                             help="number of updates between two logs (default: 10)")
 34 |         self.add_argument("--frames", type=int, default=int(9e10),
 35 |                             help="number of frames of training (default: 9e10)")
 36 |         self.add_argument("--patience", type=int, default=100,
 37 |                             help="patience for early stopping (default: 100)")
 38 |         self.add_argument("--epochs", type=int, default=40,
 39 |                             help="maximum number of epochs")
 40 |         self.add_argument("--epoch-length", type=int, default=0,
 41 |                             help="number of examples per epoch; the whole dataset is used by if 0")
 42 |         self.add_argument("--frames-per-proc", type=int, default=40,
 43 |                             help="number of frames per process before update (default: 40)")
 44 |         self.add_argument("--lr", type=float, default=1e-4,
 45 |                             help="learning rate (default: 1e-4)")
 46 |         self.add_argument("--beta1", type=float, default=0.9,
 47 |                             help="beta1 for Adam (default: 0.9)")
 48 |         self.add_argument("--beta2", type=float, default=0.999,
 49 |                             help="beta2 for Adam (default: 0.999)")
 50 |         self.add_argument("--recurrence", type=int, default=20,
 51 |                             help="number of timesteps gradient is backpropagated (default: 20)")
 52 |         self.add_argument("--optim-eps", type=float, default=1e-5,
 53 |                             help="Adam and RMSprop optimizer epsilon (default: 1e-5)")
 54 |         self.add_argument("--optim-alpha", type=float, default=0.99,
 55 |                             help="RMSprop optimizer apha (default: 0.99)")
 56 |         self.add_argument("--batch-size", type=int, default=1280,
 57 |                                 help="batch size for PPO (default: 1280)")
 58 |         self.add_argument("--entropy-coef", type=float, default=0.01,
 59 |                             help="entropy term coefficient (default: 0.01)")
 60 | 
 61 |         # Model parameters
 62 |         self.add_argument("--image-dim", type=int, default=128,
 63 |                             help="dimensionality of the image embedding")
 64 |         self.add_argument("--memory-dim", type=int, default=128,
 65 |                             help="dimensionality of the memory LSTM")
 66 |         self.add_argument("--instr-dim", type=int, default=128,
 67 |                             help="dimensionality of the memory LSTM")
 68 |         self.add_argument("--no-desc", action="store_true", default=False,
 69 |                             help="don't use descriptive texts in the model")
 70 |         self.add_argument("--instr-arch", default="gru",
 71 |                             help="arch to encode instructions, possible values: gru, bigru, conv, attgru (default: gru)")
 72 |         self.add_argument("--no-mem", action="store_true", default=False,
 73 |                             help="don't use memory in the model")
 74 |         self.add_argument("--arch", default='expert_filmcnn',
 75 |                             help="image embedding architecture")
 76 |         self.add_argument("--random-shuffle", action="store_true", default=False,
 77 |                             help='random shuffled texts')
 78 |         self.add_argument("--enable-instr", action="store_true", default=False,
 79 |                             help='enalbe instructional texts')
 80 |         self.add_argument("--instr-only", action="store_true", default=False,
 81 |                             help='use instructional texts only')
 82 | 
 83 |         # Validation parameters
 84 |         self.add_argument("--val-seed", type=int, default=int(1e9),
 85 |                             help="seed for environment used for validation (default: 1e9)")
 86 |         self.add_argument("--val-interval", type=int, default=1,
 87 |                             help="number of epochs between two validation checks (default: 1)")
 88 |         self.add_argument("--val-episodes", type=int, default=500,
 89 |                             help="number of episodes used to evaluate the agent, and to evaluate validation accuracy")
 90 | 
 91 |     def parse_args(self):
 92 |         """
 93 |         Parse the arguments and perform some basic validation
 94 |         """
 95 | 
 96 |         args = super().parse_args()
 97 | 
 98 |         # Set seed for all randomness sources
 99 |         if args.seed == 0:
100 |             args.seed = np.random.randint(10000)
101 |         if args.task_id_seed:
102 |             args.seed = int(os.environ['SLURM_ARRAY_TASK_ID'])
103 |             print('set seed to {}'.format(args.seed))
104 | 
105 |         # TODO: more validation
106 | 
107 |         return args
108 | 


--------------------------------------------------------------------------------
/experiment/make_agent_demos.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
  4 | Generate a set of agent demonstrations.
  5 | 
  6 | The agent can either be a trained model or the heuristic expert (bot).
  7 | 
  8 | Demonstration generation can take a long time, but it can be parallelized
  9 | if you have a cluster at your disposal. Provide a script that launches
 10 | make_agent_demos.py at your cluster as --job-script and the number of jobs as --jobs.
 11 | 
 12 | 
 13 | """
 14 | 
 15 | import argparse
 16 | import gym
 17 | import logging
 18 | import sys, os
 19 | sys.path.append(os.getcwd())
 20 | 
 21 | import subprocess
 22 | import os
 23 | import time
 24 | import numpy as np
 25 | import blosc
 26 | import torch
 27 | 
 28 | import babyaiPP
 29 | import babyai.utils as utils
 30 | 
 31 | # Parse arguments
 32 | 
 33 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 34 | parser.add_argument("--env", required=True,
 35 |                     help="name of the environment to be run (REQUIRED)")
 36 | parser.add_argument("--model", default='BOT',
 37 |                     help="name of the trained model (REQUIRED)")
 38 | parser.add_argument("--demos", default=None,
 39 |                     help="path to save demonstrations (based on --model and --origin by default)")
 40 | parser.add_argument("--episodes", type=int, default=1000000,
 41 |                     help="number of episodes to generate demonstrations for")
 42 | parser.add_argument("--valid-episodes", type=int, default=512,
 43 |                     help="number of validation episodes to generate demonstrations for")
 44 | parser.add_argument("--seed", type=int, default=0,
 45 |                     help="start random seed")
 46 | parser.add_argument("--argmax", action="store_true", default=False,
 47 |                     help="action with highest probability is selected")
 48 | parser.add_argument("--log-interval", type=int, default=100,
 49 |                     help="interval between progress reports")
 50 | parser.add_argument("--save-interval", type=int, default=10000,
 51 |                     help="interval between demonstrations saving")
 52 | parser.add_argument("--filter-steps", type=int, default=0,
 53 |                     help="filter out demos with number of steps more than filter-steps")
 54 | parser.add_argument("--on-exception", type=str, default='warn', choices=('warn', 'crash'),
 55 |                     help="How to handle exceptions during demo generation")
 56 | 
 57 | parser.add_argument("--job-script", type=str, default=None,
 58 |                     help="The script that launches make_agent_demos.py at a cluster.")
 59 | parser.add_argument("--jobs", type=int, default=0,
 60 |                     help="Split generation in that many jobs")
 61 | 
 62 | args = parser.parse_args()
 63 | logger = logging.getLogger(__name__)
 64 | 
 65 | # Set seed for all randomness sources
 66 | 
 67 | 
 68 | def print_demo_lengths(demos):
 69 |     num_frames_per_episode = [len(demo[2]) for demo in demos]
 70 |     logger.info('Demo length: {:.3f}+-{:.3f}'.format(
 71 |         np.mean(num_frames_per_episode), np.std(num_frames_per_episode)))
 72 | 
 73 | 
 74 | def generate_demos(n_episodes, valid, seed, shift=0):
 75 |     utils.seed(seed)
 76 | 
 77 |     # Generate environment
 78 |     env = gym.make(args.env)
 79 | 
 80 |     agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax, args.env)
 81 |     demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
 82 |     demos = []
 83 | 
 84 |     checkpoint_time = time.time()
 85 | 
 86 |     just_crashed = False
 87 |     while True:
 88 |         if len(demos) == n_episodes:
 89 |             break
 90 | 
 91 |         done = False
 92 |         if just_crashed:
 93 |             logger.info("reset the environment to find a mission that the bot can solve")
 94 |             env.reset()
 95 |         else:
 96 |             env.seed(seed + len(demos))
 97 |         obs = env.reset()
 98 |         agent.on_reset()
 99 | 
100 |         actions = []
101 |         mission = obs["mission"]
102 |         images = []
103 |         directions = []
104 | 
105 |         try:
106 |             while not done:
107 |                 action = agent.act(obs)['action']
108 |                 if isinstance(action, torch.Tensor):
109 |                     action = action.item()
110 |                 new_obs, reward, done, _ = env.step(action)
111 |                 agent.analyze_feedback(reward, done)
112 | 
113 |                 actions.append(action)
114 |                 images.append(obs['image'])
115 |                 directions.append(obs['direction'])
116 | 
117 |                 obs = new_obs
118 |             if reward > 0 and (args.filter_steps == 0 or len(images) <= args.filter_steps):
119 |                 demos.append((mission, blosc.pack_array(np.array(images)), directions, actions))
120 |                 just_crashed = False
121 | 
122 |             if reward == 0:
123 |                 if args.on_exception == 'crash':
124 |                     raise Exception("mission failed, the seed is {}".format(seed + len(demos)))
125 |                 just_crashed = True
126 |                 logger.info("mission failed")
127 |         except (Exception, AssertionError):
128 |             if args.on_exception == 'crash':
129 |                 raise
130 |             just_crashed = True
131 |             logger.exception("error while generating demo #{}".format(len(demos)))
132 |             continue
133 | 
134 |         if len(demos) and len(demos) % args.log_interval == 0:
135 |             now = time.time()
136 |             demos_per_second = args.log_interval / (now - checkpoint_time)
137 |             to_go = (n_episodes - len(demos)) / demos_per_second
138 |             logger.info("demo #{}, {:.3f} demos per second, {:.3f} seconds to go".format(
139 |                 len(demos) - 1, demos_per_second, to_go))
140 |             checkpoint_time = now
141 | 
142 |         # Save demonstrations
143 | 
144 |         if args.save_interval > 0 and len(demos) < n_episodes and len(demos) % args.save_interval == 0:
145 |             logger.info("Saving demos...")
146 |             utils.save_demos(demos, demos_path)
147 |             logger.info("{} demos saved".format(len(demos)))
148 |             # print statistics for the last 100 demonstrations
149 |             print_demo_lengths(demos[-100:])
150 | 
151 | 
152 |     # Save demonstrations
153 |     logger.info("Saving demos...")
154 |     utils.save_demos(demos, demos_path)
155 |     logger.info("{} demos saved".format(len(demos)))
156 |     print_demo_lengths(demos[-100:])
157 | 
158 | 
def generate_demos_cluster():
    """Split demo generation into ``args.jobs`` shards run through ``args.job_script``.

    Every shard is launched as a separate process that receives the original
    command-line arguments followed by shard-specific overrides (seed, output
    path, episode count, no further splitting, no validation episodes). The
    function then polls the shard files once a minute until each of them holds
    ``args.episodes // args.jobs`` demos, and finally saves the concatenation
    of all shards under the regular demos path.
    """
    n_jobs = args.jobs
    demos_per_job = args.episodes // n_jobs
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent')

    job_demo_names = []
    for shard in range(n_jobs):
        job_demo_names.append(os.path.realpath(demos_path + '.shard{}'.format(shard)))

    # Remove shard files that already exist so they are not mistaken for new output.
    for shard_name in job_demo_names:
        stale_path = utils.get_demos_path(shard_name)
        if os.path.exists(stale_path):
            os.remove(stale_path)

    base_command = [args.job_script] + sys.argv[1:]
    for shard, shard_name in enumerate(job_demo_names):
        # Appended after the user's arguments.
        overrides = [
            '--seed', args.seed + shard * demos_per_job,
            '--demos', shard_name,
            '--episodes', demos_per_job,
            '--jobs', 0,
            '--valid-episodes', 0,
        ]
        launch_cmd = [str(part) for part in base_command + overrides]
        logger.info('LAUNCH COMMAND')
        logger.info(launch_cmd)
        subprocess.Popen(launch_cmd)

    # Poll the shard files until every shard is complete.
    job_demos = [None] * n_jobs
    while True:
        jobs_done = 0
        for shard in range(n_jobs):
            loaded = job_demos[shard]
            if loaded is None or len(loaded) < demos_per_job:
                try:
                    logger.info("Trying to load shard {}".format(shard))
                    job_demos[shard] = utils.load_demos(utils.get_demos_path(job_demo_names[shard]))
                    logger.info("{} demos ready in shard {}".format(
                        len(job_demos[shard]), shard))
                except Exception:
                    # The shard file may not exist yet; try again on the next round.
                    logger.exception("Failed to load the shard")
            if job_demos[shard] and len(job_demos[shard]) == demos_per_job:
                jobs_done += 1
        logger.info("{} out of {} shards done".format(jobs_done, n_jobs))
        if jobs_done == n_jobs:
            break
        logger.info("sleep for 60 seconds")
        time.sleep(60)

    # Training demos
    all_demos = [demo for shard_demos in job_demos for demo in shard_demos]
    utils.save_demos(all_demos, demos_path)
211 | 
212 | 
# Script entry point: configure logging, then generate training and validation demos.
logging.basicConfig(format="%(asctime)s: %(levelname)s: %(message)s", level='INFO')
logger.info(args)

# Training demos: split across cluster jobs when --jobs is non-zero,
# otherwise generated in this process.
if args.jobs != 0:
    generate_demos_cluster()
else:
    generate_demos(n_episodes=args.episodes, valid=False, seed=args.seed)

# Validation demos, generated locally with a fixed seed.
if args.valid_episodes:
    generate_demos(n_episodes=args.valid_episodes, valid=True, seed=10 ** 9)
223 | 


--------------------------------------------------------------------------------
/babyaiPP/decriptive_level_base.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from babyai.levels.verifier import *
  3 | from babyai.levels.levelgen import *
  4 | from babyai.levels.iclr19_levels import *
  5 | from gym_minigrid.minigrid import DIR_TO_VEC
  6 | import copy
  7 | 
  8 | class DescriptiveLevel(RoomGridLevel):
  9 |     def __init__(self, description_level, desc_sample_frac, *args, **kwargs):
 10 |         print("entered descriptive level init")
 11 |         self.description_level = description_level  # type of description, 0 = no description, 1 = aggregated description, 2 = list description
 12 |         self.description_frac = desc_sample_frac  # if description is used, how much to use; 1 = describe everything
 13 |         self.desc = None
 14 |         super(DescriptiveLevel, self).__init__(*args, **kwargs)
 15 |         assert description_level in [0,1,2]
 16 | 
 17 |     def gen_mission(self):
 18 |         super(DescriptiveLevel, self).gen_mission()
 19 |         if self.description_level == 0:
 20 |             self.desc = None
 21 |             return
 22 |         # search for objects
 23 |         cell_infos = []
 24 |         for i in range(self.grid.width):
 25 |             for j in range(self.grid.height):
 26 |                 cell = self.grid.get(i, j)
 27 |                 if cell is None:
 28 |                     continue
 29 |                 if cell.type == "wall":
 30 |                     continue
 31 |                 cell_infos.append((cell, i, j))
 32 | 
 33 |         if self.description_frac < 1:
 34 |             N = int(self.description_frac * len(cell_infos))
 35 |             inds = np.arange(len(cell_infos))
 36 |             np.random.shuffle(inds)
 37 |             cell_infos_new = [cell_infos[i] for i in inds[:N]]
 38 |             cell_infos = cell_infos_new
 39 | 
 40 |         if self.description_level == 1:
 41 | 
 42 |             self.desc, _, _ = gen_aggregated_description(cell_infos, agent_pos=self.agent_pos, agent_dir=self.agent_dir, aggregation_order="012")
 43 | 
 44 |         elif self.description_level == 2:
 45 |             descs = []
 46 |             for cell, i, j in cell_infos:
 47 |                 desc = gen_description(cell, pos=(i,j), agent_pos=self.agent_pos, agent_dir=self.agent_dir)
 48 |                 descs.append(desc)
 49 | 
 50 |             self.desc = " ".join(descs)
 51 | 
 52 |         return
 53 | 
 54 |     def step(self, action):
 55 |         obs, reward, done, info = super().step(action)
 56 |         obs['mission'] = self.desc
 57 |         return obs, reward, done, info
 58 | 
 59 |     def reset(self, **kwargs):
 60 |         obs = super().reset(**kwargs)
 61 |         obs['mission'] = self.desc
 62 |         return obs
 63 | 
 64 | def gen_aggregated_description(cell_info, agent_pos, agent_dir, aggregation_order):
 65 |     """
 66 |     :param aggregation_order: "type:0", "color:1", "direction:2",
 67 |     aggregation_order = 012: "there are are 2 keys, 2 red, one in front of you to your right and one in front of you to your left. There are 4 balls, 1 blue and 3 green..."
 68 |     aggregation_order = 201: "there are 3 objects in front of you to your left, 2 keys, 1 red and 1 green, and a box, 1 blue. There are 1 object in front of you to your right..."
 69 |     """
 70 |     # first, add a pos attribute to cells
 71 |     cell_info = copy.deepcopy(cell_info)
 72 |     all_types = []
 73 |     all_colors = []
 74 |     all_poses = []
 75 |     for cell, i, j in cell_info:
 76 |         v = (i - agent_pos[0], j - agent_pos[1])
 77 |         d1 = DIR_TO_VEC[agent_dir]
 78 |         d2 = (-d1[1], d1[0])
 79 |         pos = ""
 80 |         if dot_product(v, d1) > 0:
 81 |             pos += "F"
 82 |         elif dot_product(v, d1) < 0:
 83 |             pos += "B"
 84 |         if dot_product(v, d2) < 0:
 85 |             pos += "L"
 86 |         elif dot_product(v, d2) > 0:
 87 |             pos += "R"
 88 |         cell.pos = pos
 89 |         if cell.type not in all_types:
 90 |             all_types.append(cell.type)
 91 |         if cell.color not in all_colors:
 92 |             all_colors.append(cell.color)
 93 |         if cell.pos not in all_poses:
 94 |             all_poses.append(cell.pos)
 95 | 
 96 |     dims = {'type':all_types, 'color':all_colors, 'pos':all_poses}
 97 |     index = {'0':'type', '1':'color', '2':'pos'}
 98 |     t0 = index[aggregation_order[0]]
 99 |     t1 = index[aggregation_order[1]]
100 |     t2 = index[aggregation_order[2]]
101 |     dim_0 = dims[t0]
102 |     dim_1 = dims[t1]
103 |     dim_2 = dims[t2]
104 |     count_array = np.zeros((len(dim_0), len(dim_1), len(dim_2)))
105 | 
106 |     for cell, i, j in cell_info:
107 |         d0 = dim_0.index(getattr(cell, t0))
108 |         d1 = dim_1.index(getattr(cell, t1))
109 |         d2 = dim_2.index(getattr(cell, t2))
110 |         count_array[d0,d1,d2] += 1
111 | 
112 |     desc = ""
113 |     direction_list = {"FL":"front left",
114 |                       "FR": "front right",
115 |                       "BL": "back left",
116 |                       "BR": "back right",
117 |                       "B": "back",
118 |                       "F": "front",
119 |                       "R": "right",
120 |                       "L": "left",
121 |                       }
122 | 
123 |     ONEONEFLAG = 0
124 | 
125 |     for i in range(count_array.shape[0]):
126 |         print(i)
127 |         n_ins = count_array.sum(axis=(1,2))[i]
128 |         if n_ins == 0:
129 |             continue
130 | 
131 |         desc += "There "
132 |         if n_ins == 1:
133 |             desc += "is a "
134 |             ONEONEFLAG = 1
135 |         else:
136 |             desc += "are %d " % n_ins
137 |             ONEONEFLAG = 0
138 |         value = dim_0[i]
139 | 
140 |         if t0 == "pos":
141 |             content = direction_list[value]
142 |             if n_ins > 1:
143 |                 desc += "objects to your " + content + ", "
144 |             else:
145 |                 desc += "object to your " + content + ", "
146 | 
147 |         elif t0 == "type":
148 |             desc += value
149 |             if n_ins > 1:
150 |                 desc += "s, "
151 |             else:
152 |                 desc += ", "
153 |         else:
154 |             desc += value + " object"
155 |             if n_ins > 1:
156 |                 desc += "s, "
157 |             else:
158 |                 desc += ", "
159 | 
160 |         for j in range(count_array.shape[1]):
161 |             n_ins = count_array.sum(axis=(2,))[i, j]
162 |             if n_ins == 0:
163 |                 continue
164 |             if np.sum(count_array.sum(axis=(2,))[i, j:]) == n_ins:
165 |                 desc += "and "
166 |             if ONEONEFLAG:
167 |                 if n_ins > 1:
168 |                     desc += "%d " % n_ins
169 |             else:
170 |                 desc += "%d " % n_ins
171 | 
172 |             if n_ins == 1:
173 |                 ONEONEFLAG = 1
174 |             else:
175 |                 ONEONEFLAG = 0
176 | 
177 |             value = dim_1[j]
178 | 
179 |             if t1 == "pos":
180 |                 content = direction_list[value]
181 |                 desc += "to your " + content + ", "
182 |             elif t1 == "type":
183 |                 desc += value
184 |                 if n_ins > 1:
185 |                     desc += "s, "
186 |                 else:
187 |                     desc += ", "
188 |             else:
189 |                 desc += value + ", "
190 | 
191 |             for k in range(count_array.shape[2]):
192 |                 n_ins = count_array[i, j, k]
193 |                 if n_ins == 0:
194 |                     continue
195 |                 if ONEONEFLAG:
196 |                     if n_ins > 1:
197 |                         desc += "%d " % n_ins
198 |                 else:
199 |                     desc += "%d " % n_ins
200 |                 ONEONEFLAG = 0
201 | 
202 |                 value = dim_2[k]
203 | 
204 |                 if t2 == "pos":
205 |                     content = direction_list[value]
206 |                     desc += "to your " + content
207 | 
208 |                 elif t2 == "type":
209 |                     desc += value
210 |                     if n_ins > 1:
211 |                         desc += "s"
212 |                 else:
213 |                     desc += value
214 | 
215 |                 desc += ", "
216 |             desc = desc[:-2] + "; "
217 |         desc = desc[:-2] + ". "
218 |     desc = desc[:-1]
219 | 
220 | 
221 |     return desc, count_array, dims
222 | 
def gen_description(cell, pos, agent_pos, agent_dir):
    """Return one sentence describing ``cell`` and where it lies relative to the agent.

    :param cell: object with ``color`` and ``type`` attributes
    :param pos: position of the object, indexed as ``(x, y)``
    :param agent_pos: position of the agent, indexed as ``(x, y)``
    :param agent_dir: agent direction, used as an index into ``DIR_TO_VEC``
    :return: e.g. "There is a red ball in front of you, on your left."
    """
    # Direction from the agent to the object
    v = (pos[0] - agent_pos[0], pos[1] - agent_pos[1])

    # (d1, d2) is an oriented orthonormal basis
    d1 = DIR_TO_VEC[agent_dir]
    d2 = (-d1[1], d1[0])

    # Each projection is computed once; the unused ``pos_matches`` dict is gone.
    ahead = dot_product(v, d1)
    side = dot_product(v, d2)

    s = "There is a %s %s " % (cell.color, cell.type)
    if ahead > 0:
        s += "in front of you"
    elif ahead < 0:
        s += "behind you"
    # NOTE(review): when the object is neither in front nor behind, the text
    # reads "... ball , on your left." (space before the comma). Left as is
    # because the generated text is what agents observe.
    if side < 0:
        s += ", on your left"
    elif side > 0:
        s += ", on your right"
    s += "."
    return s
251 | 


--------------------------------------------------------------------------------
/babyaiPP/additional_levels.py:
--------------------------------------------------------------------------------
  1 | from .dynamics_levels import DynamicsLevel
  2 | from babyai.levels.iclr19_levels import *
  3 | 
  4 | 
class Level_PutNextLocalDynamics_Lorem_Train(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room_size=8, num_objs=4) combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4], two floor
    colors, held-out pairs ('green', 0) and ('blue', 4), rand_text='lorem',
    instr_words=9 and with_instruction=False.
    """
    def __init__(self, seed=None):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
 11 | 
 12 | 
class Level_PutNextLocalDynamics_Lorem_Fully_Train(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room_size=8, num_objs=4) combined with DynamicsLevel.

    Same DynamicsLevel arguments as Level_PutNextLocalDynamics_Lorem_Train
    plus total_rand=True.
    """
    def __init__(self, seed=None):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', total_rand=True, instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
 19 | 
 20 | 
class Level_PutNextLocalDynamics_Lorem_Test(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room_size=8, num_objs=4) combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4], two floor
    colors, rand_text='lorem', instr_words=9 and with_instruction=False; no
    held_out_cp_pairs are passed.
    """
    def __init__(self, seed=None):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               rand_text='lorem', instr_words=9, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
 26 | 
 27 | 
class Level_PutNextLocalDynamics_Lorem_Fully_Test(DynamicsLevel, Level_PutNextLocal):
    """PutNextLocal (room_size=8, num_objs=4) combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4], two floor
    colors, rand_text='lorem', instr_words=9, total_rand=True and
    with_instruction=False; no held_out_cp_pairs are passed.
    """
    def __init__(self, seed=None):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               rand_text='lorem', instr_words=9, total_rand=True, with_instruction=False)
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
 33 | 
 34 | 
class Level_GoTo_NoDistDynamicsTrain(DynamicsLevel, Level_GoTo):
    """GoTo level combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4], two floor
    colors and held-out pairs ('green', 0) and ('blue', 4). The fourth
    positional argument of Level_GoTo is fixed to 0 (presumably the number of
    distractors, given the class name - confirm against Level_GoTo).
    """
    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 doors_open=False,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)])
        Level_GoTo.__init__(self, room_size, num_rows, num_cols, 0, doors_open, seed)
 46 | 
 47 | 
class Level_GoTo_NoDistDynamicsTest(DynamicsLevel, Level_GoTo):
    """GoTo level combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4] and two
    floor colors; no held_out_cp_pairs are passed. The fourth positional
    argument of Level_GoTo is fixed to 0 (presumably the number of
    distractors, given the class name - confirm against Level_GoTo).
    """
    def __init__(self,
                 room_size=8,
                 num_rows=3,
                 num_cols=3,
                 doors_open=False,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2)
        Level_GoTo.__init__(self, room_size, num_rows, num_cols, 0, doors_open, seed)
 58 | 
 59 | 
 60 | 
class Level_GoTo2by2_PartialDynamics_Train(DynamicsLevel, Level_GoTo):
    """GoTo level (defaults: 2x2 rooms of size 9, num_dists=18) combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[1, 2, 3, 4, 5], three
    floor colors, held_description=1 and held-out pairs ('green', 1),
    ('red', 2) and ('blue', 4).
    """
    def __init__(self,
                 room_size=9,
                 num_rows=2,
                 num_cols=2,
                 num_dists=18,
                 doors_open=False,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[1, 2, 3, 4, 5],
                               n_floor_colors=3,
                               held_description=1,
                               held_out_cp_pairs=[('green', 1), ('red', 2), ('blue', 4)],
                               )
        Level_GoTo.__init__(self, room_size, num_rows,
                            num_cols, num_dists, doors_open, seed)
 77 | 
 78 | 
class Level_GoTo2by2_PartialDynamics_Test(DynamicsLevel, Level_GoTo):
    """GoTo level (defaults: 2x2 rooms of size 9, num_dists=18) combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[1, 2, 3, 4, 5], three
    floor colors and held_description=1; no held_out_cp_pairs are passed.
    """
    def __init__(self,
                 room_size=9,
                 num_rows=2,
                 num_cols=2,
                 num_dists=18,
                 doors_open=False,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[1, 2, 3, 4, 5],
                               n_floor_colors=3,
                               held_description=1,
                               )

        Level_GoTo.__init__(self, room_size, num_rows,
                            num_cols, num_dists, doors_open, seed)
 95 | 
 96 | 
class Level_GoTo_RedBallDynamics_Lorem(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists level combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 3, 4], two floor
    colors, held-out pairs ('green', 0) and ('blue', 4), rand_text='lorem'
    and with_instruction=False.
    """
    def __init__(self,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', with_instruction=False)
        Level_GoToRedBallNoDists.__init__(self, seed)
105 | 
106 | 
class Level_GoTo_RedBallDynamics_Lorem_Fully(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists level combined with DynamicsLevel.

    Same DynamicsLevel arguments as Level_GoTo_RedBallDynamics_Lorem plus
    total_rand=True.
    """
    def __init__(self,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               rand_text='lorem', total_rand=True, with_instruction=False)
        Level_GoToRedBallNoDists.__init__(self, seed)
115 | 
116 | 
class Level_GoTo_RedBallDynamicsSticky_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists level combined with DynamicsLevel.

    DynamicsLevel is configured with enabled_properties=[0, 1, 4], two floor
    colors and held-out pairs ('green', 0) and ('blue', 1).
    """
    def __init__(self,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 1)])
        Level_GoToRedBallNoDists.__init__(self, seed)
124 | 
125 | 
class Level_GoTo_RedBallDynamicsSticky_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """GoToRedBallNoDists level combined with DynamicsLevel and a fixed color/property map.

    DynamicsLevel is configured with enabled_properties=[0, 1, 4], two floor
    colors and color_property_map={'green': ['trap'], 'blue': ['sticky']}.
    NOTE(review): this presumably matches the pairs held out by
    Level_GoTo_RedBallDynamicsSticky_Train (property 0 = trap, 1 = sticky) -
    confirm against DynamicsLevel.
    """
    def __init__(self,
                 seed=None
                 ):
        # Both bases are initialised explicitly, DynamicsLevel first.
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 4], n_floor_colors=2,
                               color_property_map={'green': ['trap'], 'blue': ['sticky']})
        Level_GoToRedBallNoDists.__init__(self, seed)
133 | 
134 | 
class Level_GoTo_RedBallDynamicsSticky_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """Combine ``DynamicsLevel`` with ``Level_GoToRedBallNoDists`` (test split).

    The dynamics part is configured with properties 0, 1 and 4 over two floor
    colors; no pairs are held out and no color/property map is pinned.
    """

    def __init__(self, seed=None):
        dynamics_config = dict(
            enabled_properties=[0, 1, 4],
            n_floor_colors=2,
        )
        # Both bases are initialised explicitly, dynamics first, then the task level.
        DynamicsLevel.__init__(self, **dynamics_config)
        Level_GoToRedBallNoDists.__init__(self, seed)
141 | 
142 | 
class Level_PutNextDynamics_Lorem_Train(DynamicsLevel, Level_PutNext):
    """Combine ``DynamicsLevel`` with ``Level_PutNext`` (training split).

    The dynamics part is configured with properties 0, 3 and 4 over two floor
    colors, the (green, 0) and (blue, 4) color/property pairs held out,
    ``rand_text='lorem'`` and ``total_rand=True``.  The task part is an
    8-cell room with 4 objects.

    :param seed: forwarded to ``Level_PutNext``.
    :param with_instruction: forwarded to ``DynamicsLevel``.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_config = dict(
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=[('green', 0), ('blue', 4)],
            with_instruction=with_instruction,
            rand_text='lorem',
            total_rand=True,
        )
        # Both bases are initialised explicitly, dynamics first, then the task level.
        DynamicsLevel.__init__(self, **dynamics_config)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)
149 | 
150 | 
class Level_PutNextDynamics_Lorem_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """Combine ``DynamicsLevel`` with ``Level_PutNext`` using a fixed color map.

    The dynamics part is configured with properties 0, 3 and 4 over two floor
    colors, the color/property map pinned to green -> 'trap' and
    blue -> 'slippery', ``rand_text='lorem'`` and ``total_rand=True``.  The
    task part is an 8-cell room with 4 objects.

    :param seed: forwarded to ``Level_PutNext``.
    :param with_instruction: forwarded to ``DynamicsLevel``.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_config = dict(
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map={'green': ['trap'], 'blue': ['slippery']},
            with_instruction=with_instruction,
            rand_text='lorem',
            total_rand=True,
        )
        # Both bases are initialised explicitly, dynamics first, then the task level.
        DynamicsLevel.__init__(self, **dynamics_config)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)
158 | 
159 | 
class Level_PutNextDynamics_Lorem_Test(DynamicsLevel, Level_PutNext):
    """Combine ``DynamicsLevel`` with ``Level_PutNext`` (test split).

    The dynamics part is configured with properties 0, 3 and 4 over two floor
    colors, ``rand_text='lorem'`` and ``total_rand=True``; no pairs are held
    out and no color/property map is pinned.  The task part is an 8-cell room
    with 4 objects.

    :param seed: forwarded to ``Level_PutNext``.
    :param with_instruction: forwarded to ``DynamicsLevel``.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_config = dict(
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
            rand_text='lorem',
            total_rand=True,
        )
        # Both bases are initialised explicitly, dynamics first, then the task level.
        DynamicsLevel.__init__(self, **dynamics_config)
        Level_PutNext.__init__(self, room_size=8, num_objs=4, seed=seed)
165 | 
166 | 
# Register every dynamics level of this module under its own class name.
register_levels(
    __name__,
    {
        'Level_GoTo_NoDistDynamicsTrain': Level_GoTo_NoDistDynamicsTrain,
        'Level_GoTo_NoDistDynamicsTest': Level_GoTo_NoDistDynamicsTest,
        'Level_GoTo2by2_PartialDynamics_Train': Level_GoTo2by2_PartialDynamics_Train,
        'Level_GoTo2by2_PartialDynamics_Test': Level_GoTo2by2_PartialDynamics_Test,
        'Level_GoTo_RedBallDynamics_Lorem': Level_GoTo_RedBallDynamics_Lorem,
        'Level_GoTo_RedBallDynamics_Lorem_Fully': Level_GoTo_RedBallDynamics_Lorem_Fully,
        'Level_GoTo_RedBallDynamicsSticky_Train': Level_GoTo_RedBallDynamicsSticky_Train,
        'Level_GoTo_RedBallDynamicsSticky_TargetPairOnly': Level_GoTo_RedBallDynamicsSticky_TargetPairOnly,
        'Level_GoTo_RedBallDynamicsSticky_Test': Level_GoTo_RedBallDynamicsSticky_Test,
        'Level_PutNextLocalDynamics_Lorem_Train': Level_PutNextLocalDynamics_Lorem_Train,
        'Level_PutNextLocalDynamics_Lorem_Fully_Train': Level_PutNextLocalDynamics_Lorem_Fully_Train,
        'Level_PutNextLocalDynamics_Lorem_Test': Level_PutNextLocalDynamics_Lorem_Test,
        'Level_PutNextLocalDynamics_Lorem_Fully_Test': Level_PutNextLocalDynamics_Lorem_Fully_Test,
        'Level_PutNextDynamics_Lorem_Train': Level_PutNextDynamics_Lorem_Train,
        'Level_PutNextDynamics_Lorem_TargetPairOnly': Level_PutNextDynamics_Lorem_TargetPairOnly,
        'Level_PutNextDynamics_Lorem_Test': Level_PutNextDynamics_Lorem_Test,
    },
)


--------------------------------------------------------------------------------
/experiment/train_rl.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | """
Script to train the agent through reinforcement learning.
  5 | """
  6 | 
  7 | import os
  8 | import sys
  9 | sys.path.append(os.getcwd())
 10 | 
 11 | import logging
 12 | import csv
 13 | import json
 14 | import gym
 15 | import time
 16 | import datetime
 17 | import torch
 18 | import numpy as np
 19 | import subprocess
 20 | import babyaiPP
 21 | import babyai
 22 | import babyai.utils as utils
 23 | import babyai.rl
 24 | from babyai.evaluate import batch_evaluate
 25 | from babyai.utils.agent import ModelAgent
 26 | 
 27 | from experiment.arguments import ArgumentParser
 28 | from experiment.model import ACModel
 29 | 
 30 | # Parse arguments
 31 | parser = ArgumentParser()
 32 | parser.add_argument("--algo", default='ppo',
 33 |                     help="algorithm to use (default: ppo)")
 34 | parser.add_argument("--discount", type=float, default=0.99,
 35 |                     help="discount factor (default: 0.99)")
 36 | parser.add_argument("--reward-scale", type=float, default=20.,
 37 |                     help="Reward scale multiplier")
 38 | parser.add_argument("--gae-lambda", type=float, default=0.99,
 39 |                     help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)")
 40 | parser.add_argument("--value-loss-coef", type=float, default=0.5,
 41 |                     help="value loss term coefficient (default: 0.5)")
 42 | parser.add_argument("--max-grad-norm", type=float, default=0.5,
 43 |                     help="maximum norm of gradient (default: 0.5)")
 44 | parser.add_argument("--clip-eps", type=float, default=0.2,
 45 |                     help="clipping epsilon for PPO (default: 0.2)")
 46 | parser.add_argument("--ppo-epochs", type=int, default=4,
 47 |                     help="number of epochs for PPO (default: 4)")
 48 | parser.add_argument("--save-interval", type=int, default=50,
 49 |                     help="number of updates between two saves (default: 50, 0 means no saving)")
 50 | args = parser.parse_args()
 51 | 
 52 | utils.seed(args.seed)
 53 | 
 54 | # Generate environments
 55 | envs = []
 56 | for i in range(args.procs):
 57 |     env = gym.make(args.env)
 58 |     env.seed(100 * args.seed + i)
 59 |     envs.append(env)
 60 | 
 61 | # Get the nubmer of descriptive sentences
 62 | n_floor_colors = envs[0].n_floor_colors
 63 | 
 64 | # Define model name
 65 | suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
 66 | instr = args.instr_arch if not args.no_desc else "nodesc"
 67 | rand = "rand" if args.random_shuffle else "order"
 68 | if args.enable_instr:
 69 |     if args.instr_only: texts = "instrtexts"
 70 |     else: texts = "alltexts"
 71 | else: texts = "destexts"
 72 | mem = "mem" if not args.no_mem else "nomem"
 73 | model_name_parts = {
 74 |     'env': args.env,
 75 |     'algo': args.algo,
 76 |     'arch': args.arch,
 77 |     'instr': instr,
 78 |     "rand": rand,
 79 |     "texts": texts,
 80 |     'mem': mem,
 81 |     'seed': args.seed,
 82 |     'info': '',
 83 |     'coef': '',
 84 |     'suffix': suffix}
 85 | default_model_name = "{env}_{algo}_{arch}_{instr}_{texts}_{rand}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
 86 | if args.pretrained_model:
 87 |     default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
 88 | args.model = args.model.format(**model_name_parts) if args.model else default_model_name
 89 | 
 90 | utils.configure_logging(args.model)
 91 | logger = logging.getLogger(__name__)
 92 | 
 93 | # Define obss preprocessor
 94 | if 'emb' in args.arch:
 95 |     obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
 96 | else:
 97 |     obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
 98 | 
 99 | # Define actor-critic model
100 | acmodel = utils.load_model(args.model, raise_not_found=False)
101 | if acmodel is None:
102 |     if args.pretrained_model:
103 |         acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
104 |     else:
105 |         acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
106 |                           args.image_dim, args.memory_dim, args.instr_dim,
107 |                           not args.no_desc, args.instr_arch, not args.no_mem, args.arch,
108 |                           random_shuffled=args.random_shuffle, instr_sents=n_floor_colors,
109 |                           enable_instr=args.enable_instr, instr_only=args.instr_only)
110 | 
111 | obss_preprocessor.vocab.save()
112 | utils.save_model(acmodel, args.model)
113 | 
114 | if torch.cuda.is_available():
115 |     acmodel.cuda()
116 | 
# Define actor-critic algo


def reshape_reward(_0, _1, reward, _2):
    """Scale the raw reward by --reward-scale; the other three arguments are ignored."""
    return args.reward_scale * reward


if args.algo != "ppo":
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))
algo = babyai.rl.PPOAlgo(
    envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.beta1, args.beta2,
    args.gae_lambda,
    args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
    args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
    reshape_reward,
)

# When using extra binary information, more tensors (model params) are initialized compared to when we don't use that.
# Thus, there starts to be a difference in the random state. If we want to avoid it, in order to make sure that
# the results of supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.

utils.seed(args.seed)
134 | 
# Restore training status (fresh counters unless a status file already exists)

status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
status = {'i': 0,
          'num_episodes': 0,
          'num_frames': 0}
if os.path.exists(status_path):
    with open(status_path, 'r') as src:
        status = json.load(src)

# Define logger and Tensorboard writer and CSV writer

header = [
    "update", "episodes", "frames", "FPS", "duration",
    "return_mean", "return_std", "return_min", "return_max",
    "success_rate_mean", "success_rate_std",
    "num_frames_mean", "num_frames_std", "num_frames_min", "num_frames_max",
    "entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm",
]
if args.tb:
    from tensorboardX import SummaryWriter
    writer = SummaryWriter(utils.get_log_dir(args.model))
csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
# Must be checked before the file is opened in append mode, which creates it
first_created = not os.path.exists(csv_path)
# we don't buffer data going in the csv log, cause we assume
# that one update will take much longer that one write to the log
csv_file = open(csv_path, 'a', 1)
csv_writer = csv.writer(csv_file)
if first_created:
    csv_writer.writerow(header)
163 | 
# Log code state, command, availability of CUDA and model

babyai_code = list(babyai.__path__)[0]


def _git_output(repo_dir, *git_args):
    """Return the decoded stdout of ``git <git_args>`` executed inside *repo_dir*.

    The command is run without a shell and with ``cwd=repo_dir``, so a path
    containing spaces works and a missing directory raises ``OSError`` instead
    of silently running git in the current directory.
    """
    return subprocess.check_output(['git', *git_args], cwd=repo_dir).decode('utf-8')


# Both lookups are best-effort: a missing git binary, a missing directory or a
# non-repository checkout must not abort training.
try:
    last_commit = _git_output(babyai_code, 'log', '-n1')
    logger.info('LAST COMMIT INFO:')
    logger.info(last_commit)
except (subprocess.CalledProcessError, OSError):
    logger.info('Could not figure out the last commit')
try:
    diff = _git_output(babyai_code, 'diff')
    if diff:
        logger.info('GIT DIFF:')
        logger.info(diff)
except (subprocess.CalledProcessError, OSError):
    logger.info('Could not figure out the git diff')
logger.info('COMMAND LINE ARGS:')
logger.info(args)
logger.info("CUDA available: {}".format(torch.cuda.is_available()))
logger.info(acmodel)
186 | 
# Train model

total_start_time = time.time()
# Best validation result so far; models are ranked by success rate first and
# by mean return as the tie-break.
best_success_rate = 0
best_mean_return = 0
test_env_name = args.env
while status['num_frames'] < args.frames:
    # Update parameters

    update_start_time = time.time()
    logs = algo.update_parameters()
    update_end_time = time.time()

    status['num_frames'] += logs["num_frames"]
    status['num_episodes'] += logs['episodes_done']
    status['i'] += 1

    # Print logs

    if status['i'] % args.log_interval == 0:
        total_ellapsed_time = int(time.time() - total_start_time)
        fps = logs["num_frames"] / (update_end_time - update_start_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        # An episode counts as a success when its return is strictly positive
        success_per_episode = utils.synthesize(
            [1 if r > 0 else 0 for r in logs["return_per_episode"]])
        num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

        # Column order must match ``header``
        data = [status['i'], status['num_episodes'], status['num_frames'],
                fps, total_ellapsed_time,
                *return_per_episode.values(),
                success_per_episode['mean'],
                # standard error of the success rate rather than its raw std
                success_per_episode['std'] / np.sqrt(len(logs["return_per_episode"])),
                *num_frames_per_episode.values(),
                logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"],
                logs["loss"], logs["grad_norm"]]

        format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                      "S:xs {:.2f} {:.4f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                      "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

        logger.info(format_str.format(*data))
        if args.tb:
            assert len(header) == len(data)
            for key, value in zip(header, data):
                writer.add_scalar(key, float(value), status['num_frames'])

        csv_writer.writerow(data)

    # Save obss preprocessor vocabulary and model

    if args.save_interval > 0 and status['i'] % args.save_interval == 0:
        obss_preprocessor.vocab.save()
        with open(status_path, 'w') as dst:
            json.dump(status, dst)
        utils.save_model(acmodel, args.model)

        # Evaluate the current model to decide whether it becomes the new "_best" checkpoint
        agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
        agent.model = acmodel
        agent.model.eval()
        eval_logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
        agent.model.train()
        mean_return = np.mean(eval_logs["return_per_episode"])
        success_rate = np.mean([1 if r > 0 else 0 for r in eval_logs['return_per_episode']])
        save_model = False
        if success_rate > best_success_rate:
            best_success_rate = success_rate
            # Reset the tie-break baseline to the return of the new best model;
            # otherwise later ties would be compared against a stale return.
            best_mean_return = mean_return
            save_model = True
        elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
            best_mean_return = mean_return
            save_model = True
        if save_model:
            utils.save_model(acmodel, args.model + '_best')
            obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
            logger.info("Return {: .2f}; best model is saved".format(mean_return))
        else:
            logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
265 | 


--------------------------------------------------------------------------------
/experiment/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | from torch.distributions.categorical import Categorical
  6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
  7 | import babyai.rl
  8 | from babyai.rl.utils.supervised_losses import required_heads
  9 | 
 10 | 
 11 | # Function from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/model.py
 12 | def initialize_parameters(m):
 13 |     classname = m.__class__.__name__
 14 |     if classname.find('Linear') != -1:
 15 |         m.weight.data.normal_(0, 1)
 16 |         m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True))
 17 |         if m.bias is not None:
 18 |             m.bias.data.fill_(0)
 19 | 
 20 | 
 21 | # Inspired by FiLMedBlock from https://arxiv.org/abs/1709.07871
 22 | class ExpertControllerFiLM(nn.Module):
 23 |     def __init__(self, in_features, out_features, in_channels, imm_channels):
 24 |         super().__init__()
 25 |         self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=imm_channels, kernel_size=(3, 3), padding=1)
 26 |         self.bn1 = nn.BatchNorm2d(imm_channels)
 27 |         self.conv2 = nn.Conv2d(in_channels=imm_channels, out_channels=out_features, kernel_size=(3, 3), padding=1)
 28 |         self.bn2 = nn.BatchNorm2d(out_features)
 29 | 
 30 |         self.weight = nn.Linear(in_features, out_features)
 31 |         self.bias = nn.Linear(in_features, out_features)
 32 | 
 33 |         self.apply(initialize_parameters)
 34 | 
 35 |     def forward(self, x, y):
 36 |         x = F.relu(self.bn1(self.conv1(x)))
 37 |         x = self.conv2(x)
 38 |         out = x * self.weight(y).unsqueeze(2).unsqueeze(3) + self.bias(y).unsqueeze(2).unsqueeze(3)
 39 |         out = self.bn2(out)
 40 |         out = F.relu(out)
 41 |         return out
 42 | 
 43 | 
 44 | class ACModel(nn.Module, babyai.rl.RecurrentACModel):
    def __init__(self, obs_space, action_space,
                 image_dim=128, memory_dim=128, instr_dim=128,
                 use_desc=True, lang_model="gru", use_memory=False, arch="cnn",
                 aux_info=None, random_shuffled=False, instr_sents=2, enable_instr=False, instr_only=False):
        '''
        Build the actor-critic network.

        :param obs_space: observation space; ``obs_space["instr"]`` sizes the word embedding table.
        :param action_space: action space; ``action_space.n`` is the number of actor outputs.
        :param image_dim: input size of the memory LSTM cell and output channels of the last FiLM controller.
        :param memory_dim: hidden size of the memory LSTM cell (also ``semi_memory_size``).
        :param instr_dim: word embedding size and total output size of the text GRU.
        :param use_desc: whether text is embedded; required by the FiLM and fusion architectures.
        :param lang_model: text encoder, one of ``'gru'``, ``'bigru'``, ``'attgru'``.
        :param use_memory: whether an LSTM cell is added on top of the image features.
        :param arch: ``"cnn"``, a name starting with ``"expert_filmcnn"``, or ``"fusion"``.
        :param aux_info: optional iterable of auxiliary-task names; when truthy, extra heads are added.
        :param random_shuffled: stored on the model for later use when embedding text.
        :param instr_sents: number of sentences; the fusion attention conv outputs ``instr_sents + 1`` channels.
        :param enable_instr: with ``arch == "fusion"``, adds FiLM controllers; also stored for text embedding.
        :param instr_only: stored on the model for later use when embedding text.
        :raises ValueError: for an unknown ``arch``, or a FiLM/fusion ``arch`` with ``use_desc`` false.
        '''
        super().__init__()

        # Decide which components are enabled
        self.use_desc = use_desc
        self.use_memory = use_memory
        self.random_shuffled = random_shuffled
        self.enable_instr = enable_instr
        self.instr_only = instr_only
        self.arch = arch
        self.lang_model = lang_model
        self.aux_info = aux_info
        self.image_dim = image_dim
        self.memory_dim = memory_dim
        self.instr_dim = instr_dim
        self.instr_sents = instr_sents

        self.obs_space = obs_space

        # Image encoder; its layout depends on the architecture
        if arch == "cnn":
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2)),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
        elif arch.startswith("expert_filmcnn"):
            if not self.use_desc:
                raise ValueError("FiLM architecture can be used when instructions are enabled")

            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
            # Pooling applied in forward() after the FiLM controllers
            self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        elif arch == "fusion":
            if not self.use_desc:
                raise ValueError("fusion architecture can be used when instructions are enabled")

            # Image feature extractor; padding=1 with 3x3 kernels and no pooling
            # keeps the spatial size of the input
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
            # Attention network run on the raw image: one output channel per
            # sentence plus one extra channel that forward() drops after the softmax
            self.w_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=self.instr_sents+1, kernel_size=(3, 3), padding=1)
            )
            # Consumes the channel-wise concatenation of image features (128)
            # and attended text features (128), hence 256 input channels
            self.combined_conv = nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU()
            ) 

            # NOTE(review): the string literal below is an older fusion variant kept
            # as dead code; it is evaluated as a no-op expression and builds nothing.
            '''
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            )
            self.w_conv = nn.Conv2d(in_channels=128, out_channels=self.instr_sents, kernel_size=(3, 3), padding=1).cuda()
            self.combined_conv = nn.Sequential(
                nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU()
            ) 
            '''
        else:
            raise ValueError("Incorrect architecture name: {}".format(arch))

        # Define instruction embedding
        if self.use_desc:
            if self.lang_model in ['gru', 'bigru', 'attgru']:
                self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim)
                if self.lang_model in ['gru', 'bigru', 'attgru']:
                    gru_dim = self.instr_dim
                    # Bidirectional variants halve the per-direction size so the
                    # concatenated output is still instr_dim wide
                    if self.lang_model in ['bigru', 'attgru']:
                        gru_dim //= 2
                    self.instr_rnn = nn.GRU(
                        self.instr_dim, gru_dim, batch_first=True,
                        bidirectional=(self.lang_model in ['bigru', 'attgru']))
                    self.final_instr_dim = self.instr_dim
                else:
                    # NOTE(review): unreachable, because the enclosing `if` already
                    # restricts lang_model to the same three values tested above.
                    kernel_dim = 64
                    kernel_sizes = [3, 4]
                    self.instr_convs = nn.ModuleList([
                        nn.Conv2d(1, kernel_dim, (K, self.instr_dim)) for K in kernel_sizes])
                    self.final_instr_dim = kernel_dim * len(kernel_sizes)
            # NOTE(review): for any other lang_model, final_instr_dim is never set;
            # the code below that reads it would raise AttributeError.

            if self.lang_model == 'attgru':
                # Projects the recurrent memory to the attention key over word embeddings
                self.memory2key = nn.Linear(self.memory_size, self.final_instr_dim)

        # Define memory
        if self.use_memory:
            self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)

        # Resize image embedding
        # NOTE(review): when use_memory is False, forward() feeds the flattened conv
        # output straight to the heads, so it presumably has to be
        # semi_memory_size wide - confirm against the image size in use.
        self.embedding_size = self.semi_memory_size
        # Only architectures that concatenate the text embedding widen the heads' input
        if self.use_desc and not "filmcnn" in arch and not "fusion" in arch:
            self.embedding_size += self.final_instr_dim

        if arch.startswith("expert_filmcnn") or (self.arch == "fusion" and self.enable_instr):
            # if arch == "expert_filmcnn":
            #     num_module = 2
            # else:
            #     num_module = int(arch[(arch.rfind('_') + 1):])
            num_module = 2
            # Kept in a plain list for ordered iteration in forward(); each
            # controller is also registered with add_module below
            self.controllers = []
            for ni in range(num_module):
                if ni < num_module-1:
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim,
                        out_features=128, in_channels=128, imm_channels=128)
                else:
                    # The last controller outputs image_dim channels
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim, out_features=self.image_dim,
                        in_channels=128, imm_channels=128)
                self.controllers.append(mod)
                self.add_module('FiLM_Controler_' + str(ni), mod)

        # Define actor's model
        self.actor = nn.Sequential(
            nn.Linear(self.embedding_size, 64),
            nn.Tanh(),
            nn.Linear(64, action_space.n)
        )

        # Define critic's model
        self.critic = nn.Sequential(
            nn.Linear(self.embedding_size, 64),
            nn.Tanh(),
            nn.Linear(64, 1)
        )

        # Initialize parameters correctly
        self.apply(initialize_parameters)

        # Define head for extra info (done after the global initialisation;
        # add_heads initialises each head on its own)
        if self.aux_info:
            self.extra_heads = None
            self.add_heads()
222 | 
223 |     def add_heads(self):
224 |         '''
225 |         When using auxiliary tasks, the environment yields at each step some binary, continous, or multiclass
226 |         information. The agent needs to predict those information. This function add extra heads to the model
227 |         that output the predictions. There is a head per extra information (the head type depends on the extra
228 |         information type).
229 |         '''
230 |         self.extra_heads = nn.ModuleDict()
231 |         for info in self.aux_info:
232 |             if required_heads[info] == 'binary':
233 |                 self.extra_heads[info] = nn.Linear(self.embedding_size, 1)
234 |             elif required_heads[info].startswith('multiclass'):
235 |                 n_classes = int(required_heads[info].split('multiclass')[-1])
236 |                 self.extra_heads[info] = nn.Linear(self.embedding_size, n_classes)
237 |             elif required_heads[info].startswith('continuous'):
238 |                 if required_heads[info].endswith('01'):
239 |                     self.extra_heads[info] = nn.Sequential(nn.Linear(self.embedding_size, 1), nn.Sigmoid())
240 |                 else:
241 |                     raise ValueError('Only continous01 is implemented')
242 |             else:
243 |                 raise ValueError('Type not supported')
244 |             # initializing these parameters independently is done in order to have consistency of results when using
245 |             # supervised-loss-coef = 0 and when not using any extra binary information
246 |             self.extra_heads[info].apply(initialize_parameters)
247 | 
248 |     def add_extra_heads_if_necessary(self, aux_info):
249 |         '''
250 |         This function allows using a pre-trained model without aux_info and add aux_info to it and still make
251 |         it possible to finetune.
252 |         '''
253 |         try:
254 |             if not hasattr(self, 'aux_info') or not set(self.aux_info) == set(aux_info):
255 |                 self.aux_info = aux_info
256 |                 self.add_heads()
257 |         except Exception:
258 |             raise ValueError('Could not add extra heads')
259 | 
260 |     @property
261 |     def memory_size(self):
262 |         return 2 * self.semi_memory_size
263 | 
264 |     @property
265 |     def semi_memory_size(self):
266 |         return self.memory_dim
267 | 
268 |     def forward(self, obs, memory, instr_embedding=None):
269 |         if self.use_desc and instr_embedding is None:
270 |             if self.enable_instr and self.arch == "fusion":
271 |                 instr_embedding, instr_embedding2 = self._get_instr_embedding(obs.instr)
272 |             else:
273 |                 instr_embedding = self._get_instr_embedding(obs.instr)
274 |         
275 |         if self.use_desc and self.lang_model == "attgru":
276 |             # outputs: B x L x D
277 |             # memory: B x M
278 |             mask = (obs.instr != 0).float()
279 |             instr_embedding = instr_embedding[:, :mask.shape[1]]
280 |             keys = self.memory2key(memory)
281 |             pre_softmax = (keys[:, None, :] * instr_embedding).sum(2) + 1000 * mask
282 |             attention = F.softmax(pre_softmax, dim=1)
283 |             instr_embedding = (instr_embedding * attention[:, :, None]).sum(1)
284 | 
285 |         x = torch.transpose(torch.transpose(obs.image, 1, 3), 2, 3)
286 | 
287 |         if self.arch.startswith("expert_filmcnn"):
288 |             x = self.image_conv(x)
289 |             for controler in self.controllers:
290 |                 x = controler(x, instr_embedding)
291 |             x = F.relu(self.film_pool(x))
292 |         elif self.arch == "fusion":
293 |             # old fusion model
294 |             '''
295 |             x = self.image_conv(x)
296 |             w = self.w_conv(x)
297 |             N,_,W,H = w.shape
298 |             w = w.view([N, self.instr_sents, -1])
299 |             w = F.softmax(w,dim=1)
300 |             y = torch.matmul(instr_embedding, w).view([N, 128, W, H])
301 |             '''
302 |             # new fusion model: separate cnns for image extractor and attention module input
303 |             x_feat = self.image_conv(x)
304 |             w = self.w_conv(x)
305 |             N,_,W,H = w.shape
306 |             w = w.view([N, self.instr_sents + 1, -1])
307 |             w = F.softmax(w, dim=1)
308 |             y = torch.matmul(instr_embedding, w[:,:-1]).view([N, 128, W, H])
309 | 
310 |             x = torch.cat([x_feat, y], axis=1)
311 |             x = self.combined_conv(x)
312 |             x = x.view(x.shape[0], x.shape[1], 1, 1)
313 |             if self.enable_instr:
314 |                 for controler in self.controllers:
315 |                     x = controler(x, instr_embedding2)
316 |                 x = F.relu(x)
317 |         else:
318 |             x = self.image_conv(x)
319 |         
320 | 
321 |         x = x.reshape(x.shape[0], -1)
322 | 
323 |         if self.use_memory:
324 |             hidden = (memory[:, :self.semi_memory_size], memory[:, self.semi_memory_size:])
325 |             hidden = self.memory_rnn(x, hidden)
326 |             embedding = hidden[0]
327 |             memory = torch.cat(hidden, dim=1)
328 |         else:
329 |             embedding = x
330 | 
331 |         if self.use_desc and not "filmcnn" in self.arch and not "fusion" in self.arch:
332 |             embedding = torch.cat((embedding, instr_embedding), dim=1)
333 | 
334 |         if hasattr(self, 'aux_info') and self.aux_info:
335 |             extra_predictions = {info: self.extra_heads[info](embedding) for info in self.extra_heads}
336 |         else:
337 |             extra_predictions = dict()
338 | 
339 |         x = self.actor(embedding)
340 |         dist = Categorical(logits=F.log_softmax(x, dim=1))
341 | 
342 |         x = self.critic(embedding)
343 |         value = x.squeeze(1)
344 | 
345 |         return {'dist': dist, 'value': value, 'memory': memory, 'extra_predictions': extra_predictions}
346 | 
347 |     def _get_instr_embedding(self, instr):
348 |         # filtering the "go to the red ball"
349 |         if not self.enable_instr:
350 |             instr = instr[:,instr.size()[1] - 4 * self.instr_sents:]
351 |         if self.instr_only:
352 |             assert (self.enable_instr)
353 |             instr = instr[:,:instr.size()[1] - 4 * self.instr_sents]
354 | 
355 |         if self.random_shuffled:
356 |             instr = instr[:,torch.randperm(instr.size()[1])]
357 |         lengths = (instr != 0).sum(1).long()
358 |         if self.lang_model == 'gru':
359 |             if self.arch == "fusion":
360 |                 assert (not self.instr_only)
361 |                 if self.enable_instr:
362 |                     desc = instr[:,instr.size()[1] - 4 * self.instr_sents:]
363 |                     ins = instr[:,:instr.size()[1] - 4 * self.instr_sents]
364 |                     lengths = (desc != 0).sum(1).long()
365 |                     lengths /= self.instr_sents
366 |                     hiddens = []
367 |                     for i in range(self.instr_sents):
368 |                         out, _ = self.instr_rnn(self.word_embedding(desc[:,4*i:4*(i+1)]))
369 |                         hidden = out[range(len(lengths)), lengths-1, :]
370 |                         hiddens.append(hidden)                
371 |                     
372 |                     hidden_desc = torch.stack(hiddens, axis=-1)
373 |                     lengths = (ins != 0).sum(1).long()
374 |                     out, _ = self.instr_rnn(self.word_embedding(ins))
375 |                     hidden_instr = out[range(len(lengths)), lengths-1, :]
376 |                     return hidden_desc, hidden_instr
377 |                 else:
378 |                     lengths /= self.instr_sents
379 |                     hiddens = []
380 |                     for i in range(self.instr_sents):
381 |                         out, _ = self.instr_rnn(self.word_embedding(instr[:,4*i:4*(i+1)]))
382 |                         hidden = out[range(len(lengths)), lengths-1, :]
383 |                         hiddens.append(hidden)                
384 |                     
385 |                     hidden = torch.stack(hiddens, axis=-1)
386 |             else:
387 |                 out, _ = self.instr_rnn(self.word_embedding(instr))
388 |                 hidden = out[range(len(lengths)), lengths-1, :]
389 |             return hidden
390 | 
391 |         elif self.lang_model in ['bigru', 'attgru']:
392 |             if self.arch == "fusion":
393 |                 raise NotImplementedError("For early fusion model, only gru model is supported!")
394 | 
395 |             masks = (instr != 0).float()
396 | 
397 |             if lengths.shape[0] > 1:
398 |                 seq_lengths, perm_idx = lengths.sort(0, descending=True)
399 |                 iperm_idx = torch.LongTensor(perm_idx.shape).fill_(0)
400 |                 if instr.is_cuda: iperm_idx = iperm_idx.cuda()
401 |                 for i, v in enumerate(perm_idx):
402 |                     iperm_idx[v.data] = i
403 | 
404 |                 inputs = self.word_embedding(instr)
405 |                 inputs = inputs[perm_idx]
406 | 
407 |                 inputs = pack_padded_sequence(inputs, seq_lengths.data.cpu().numpy(), batch_first=True)
408 | 
409 |                 outputs, final_states = self.instr_rnn(inputs)
410 |             else:
411 |                 instr = instr[:, 0:lengths[0]]
412 |                 outputs, final_states = self.instr_rnn(self.word_embedding(instr))
413 |                 iperm_idx = None
414 |             final_states = final_states.transpose(0, 1).contiguous()
415 |             final_states = final_states.view(final_states.shape[0], -1)
416 |             if iperm_idx is not None:
417 |                 outputs, _ = pad_packed_sequence(outputs, batch_first=True)
418 |                 outputs = outputs[iperm_idx]
419 |                 final_states = final_states[iperm_idx]
420 | 
421 |             if outputs.shape[1] < masks.shape[1]:
422 |                 masks = masks[:, :(outputs.shape[1]-masks.shape[1])]
423 |                 # the packing truncated the original length
424 |                 # so we need to change mask to fit it
425 | 
426 |             return outputs if self.lang_model == 'attgru' else final_states
427 | 
428 |         else:
429 |             ValueError("Undefined instruction architecture: {}".format(self.use_desc))
430 | 


--------------------------------------------------------------------------------
/experiment/imitation.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | sys.path.append(os.getcwd())
  4 | 
  5 | import copy
  6 | import torch
  7 | import gym
  8 | import time
  9 | import datetime
 10 | import numpy as np
 11 | import itertools
 12 | import multiprocessing
 13 | import json
 14 | import logging
 15 | 
 16 | import babyaiPP
 17 | from babyai.rl import DictList
 18 | from babyai.evaluate import batch_evaluate
 19 | import babyai.utils as utils
 20 | 
 21 | from experiment.model import ACModel
 22 | 
 23 | logger = logging.getLogger(__name__)
 24 | 
 25 | 
 26 | class ImitationLearning(object):
 27 |     def __init__(self, args, ):
 28 |         self.args = args
 29 | 
 30 |         utils.seed(self.args.seed)
 31 | 
 32 |         # args.env is a list when training on multiple environments
 33 |         if getattr(args, 'multi_env', None):
 34 |             self.env = [gym.make(item) for item in args.multi_env]
 35 | 
 36 |             self.train_demos = []
 37 |             for demos, episodes in zip(args.multi_demos, args.multi_episodes):
 38 |                 demos_path = utils.get_demos_path(demos, None, None, valid=False)
 39 |                 logger.info('loading {} of {} demos'.format(episodes, demos))
 40 |                 train_demos = utils.load_demos(demos_path)
 41 |                 logger.info('loaded demos')
 42 |                 if episodes > len(train_demos):
 43 |                     raise ValueError("there are only {} train demos in {}".format(len(train_demos), demos))
 44 |                 self.train_demos.extend(train_demos[:episodes])
 45 |                 logger.info('So far, {} demos loaded'.format(len(self.train_demos)))
 46 | 
 47 |             self.val_demos = []
 48 |             for demos, episodes in zip(args.multi_demos, [args.val_episodes] * len(args.multi_demos)):
 49 |                 demos_path_valid = utils.get_demos_path(demos, None, None, valid=True)
 50 |                 logger.info('loading {} of {} valid demos'.format(episodes, demos))
 51 |                 valid_demos = utils.load_demos(demos_path_valid)
 52 |                 logger.info('loaded demos')
 53 |                 if episodes > len(valid_demos):
 54 |                     logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(valid_demos)))
 55 |                 self.val_demos.extend(valid_demos[:episodes])
 56 |                 logger.info('So far, {} valid demos loaded'.format(len(self.val_demos)))
 57 | 
 58 |             logger.info('Loaded all demos')
 59 | 
 60 |             observation_space = self.env[0].observation_space
 61 |             action_space = self.env[0].action_space
 62 | 
 63 |         else:
 64 |             self.env = gym.make(self.args.env)
 65 | 
 66 |             demos_path = utils.get_demos_path(args.demos, args.env, args.demos_origin, valid=False)
 67 |             demos_path_valid = utils.get_demos_path(args.demos, args.env, args.demos_origin, valid=True)
 68 | 
 69 |             logger.info('loading demos')
 70 |             self.train_demos = utils.load_demos(demos_path)
 71 |             logger.info('loaded demos')
 72 |             if args.episodes:
 73 |                 if args.episodes > len(self.train_demos):
 74 |                     raise ValueError("there are only {} train demos".format(len(self.train_demos)))
 75 |                 self.train_demos = self.train_demos[:args.episodes]
 76 | 
 77 |             self.val_demos = utils.load_demos(demos_path_valid)
 78 |             if args.val_episodes > len(self.val_demos):
 79 |                 logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(self.val_demos)))
 80 |             self.val_demos = self.val_demos[:self.args.val_episodes]
 81 | 
 82 |             observation_space = self.env.observation_space
 83 |             action_space = self.env.action_space
 84 |  
 85 |         self.obss_preprocessor = utils.ObssPreprocessor(args.model, observation_space,
 86 |                                                         getattr(self.args, 'pretrained_model', None))
 87 | 
 88 |         # Define actor-critic model
 89 |         self.acmodel = utils.load_model(args.model, raise_not_found=False)
 90 |         if self.acmodel is None:
 91 |             if getattr(self.args, 'pretrained_model', None):
 92 |                 logger.info("Loading pretrained model")
 93 |                 self.acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
 94 |             else:
 95 |                 logger.info('Creating new model')
 96 |                 self.acmodel = ACModel(self.obss_preprocessor.obs_space, action_space,
 97 |                                        args.image_dim, args.memory_dim, args.instr_dim,
 98 |                                        not self.args.no_desc, self.args.instr_arch,
 99 |                                        not self.args.no_mem, self.args.arch,
100 |                                        random_shuffled=self.args.random_shuffle, instr_sents=self.env.n_floor_colors,
101 |                                        enable_instr=self.args.enable_instr, instr_only=self.args.instr_only)
102 |         self.obss_preprocessor.vocab.save()
103 |         utils.save_model(self.acmodel, args.model)
104 | 
105 |         self.acmodel.train()
106 |         if torch.cuda.is_available():
107 |             self.acmodel.cuda()
108 | 
109 |         self.optimizer = torch.optim.Adam(self.acmodel.parameters(), self.args.lr, eps=self.args.optim_eps)
110 |         self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=100, gamma=0.9)
111 | 
112 |         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
113 | 
114 |     @staticmethod
115 |     def default_model_name(args):
116 |         if getattr(args, 'multi_env', None):
117 |             # It's better to specify one's own model name for this scenario
118 |             named_envs = '-'.join(args.multi_env)
119 |         else:
120 |             named_envs = args.env
121 | 
122 |         # Define model name
123 |         suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
124 |         # instr = args.instr_arch if args.instr_arch else "noinstr"
125 |         instr = args.instr_arch if not args.no_desc else "nodesc"
126 |         model_name_parts = {
127 |             'envs': named_envs,
128 |             'arch': args.arch,
129 |             'instr': instr,
130 |             'seed': args.seed,
131 |             'suffix': suffix}
132 |         default_model_name = "{envs}_IL_{arch}_{instr}_seed{seed}_{suffix}".format(**model_name_parts)
133 |         if getattr(args, 'pretrained_model', None):
134 |             default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
135 |         return default_model_name
136 | 
137 |     def starting_indexes(self, num_frames):
138 |         if num_frames % self.args.recurrence == 0:
139 |             return np.arange(0, num_frames, self.args.recurrence)
140 |         else:
141 |             return np.arange(0, num_frames, self.args.recurrence)[:-1]
142 | 
143 |     def run_epoch_recurrence(self, demos, is_training=False):
144 |         if self.args.epoch_length == 0:
145 |             indices = list(range(len(demos)))
146 |         else:
147 |             indices = np.random.choice(len(demos), self.args.epoch_length)
148 |         if is_training:
149 |             np.random.shuffle(indices)
150 | 
151 |         batch_size = min(self.args.batch_size, len(demos))
152 |         offset = 0
153 | 
154 |         if not is_training:
155 |             self.acmodel.eval()
156 | 
157 |         # Log dictionary
158 |         log = {"entropy": [], "policy_loss": [], "accuracy": []}
159 | 
160 |         start_time = time.time()
161 |         frames = 0
162 |         for batch_index in range(len(indices) // batch_size):
163 |             logger.info("batch {}, FPS so far {}".format(
164 |                 batch_index, frames / (time.time() - start_time) if frames else 0))
165 |             batch = [demos[i] for i in indices[offset: offset + batch_size]]
166 |             frames += sum([len(demo[3]) for demo in batch])
167 | 
168 |             _log = self.run_epoch_recurrence_one_batch(batch, is_training=is_training)
169 | 
170 |             log["entropy"].append(_log["entropy"])
171 |             log["policy_loss"].append(_log["policy_loss"])
172 |             log["accuracy"].append(_log["accuracy"])
173 |             log["frames"] = frames
174 | 
175 |             offset += batch_size
176 | 
177 |         if not is_training:
178 |             self.acmodel.train()
179 | 
180 |         return log
181 | 
182 |     def run_epoch_recurrence_one_batch(self, batch, is_training=False):
183 |         batch = utils.demos.transform_demos(batch)
184 |         batch.sort(key=len, reverse=True)
185 |         # Constructing flat batch and indices pointing to start of each demonstration
186 |         flat_batch = []
187 |         inds = [0]
188 | 
189 |         for demo in batch:
190 |             flat_batch += demo
191 |             inds.append(inds[-1] + len(demo))
192 | 
193 |         flat_batch = np.array(flat_batch)
194 |         inds = inds[:-1]
195 |         num_frames = len(flat_batch)
196 | 
197 |         mask = np.ones([len(flat_batch)], dtype=np.float64)
198 |         mask[inds] = 0
199 |         mask = torch.tensor(mask, device=self.device, dtype=torch.float).unsqueeze(1)
200 | 
201 |         # Observations, true action, values and done for each of the stored demostration
202 |         obss, action_true, done = flat_batch[:, 0], flat_batch[:, 1], flat_batch[:, 2]
203 |         action_true = torch.tensor([action for action in action_true], device=self.device, dtype=torch.long)
204 | 
205 |         # Memory to be stored
206 |         memories = torch.zeros([len(flat_batch), self.acmodel.memory_size], device=self.device)
207 |         episode_ids = np.zeros(len(flat_batch))
208 |         memory = torch.zeros([len(batch), self.acmodel.memory_size], device=self.device)
209 | 
210 |         preprocessed_first_obs = self.obss_preprocessor(obss[inds], device=self.device)
211 |         if not self.args.no_desc:
212 |             instr_embedding = self.acmodel._get_instr_embedding(preprocessed_first_obs.instr)
213 | 
214 |         # Loop terminates when every observation in the flat_batch has been handled
215 |         while True:
216 |             # taking observations and done located at inds
217 |             obs = obss[inds]
218 |             done_step = done[inds]
219 |             preprocessed_obs = self.obss_preprocessor(obs, device=self.device)
220 |             with torch.no_grad():
221 |                 # taking the memory till len(inds), as demos beyond that have already finished
222 |                 if not self.args.no_desc:
223 |                     new_memory = self.acmodel(
224 |                         preprocessed_obs,
225 |                         memory[:len(inds), :], instr_embedding[:len(inds)])['memory']
226 |                 else:
227 |                     new_memory = self.acmodel(
228 |                         preprocessed_obs,
229 |                         memory[:len(inds), :])['memory']
230 | 
231 |             memories[inds, :] = memory[:len(inds), :]
232 |             memory[:len(inds), :] = new_memory
233 |             episode_ids[inds] = range(len(inds))
234 | 
235 |             # Updating inds, by removing those indices corresponding to which the demonstrations have finished
236 |             inds = inds[:len(inds) - sum(done_step)]
237 |             if len(inds) == 0:
238 |                 break
239 | 
240 |             # Incrementing the remaining indices
241 |             inds = [index + 1 for index in inds]
242 | 
243 |         # Here, actual backprop upto args.recurrence happens
244 |         final_loss = 0
245 |         final_entropy, final_policy_loss, final_value_loss = 0, 0, 0
246 | 
247 |         indexes = self.starting_indexes(num_frames)
248 |         memory = memories[indexes]
249 |         accuracy = 0
250 |         total_frames = len(indexes) * self.args.recurrence
251 |         for _ in range(self.args.recurrence):
252 |             obs = obss[indexes]
253 |             preprocessed_obs = self.obss_preprocessor(obs, device=self.device)
254 |             action_step = action_true[indexes]
255 |             mask_step = mask[indexes]
256 |             if not self.args.no_desc:
257 |                 model_results = self.acmodel(
258 |                     preprocessed_obs, memory * mask_step,
259 |                     instr_embedding[episode_ids[indexes]])
260 |             else:
261 |                 model_results = self.acmodel(
262 |                     preprocessed_obs, memory * mask_step)
263 |             dist = model_results['dist']
264 |             memory = model_results['memory']
265 | 
266 |             entropy = dist.entropy().mean()
267 |             policy_loss = -dist.log_prob(action_step).mean()
268 |             loss = policy_loss - self.args.entropy_coef * entropy
269 |             action_pred = dist.probs.max(1, keepdim=True)[1]
270 |             accuracy += float((action_pred == action_step.unsqueeze(1)).sum()) / total_frames
271 |             final_loss += loss
272 |             final_entropy += entropy
273 |             final_policy_loss += policy_loss
274 |             indexes += 1
275 | 
276 |         final_loss /= self.args.recurrence
277 | 
278 |         if is_training:
279 |             self.optimizer.zero_grad()
280 |             final_loss.backward()
281 |             self.optimizer.step()
282 | 
283 |         log = {}
284 |         log["entropy"] = float(final_entropy / self.args.recurrence)
285 |         log["policy_loss"] = float(final_policy_loss / self.args.recurrence)
286 |         log["accuracy"] = float(accuracy)
287 | 
288 |         return log
289 | 
290 |     def validate(self, episodes, verbose=True):
291 |         # Seed needs to be reset for each validation, to ensure consistency
292 |         utils.seed(self.args.val_seed)
293 | 
294 |         if verbose:
295 |             logger.info("Validating the model")
296 |         if getattr(self.args, 'multi_env', None):
297 |             agent = utils.load_agent(self.env[0], model_name=self.args.model, argmax=True)
298 |         else:
299 |             agent = utils.load_agent(self.env, model_name=self.args.model, argmax=True)
300 | 
301 |         # Setting the agent model to the current model
302 |         agent.model = self.acmodel
303 | 
304 |         agent.model.eval()
305 |         logs = []
306 | 
307 |         for env_name in ([self.args.env] if not getattr(self.args, 'multi_env', None)
308 |                          else self.args.multi_env):
309 |             logs += [batch_evaluate(agent, env_name, self.args.val_seed, episodes)]
310 |         agent.model.train()
311 | 
312 |         return logs
313 | 
314 |     def collect_returns(self):
315 |         logs = self.validate(episodes=self.args.eval_episodes, verbose=False)
316 |         mean_return = {tid: np.mean(log["return_per_episode"]) for tid, log in enumerate(logs)}
317 |         return mean_return
318 | 
319 |     def train(self, train_demos, writer, csv_writer, status_path, header, reset_status=False):
320 |         # Load the status
321 |         def initial_status():
322 |             return {'i': 0,
323 |                     'num_frames': 0,
324 |                     'patience': 0}
325 | 
326 |         status = initial_status()
327 |         if os.path.exists(status_path) and not reset_status:
328 |             with open(status_path, 'r') as src:
329 |                 status = json.load(src)
330 |         elif not os.path.exists(os.path.dirname(status_path)):
331 |             # Ensure that the status directory exists
332 |             os.makedirs(os.path.dirname(status_path))
333 | 
334 |         # If the batch size is larger than the number of demos, we need to lower the batch size
335 |         if self.args.batch_size > len(train_demos):
336 |             self.args.batch_size = len(train_demos)
337 |             logger.info("Batch size too high. Setting it to the number of train demos ({})".format(len(train_demos)))
338 | 
339 |         # Model saved initially to avoid "Model not found Exception" during first validation step
340 |         utils.save_model(self.acmodel, self.args.model)
341 | 
342 |         # best mean return to keep track of performance on validation set
343 |         best_success_rate, patience, i = 0, 0, 0
344 |         total_start_time = time.time()
345 | 
346 |         while status['i'] < getattr(self.args, 'epochs', int(1e9)):
347 |             if 'patience' not in status:  # if for some reason you're finetuining with IL an RL pretrained agent
348 |                 status['patience'] = 0
349 |             # Do not learn if using a pre-trained model that already lost patience
350 |             if status['patience'] > self.args.patience:
351 |                 break
352 |             if status['num_frames'] > self.args.frames:
353 |                 break
354 | 
355 |             status['i'] += 1
356 |             i = status['i']
357 |             update_start_time = time.time()
358 | 
359 |             log = self.run_epoch_recurrence(train_demos, is_training=True)
360 |             status['num_frames'] += log['frames']
361 | 
362 |             # Learning rate scheduler
363 |             self.scheduler.step()
364 | 
365 |             update_end_time = time.time()
366 | 
367 |             # Print logs
368 |             if status['i'] % self.args.log_interval == 0:
369 |                 total_ellapsed_time = int(time.time() - total_start_time)
370 | 
371 |                 fps = log['frames'] / (update_end_time - update_start_time)
372 |                 duration = datetime.timedelta(seconds=total_ellapsed_time)
373 | 
374 |                 for key in log:
375 |                     log[key] = np.mean(log[key])
376 | 
377 |                 train_data = [status['i'], status['num_frames'], fps, total_ellapsed_time,
378 |                               log["entropy"], log["policy_loss"], log["accuracy"]]
379 | 
380 |                 logger.info(
381 |                     "U {} | F {:06} | FPS {:04.0f} | D {} | H {:.3f} | pL {: .3f} | A {: .3f}".format(*train_data))
382 | 
383 |                 # Log the gathered data only when we don't evaluate the validation metrics. It will be logged anyways
384 |                 # afterwards when status['i'] % self.args.val_interval == 0
385 |                 if status['i'] % self.args.val_interval != 0:
386 |                     # instantiate a validation_log with empty strings when no validation is done
387 |                     validation_data = [''] * len([key for key in header if 'valid' in key])
388 |                     assert len(header) == len(train_data + validation_data)
389 |                     if self.args.tb:
390 |                         for key, value in zip(header, train_data):
391 |                             writer.add_scalar(key, float(value), status['num_frames'])
392 |                     csv_writer.writerow(train_data + validation_data)
393 | 
394 |             if status['i'] % self.args.val_interval == 0:
395 | 
396 |                 valid_log = self.validate(self.args.val_episodes)
397 |                 mean_return = [np.mean(log['return_per_episode']) for log in valid_log]
398 |                 success_rate = [np.mean([1 if r > 0 else 0 for r in log['return_per_episode']]) for log in
399 |                                 valid_log]
400 | 
401 |                 val_log = self.run_epoch_recurrence(self.val_demos)
402 |                 validation_accuracy = np.mean(val_log["accuracy"])
403 | 
404 |                 if status['i'] % self.args.log_interval == 0:
405 |                     validation_data = [validation_accuracy] + mean_return + success_rate
406 |                     logger.info(("Validation: A {: .3f} " + ("| R {: .3f} " * len(mean_return) +
407 |                                                              "| S {: .3f} " * len(success_rate))
408 |                                  ).format(*validation_data))
409 | 
410 |                     assert len(header) == len(train_data + validation_data)
411 |                     if self.args.tb:
412 |                         for key, value in zip(header, train_data + validation_data):
413 |                             writer.add_scalar(key, float(value), status['num_frames'])
414 |                     csv_writer.writerow(train_data + validation_data)
415 | 
416 |                 # In case of a multi-env, the update condition would be "better mean success rate" !
417 |                 if np.mean(success_rate) > best_success_rate:
418 |                     best_success_rate = np.mean(success_rate)
419 |                     status['patience'] = 0
420 |                     with open(status_path, 'w') as dst:
421 |                         json.dump(status, dst)
422 |                     # Saving the model
423 |                     logger.info("Saving best model")
424 | 
425 |                     if torch.cuda.is_available():
426 |                         self.acmodel.cpu()
427 |                     utils.save_model(self.acmodel, self.args.model + "_best")
428 |                     self.obss_preprocessor.vocab.save(utils.get_vocab_path(self.args.model + "_best"))
429 |                     if torch.cuda.is_available():
430 |                         self.acmodel.cuda()
431 |                 else:
432 |                     status['patience'] += 1
433 |                     logger.info(
434 |                         "Losing patience, new value={}, limit={}".format(status['patience'], self.args.patience))
435 | 
436 | 
437 |             if status['i'] % self.args.save_interval == 0:
438 |                 logger.info("Saving current model")
439 |                 if torch.cuda.is_available():
440 |                     self.acmodel.cpu()
441 |                 utils.save_model(self.acmodel, self.args.model)
442 |                 self.obss_preprocessor.vocab.save()
443 |                 if torch.cuda.is_available():
444 |                     self.acmodel.cuda()
445 |                 with open(status_path, 'w') as dst:
446 |                     json.dump(status, dst)
447 | 


--------------------------------------------------------------------------------
/babyaiPP/dynamics_levels.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | from babyai.levels.verifier import *
  3 | from babyai.levels.levelgen import *
  4 | from babyai.levels.iclr19_levels import *
  5 | from gym_minigrid.minigrid import COLOR_NAMES, Floor, DIR_TO_VEC
  6 | from lorem.text import TextLorem
  7 | 
  8 | PROPERTY_TO_IDX = {
  9 |     'trap': 0,  # Agent dies, episode end
 10 |     'sticky': 1,  # Agent must stay on block for at least 3 time steps.
 11 |     'flipud': 2,  # causes agent to turn 180 and move one block, requires agent to spin and then getting backed out.
 12 |     'fliplr': 3,  # Flips rotational actions.
 13 |     'slippery': 4,  # time warp, increase reward at end
 14 |     # Agent will fall down 1 block every 2 timesteps on this color.
 15 |     'magic': 5,
 16 | }
 17 | 
 18 | PROPERTY_ORDER = [['trap'], ['slippery', 'magic'], ['none', 'sticky', 'flipud', 'fliplr']]
 19 | Spawn_rates = [0.05, 0.15, 0.30]
 20 | N_tries = 20
 21 | """
 22 | property game breaking levels:
 23 | 1. unconstrained: can be placed anywhere without breaking game
 24 | 2. path blocking: creates uncrossable areas, must not be placed in front of doors, must not be contiguous with other path blockers by more than 2
 25 | 3. insta-death: path-blocking + must not overlap with object. 
 26 | property rarity:
 27 | 1. unconstrained are common: 30% spawn chance
 28 | 2. path blocking are rare: 15% spawn
 29 | 3. insta-death super rare: 5% spawn
 30 | """
 31 | 
 32 | IDX_TO_PROPERTY = dict(zip(PROPERTY_TO_IDX.values(), PROPERTY_TO_IDX.keys()))
 33 | 
 34 | 
 35 | class DynamicsLevel(RoomGridLevel):
 36 |     # TODO(lts): Adapted floors to be containers. Need to make sure certain objectives will continue
 37 |     # to work. (Current goto objective should work b/c colored floors never spawn under objects.)
 38 |     def __init__(self, enabled_properties=(0, 1, 2, 3, 4, 5), n_floor_colors=2, fixed_color_prop_map=False,
 39 |                  color_property_map=None, held_out_cp_pairs=None, held_description=0.0, with_instruction=True,
 40 |                  rand_text=False, total_rand=False, instr_words=5,
 41 |                  *args, **kwargs):
 42 |         """
 43 |                 Render this grid at a given scale
 44 |         :param enabled_properties: list of property idxs that are enabled.
 45 |         :param n_floor_colors: number of colors for special floors.
 46 |         :param fixed_color_prop_map: always use the same color prop map, default False
 47 |         :param color_property_map: use this color prop map, default None (generate random color prop map)
 48 |         :param held_out_cp_pairs: use this to prevent certain prop maps from showing up
 49 |         :param held_description: fractional chance of description of a color property pair being withheld
 50 |         :param with_instruction: Include instruction of the task
 51 |         :param rand_text: If 'rand_attribute', randomize the color-property maps: If True, replace description with
 52 |                           meaningless text.
 53 |         :param total_rand: replace description with meaningless text coming from a large dictionary
 54 |         :param instr_words: number of words in the instruction (only used for generating random text)
 55 |         :param args:
 56 |         :param kwargs:
 57 |         """
 58 |         assert n_floor_colors <= len(COLOR_NAMES)
 59 |         assert len(enabled_properties) > 0
 60 |         assert max(enabled_properties) < len(PROPERTY_TO_IDX)
 61 |         assert min(enabled_properties) >= 0
 62 | 
 63 |         self.enabled_properties = enabled_properties
 64 |         self.n_floor_colors = n_floor_colors
 65 |         self.held_out_cp_pairs = held_out_cp_pairs
 66 |         self.desc = ''
 67 |         self.fixed_color_prop_map = fixed_color_prop_map
 68 |         if color_property_map is None:
 69 |             self.color_property_map = {}
 70 |             self.color_property_map_fixed = {}
 71 |         else:
 72 |             self.color_property_map = {} #color_property_map
 73 |             self.color_property_map_fixed = color_property_map.copy()
 74 |             self.fixed_color_prop_map = True
 75 |         self.held_description = held_description
 76 |         # Properties for tile effects.
 77 |         self.tile_time = 0
 78 |         self.last_color = None
 79 |         self.color_time = 0
 80 |         self.agent_prev_pos = None
 81 |         self.with_instruction = with_instruction
 82 |         self.rand_text = rand_text
 83 |         self.total_rand = total_rand
 84 |         self.instr_words = instr_words
 85 |         super().__init__(*args, **kwargs)
 86 | 
 87 |     def gen_mission(self):
 88 |         # TODO(lts)
 89 |         super().gen_mission()
 90 | 
 91 |         return
 92 | 
 93 |     def reset(self, **kwargs):
 94 | 
 95 |         self.tile_time = 0
 96 |         self.last_color = None
 97 |         self.color_time = 0
 98 |         self.agent_prev_pos = None
 99 | 
100 |         # Rescramble floor property mappings.
101 |         # TODO(lts): Hold some out for test.
102 |         if len(self.color_property_map_fixed) > 0 and self.fixed_color_prop_map:
103 |             # determine which colors are to be used
104 |             self.color_property_map = {}
105 |             color_keys = [c for c in self.color_property_map_fixed.keys()]
106 |             inds = np.arange(len(color_keys))
107 |             self.np_random.shuffle(inds)
108 |             assert len(inds) >= self.n_floor_colors
109 |             inds = inds[:self.n_floor_colors]
110 |             for i in inds:
111 |                 c = color_keys[i]
112 |                 self.color_property_map[c] = random.choice(self.color_property_map_fixed[c])
113 |             # for count, c in enumerate(self.color_property_map_fixed.keys()):
114 |             #     self.color_property_map[c] = random.choice(self.color_property_map_fixed[c])
115 |             # print ("color_maps", self.color_property_map, self.color_property_map_fixed)
116 |         else:
117 |             self.color_property_map = {}
118 |             for i in range(self.n_floor_colors):
119 |                 # Random property per color. Can have duplicates.
120 |                 if self.held_out_cp_pairs is not None:
121 |                     if type(self.held_out_cp_pairs[0][0]) == str:
122 |                         c = COLOR_NAMES[i]
123 |                     else:
124 |                         c = i
125 |                     # print (c, self.held_out_cp_pairs)
126 |                     held_out_cs = []
127 |                     for c, p in self.held_out_cp_pairs:
128 |                         if c == COLOR_NAMES[i]:
129 |                             held_out_cs.append(p)
130 |                     # held_out_cs = [c for c, p in self.held_out_cp_pairs]
131 |                     # print ("held_out_cs", held_out_cs)
132 |                     # if c in held_out_cs:
133 |                     enabled_properties = self.enabled_properties.copy()
134 | 
135 |                     for c in held_out_cs:
136 |                         # print (enabled_properties, held_out_cs, c, held_out_cs.index(c))
137 |                         # enabled_properties.pop(held_out_cs.index(c))
138 |                         enabled_properties.remove(c)
139 |                         # print (enabled_properties)
140 |                         rand_property_idx = enabled_properties[self._rand_int(
141 |                             0, len(enabled_properties))]
142 | 
143 |                     if held_out_cs == []:
144 |                         rand_property_idx = self.enabled_properties[self._rand_int(
145 |                             0, len(self.enabled_properties))]
146 |                 else:
147 |                     rand_property_idx = self.enabled_properties[self._rand_int(
148 |                         0, len(self.enabled_properties))]
149 |                 self.color_property_map[COLOR_NAMES[i]] = IDX_TO_PROPERTY[rand_property_idx]
150 |                 # print ("color_map", self.color_property_map)
151 | 
152 |         # print(self.color_property_map)
153 |         obs = super().reset()
154 |         self.desc = '. '
155 |         if self.held_description == 0:
156 |             items = list(self.color_property_map.items())
157 |         else:
158 |             N = len(self.color_property_map)
159 |             assert self.held_description <= N
160 |             inc = N - self.held_description
161 |             items = list(self.color_property_map.items())
162 |             random.shuffle(items)
163 |             items = items[:inc]
164 |         if not self.rand_text:
165 |             for color, prop in items:
166 |                 self.desc += '%s floors are %s. ' % (color, prop)
167 |         elif self.rand_text == "rand_attribute":
168 |             props = list(PROPERTY_TO_IDX.keys())
169 |             for color, prop in items:
170 |                 self.desc += '%s floors are %s' % (self._rand_color(), props[self._rand_int(0, len(prop))])
171 |         else:
172 |             # separate words by '-'
173 |             # sentence length should be between 2 and 3
174 |             # choose words from A, B, C and D
175 |             if self.total_rand:
176 |                 lorem = TextLorem(srange=(self.instr_words, self.instr_words))
177 |                 self.desc += lorem.sentence()
178 |                 lorem = TextLorem(srange=(4, 4))
179 |             else:
180 |                 lorem = TextLorem(srange=(self.instr_words, self.instr_words),
181 |                                   words=['put', 'the', 'ball', 'in', 'lorem', 'ipsum', 'forty-two', 'sentence',
182 |                                          'length', 'agent', 'dir', 'gen', 'grid', 'word', 'description', 'choose',
183 |                                          'previous'])
184 |                 self.desc += lorem.sentence()
185 |                 lorem = TextLorem(srange=(4, 4),
186 |                                   words=['put', 'the', 'ball', 'in', 'lorem', 'ipsum', 'forty-two', 'sentence',
187 |                                          'length', 'agent', 'dir', 'gen', 'grid', 'word', 'description', 'choose',
188 |                                          'previous'])
189 | 
190 |             for color, prop in items:
191 |                 self.desc += ' ' + lorem.sentence()
192 | 
193 |         if self.with_instruction:
194 |             obs['mission'] += self.desc
195 |         else:
196 |             obs['mission'] = self.desc[2:]
197 |         return obs
198 | 
199 |     def _gen_grid(self, width, height):
200 |         super()._gen_grid(width, height)
201 | 
202 |         self.previous_direction = self.agent_dir
203 |         # Randomly place some colored floor tiles.
204 |         cmap = self.color_property_map
205 | 
206 |         contig_colors = []
207 |         level_0_cp = [(p, c) for c, p in cmap.items() if p in PROPERTY_ORDER[0]]
208 |         contig_colors.extend(c for p, c in level_0_cp)
209 |         n_color_0 = len(level_0_cp)
210 |         # level 1
211 |         level_1_cp = [(p, c) for c, p in cmap.items() if p in PROPERTY_ORDER[1]]
212 |         contig_colors.extend(c for p, c in level_1_cp)
213 |         n_color_1 = len(level_1_cp)
214 |         # level 2
215 |         level_2_cp = [(p, c) for c, p in cmap.items() if p in PROPERTY_ORDER[2]]
216 |         n_color_2 = len(level_2_cp)
217 | 
218 |         for i in range(self.num_cols * self.num_rows * pow(self.room_size - 2, 2)):
219 |             f = self._rand_float(0, 1)
220 |             if f >= 1 - Spawn_rates[0] and n_color_0 > 0:
221 |                 c = self._rand_int(0, n_color_0)
222 |                 i = self._rand_int(0, self.num_cols)
223 |                 j = self._rand_int(0, self.num_rows)
224 |                 try:
225 |                     for _ in range(N_tries):
226 |                         obj, pose = self.place_in_room(i, j, Floor(level_0_cp[c][1]))
227 |                         room = self.get_room(i, j)
228 |                         offsets = [(-1, -1), (0, -1), (1, -1), (-1, 0), (1, 0), (-1, 1), (0, 1), (1, 1)]
229 |                         flag = 0
230 |                         for offset in offsets:
231 |                             n_pose = pose + offset
232 |                             tar = self.grid.get(*n_pose)
233 |                             if tar is not None:
234 |                                 # check contiguous
235 |                                 if tar.type is 'floor' and tar.color in contig_colors:
236 |                                     flag += 1
237 |                                 # check door
238 |                                 elif tar.type is 'door':
239 |                                     flag += 2
240 |                         if flag > 1:
241 |                             # revert
242 |                             self.grid.set(pose[0], pose[1], None)
243 |                             room.objs.pop(-1)
244 |                         else:
245 |                             # succesfully placed floor
246 |                             break
247 |                 except RecursionError:
248 |                     # print("room %d %d too full" % (i, j))
249 |                     continue
250 |             elif f >= 1 - sum(Spawn_rates[:2]) and n_color_1 > 0:
251 |                 c = self._rand_int(0, n_color_1)
252 |                 i = self._rand_int(0, self.num_cols)
253 |                 j = self._rand_int(0, self.num_rows)
254 |                 try:
255 |                     for _ in range(N_tries):
256 |                         obj, pose = self.place_in_room(i, j, Floor(level_1_cp[c][1]))
257 |                         room = self.get_room(i, j)
258 |                         offsets = [(-1, -1), (0, -1), (1, -1), (-1, 0), (1, 0), (-1, 1), (0, 1), (1, 1)]
259 |                         flag = 0
260 |                         for offset in offsets:
261 |                             n_pose = pose + offset
262 |                             tar = self.grid.get(*n_pose)
263 |                             if tar is not None:
264 |                                 # check contiguous
265 |                                 if tar.type is 'floor' and tar.color in contig_colors:
266 |                                     flag += 1
267 |                                 # check door
268 |                                 elif tar.type is 'door':
269 |                                     flag += 2
270 |                         if flag > 1:
271 |                             # revert
272 |                             self.grid.set(pose[0], pose[1], None)
273 |                             room.objs.pop(-1)
274 |                         else:
275 |                             # succesfully placed floor
276 |                             break
277 |                 except RecursionError:
278 |                     # print("room %d %d too full" % (i, j))
279 |                     continue
280 | 
281 |             elif f >= 1 - sum(Spawn_rates[:3]) and n_color_2 > 0:
282 |                 c = self._rand_int(0, n_color_2)
283 |                 i = self._rand_int(0, self.num_cols)
284 |                 j = self._rand_int(0, self.num_rows)
285 |                 try:
286 |                     self.place_in_room(i, j, Floor(level_2_cp[c][1]))
287 |                 except RecursionError:
288 |                     # print("room %d %d too full" % (i, j))
289 |                     continue
290 | 
291 |     def get_floor_color(self, i, j):
292 |         o = self.grid.get(i, j)
293 |         if o and o.type == 'floor':
294 |             return o.color
295 |         return None
296 | 
297 |     @property
298 |     def down_pos(self):
299 |         """
300 |         Get the position of the cell that is one cell below agent.
301 |         """
302 | 
303 |         return self.agent_pos + DIR_TO_VEC[1]
304 | 
305 |     def step(self, action):
306 |         c = self.get_floor_color(*self.agent_pos)
307 |         # Deal with different floor tiles.
308 |         floor_property = None
309 |         if c:
310 |             floor_property = self.color_property_map[c]
311 | 
312 |         if floor_property == 'fliplr':
313 |             if action == self.actions.left:
314 |                 action = self.actions.right
315 |             elif action == self.actions.right:
316 |                 action = self.actions.left
317 |         elif floor_property == 'flipud':
318 |             if action == self.actions.forward:
319 |                 self.agent_dir = (self.agent_dir + 2) % 4
320 |         elif floor_property == 'sticky':
321 |             if self.tile_time < 2 and action == self.actions.forward:
322 |                 action = self.actions.done  # Wait action.
323 |         elif floor_property == "slippery":
324 |             self.step_count -= 0.5
325 |         elif floor_property == 'magic':
326 |             if self.color_time > 0 and self.color_time % 2:
327 |                 down_cell = self.grid.get(*self.down_pos)
328 |                 # Move Agent down.
329 |                 if down_cell == None or down_cell.can_overlap():
330 |                     self.agent_pos = self.down_pos
331 |                     # TODO(lts): Allow instructions to be finished via gravity.
332 | 
333 |         self.agent_prev_pos = self.agent_pos
334 | 
335 |         # Actually take action
336 |         obs, reward, done, info = super().step(action)
337 |         c = self.get_floor_color(*self.agent_pos)
338 |         if c:
339 |             floor_property = self.color_property_map[c]
340 |         if floor_property == "trap":
341 |             reward = 0
342 |             done = True
343 | 
344 |         # Keep track of some internal variables.
345 |         # Previous Location
346 |         if not np.array_equal(self.agent_prev_pos, self.agent_pos):
347 |             self.tile_time = 0
348 |         else:
349 |             self.tile_time += 1
350 | 
351 |         # Previous color
352 |         if (self.last_color != self.agent_pos).all():
353 |             #self.last_color = self.get_floor_color(*self.agent_pos)
354 |             self.last_color = self.agent_pos
355 |             self.color_time = 0
356 |         else:
357 |             self.color_time += 1
358 |         if self.with_instruction:
359 |             obs['mission'] += self.desc
360 |         else:
361 |             obs['mission'] = self.desc[2:]
362 | 
363 |         return obs, reward, done, info
364 | 
365 | 
366 | # Goto Red ball Dynamic
class Level_GoTo_RedBallDynamics_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    GoToRedBallNoDists with dynamic floors, training split.

    Two floor colors draw from properties [0, 3, 4]; the pairs (green, 0) and (blue, 4) are held out.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [('green', 0), ('blue', 4)]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
375 | 
376 | 
class Level_GoTo_RedBallDynamics_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    GoToRedBallNoDists with dynamic floors restricted to green -> trap and blue -> slippery.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {'green': ['trap'], 'blue': ['slippery']}
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
386 | 
387 | 
class Level_GoTo_RedBallDynamics_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    GoToRedBallNoDists with dynamic floors, test split: two floor colors, properties [0, 3, 4],
    no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
395 | 
# Hand the GoTo red-ball dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_GoTo_RedBallDynamics_Train': Level_GoTo_RedBallDynamics_Train,
        'Level_GoTo_RedBallDynamics_TargetPairOnly': Level_GoTo_RedBallDynamics_TargetPairOnly,
        'Level_GoTo_RedBallDynamics_Test': Level_GoTo_RedBallDynamics_Test,
    },
)
400 | 
401 | # Goto Red ball dynamic hard
class Level_GoTo_RedBallDynamics_Hard_Train(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    Harder GoToRedBallNoDists dynamics level, training split.

    Three floor colors draw from properties 0-5; two (color, property idx) pairs are held out
    for each of green, grey and blue.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
412 | 
413 | 
class Level_GoTo_RedBallDynamics_Hard_TargetPairOnly(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    Harder GoToRedBallNoDists dynamics level where each of green, grey and blue takes one of
    its two listed candidate properties.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
424 | 
425 | 
class Level_GoTo_RedBallDynamics_Hard_Test(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    Harder GoToRedBallNoDists dynamics level, test split: three floor colors, properties 0-5,
    no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
434 | 
class Level_GoTo_RedBallDynamics_Hard_Fixed(DynamicsLevel, Level_GoToRedBallNoDists):
    """
    Harder GoToRedBallNoDists dynamics level with a fixed meaning per color: each episode uses
    three of the six listed colors, each with its single listed property.
    """

    def __init__(self, seed=None, with_instruction=True):
        fixed_map = {
            'green': ['slippery'],
            'grey': ['flipud'],
            'blue': ['fliplr'],
            'red': ['trap'],
            'purple': ['magic'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=fixed_map,
            with_instruction=with_instruction,
        )
        Level_GoToRedBallNoDists.__init__(self, seed)
450 | 
# Hand the harder GoTo red-ball dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_GoTo_RedBallDynamics_Hard_Train': Level_GoTo_RedBallDynamics_Hard_Train,
        'Level_GoTo_RedBallDynamics_Hard_TargetPairOnly': Level_GoTo_RedBallDynamics_Hard_TargetPairOnly,
        'Level_GoTo_RedBallDynamics_Hard_Test': Level_GoTo_RedBallDynamics_Hard_Test,
        'Level_GoTo_RedBallDynamics_Hard_Fixed': Level_GoTo_RedBallDynamics_Hard_Fixed,
    },
)
456 | 
457 | # Goto Redball dynamic maze
class Level_GoToRedBall_Maze(RoomGridLevel):
    """
    Go to the red ball in a 3x3 grid of rooms, with no distractor objects.
    """

    def __init__(self, seed=None):
        layout = dict(num_rows=3, num_cols=3, room_size=8)
        super().__init__(seed=seed, **layout)

    def gen_mission(self):
        """Place the agent, connect the rooms, drop a red ball in a random room and target it."""
        self.place_agent()
        self.connect_all()

        # Pick the room that receives the ball (two independent draws from 0..2).
        col = self.np_random.randint(3)
        row = self.np_random.randint(3)
        ball, _ = self.add_object(col, row, 'ball', 'red')

        # Make sure no unblocking is required
        self.check_objs_reachable()

        target = ObjDesc(ball.type, ball.color)
        self.instrs = GoToInstr(target)
482 | 
483 | 
class Level_GoTo_RedBallDynamics_Maze_Train(DynamicsLevel, Level_GoToRedBall_Maze):
    """
    GoToRedBall maze with dynamic floors, training split.

    Three floor colors draw from properties 0-5; two (color, property idx) pairs are held out
    for each of green, grey and blue.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoToRedBall_Maze.__init__(self, seed)
494 | 
495 | 
class Level_GoTo_RedBallDynamics_Maze_TargetPairOnly(DynamicsLevel, Level_GoToRedBall_Maze):
    """
    GoToRedBall maze where each of green, grey and blue takes one of its two listed candidate
    properties.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoToRedBall_Maze.__init__(self, seed)
506 | 
507 | 
class Level_GoTo_RedBallDynamics_Maze_Test(DynamicsLevel, Level_GoToRedBall_Maze):
    """
    GoToRedBall maze with dynamic floors, test split: three floor colors, properties 0-5,
    no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_GoToRedBall_Maze.__init__(self, seed)
516 | 
517 | 
class Level_GoTo_RedBallDynamics_Maze_Fixed(DynamicsLevel, Level_GoToRedBall_Maze):
    """
    GoToRedBall maze with a fixed meaning per color: each episode uses three of the six listed
    colors, each with its single listed property.
    """

    def __init__(self, seed=None, with_instruction=True):
        fixed_map = {
            'green': ['slippery'],
            'grey': ['flipud'],
            'blue': ['fliplr'],
            'red': ['trap'],
            'purple': ['magic'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=fixed_map,
            with_instruction=with_instruction,
        )
        Level_GoToRedBall_Maze.__init__(self, seed)
533 | 
# Hand the GoTo red-ball maze dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_GoTo_RedBallDynamics_Maze_Train': Level_GoTo_RedBallDynamics_Maze_Train,
        'Level_GoTo_RedBallDynamics_Maze_TargetPairOnly': Level_GoTo_RedBallDynamics_Maze_TargetPairOnly,
        'Level_GoTo_RedBallDynamics_Maze_Test': Level_GoTo_RedBallDynamics_Maze_Test,
        'Level_GoTo_RedBallDynamics_Maze_Fixed': Level_GoTo_RedBallDynamics_Maze_Fixed,
    },
)
539 | 
540 | # Put Next Local Dynamic
class Level_PutNextLocalDynamics_Train(DynamicsLevel, Level_PutNextLocal):
    """
    PutNextLocal (room_size=8, num_objs=4) with dynamic floors, training split.

    Two floor colors draw from properties [0, 3, 4]; the pairs (green, 0) and (blue, 4) are held out.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [('green', 0), ('blue', 4)]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
546 | 
547 | 
class Level_PutNextLocalDynamics_TargetPairOnly(DynamicsLevel, Level_PutNextLocal):
    """
    PutNextLocal (room_size=8, num_objs=4) with dynamic floors restricted to green -> trap and
    blue -> slippery.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {'green': ['trap'], 'blue': ['slippery']}
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
554 | 
555 | 
class Level_PutNextLocalDynamics_Test(DynamicsLevel, Level_PutNextLocal):
    """
    PutNextLocal (room_size=8, num_objs=4) with dynamic floors, test split: two floor colors,
    properties [0, 3, 4], no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
560 | 
class Level_PutNextLocalDynamics_Fixed(DynamicsLevel, Level_PutNextLocal):
    """
    PutNextLocal (room_size=8, num_objs=4) with a fixed meaning per color: each episode uses two
    of the three listed colors, each with its single listed property.
    """

    def __init__(self, seed=None, with_instruction=True):
        fixed_map = {
            'green': ['slippery'],
            'red': ['trap'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=fixed_map,
            with_instruction=with_instruction,
        )
        Level_PutNextLocal.__init__(self, room_size=8, num_objs=4, seed=seed)
570 | 
# Hand the PutNextLocal dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_PutNextLocalDynamics_Train': Level_PutNextLocalDynamics_Train,
        'Level_PutNextLocalDynamics_TargetPairOnly': Level_PutNextLocalDynamics_TargetPairOnly,
        'Level_PutNextLocalDynamics_Test': Level_PutNextLocalDynamics_Test,
        'Level_PutNextLocalDynamics_Fixed': Level_PutNextLocalDynamics_Fixed,
    },
)
576 | 
577 | # Put Next to Dynamic
class Level_PutNextDynamics_Train(DynamicsLevel, Level_PutNext):
    """
    PutNext (room_size=8) with dynamic floors, training split.

    Two floor colors draw from properties [0, 3, 4]; the pairs (green, 0) and (blue, 4) are held out.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [('green', 0), ('blue', 4)]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
583 | 
584 | 
class Level_PutNextDynamics_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """
    PutNext (room_size=8) with dynamic floors restricted to green -> trap and blue -> slippery.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {'green': ['trap'], 'blue': ['slippery']}
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
591 | 
592 | 
class Level_PutNextDynamics_Test(DynamicsLevel, Level_PutNext):
    """
    PutNext (room_size=8) with dynamic floors, test split: two floor colors, properties
    [0, 3, 4], no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
597 | 
# Hand the PutNext dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_PutNextDynamics_Train': Level_PutNextDynamics_Train,
        'Level_PutNextDynamics_TargetPairOnly': Level_PutNextDynamics_TargetPairOnly,
        'Level_PutNextDynamics_Test': Level_PutNextDynamics_Test,
    },
)
602 | 
603 | #  Put Next to Dynamic Hard
class Level_PutNextDynamics_Hard_Train(DynamicsLevel, Level_PutNext):
    """
    Harder PutNext (room_size=8) dynamics level, training split.

    Three floor colors draw from properties 0-5; two (color, property idx) pairs are held out
    for each of green, grey and blue.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
611 | 
612 | 
class Level_PutNextDynamics_Hard_TargetPairOnly(DynamicsLevel, Level_PutNext):
    """
    Harder PutNext (room_size=8) dynamics level where each of green, grey and blue takes one of
    its two listed candidate properties.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
620 | 
621 | 
class Level_PutNextDynamics_Hard_Test(DynamicsLevel, Level_PutNext):
    """
    Harder PutNext (room_size=8) dynamics level, test split: three floor colors, properties 0-5,
    no held-out pairs.
    """

    def __init__(self, seed=None, with_instruction=True):
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        Level_PutNext.__init__(self, room_size=8, seed=seed)
627 | 
# Hand the harder PutNext dynamics levels to register_levels.
register_levels(
    __name__,
    {
        'Level_PutNextDynamics_Hard_Train': Level_PutNextDynamics_Hard_Train,
        'Level_PutNextDynamics_Hard_TargetPairOnly': Level_PutNextDynamics_Hard_TargetPairOnly,
        'Level_PutNextDynamics_Hard_Test': Level_PutNextDynamics_Hard_Test,
    },
)
632 | 
633 | # Goto Maze Dynamic
class Level_GoToObjMaze_Dynamics_Train(DynamicsLevel, Level_GoTo):
    """
    GoTo in a 3x3 maze of rooms (room_size=11, one distractor, doors closed) with dynamic floors,
    training split.

    Three floor colors draw from properties 0-5; two (color, property idx) pairs are held out
    for each of green, grey and blue.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
645 | 
class Level_GoToObjMaze_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """
    GoTo in a 3x3 maze of rooms (room_size=11, one distractor, doors closed) where each of green,
    grey and blue takes one of its two listed candidate properties.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
657 | 
class Level_GoToObjMaze_Dynamics_Test(DynamicsLevel, Level_GoTo):
    """
    Test variant of the GoTo-object maze level with floor dynamics.

    Same configuration as the Train variant but with no held-out pairs
    and no explicit color/property map passed to DynamicsLevel.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_cfg = dict(
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        maze_cfg = dict(
            room_size=11,
            num_rows=3,
            num_dists=1,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
        DynamicsLevel.__init__(self, **dynamics_cfg)
        Level_GoTo.__init__(self, **maze_cfg)
667 | 
# Register the GoTo-object maze dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_GoToObjMaze_Dynamics_Train,
        Level_GoToObjMaze_Dynamics_TargetPairOnly,
        Level_GoToObjMaze_Dynamics_Test,
    )
})
672 | 
673 | 
674 | # Goto local Dynamic
class Level_GoToLocal_Dynamics_Train(DynamicsLevel, Level_GoToLocal):
    """
    Training variant of the GoToLocal level with floor dynamics.

    DynamicsLevel is set up with properties 0, 3 and 4 enabled over
    2 floor colors, passing ('green', 0) and ('blue', 4) as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel). Level_GoToLocal gets room_size=11 and num_dists=8.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [('green', 0), ('blue', 4)]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)
683 | 
class Level_GoToLocal_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoToLocal):
    """
    GoToLocal level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` of green -> 'trap'
    and blue -> 'slippery'. Level_GoToLocal gets room_size=11 and
    num_dists=8.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap'],
            'blue': ['slippery'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)
693 | 
class Level_GoToLocal_Dynamics_Test(DynamicsLevel, Level_GoToLocal):
    """
    Test variant of the GoToLocal level with floor dynamics.

    Same configuration as the Train variant but with no held-out pairs
    and no explicit color/property map passed to DynamicsLevel.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_cfg = dict(
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        DynamicsLevel.__init__(self, **dynamics_cfg)
        Level_GoToLocal.__init__(self, room_size=11, num_dists=8, seed=seed)
702 | 
# Register the GoToLocal dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_GoToLocal_Dynamics_Train,
        Level_GoToLocal_Dynamics_TargetPairOnly,
        Level_GoToLocal_Dynamics_Test,
    )
})
707 | 
708 | # Goto Dynamic
class Level_GoTo_Dynamics_Train(DynamicsLevel, Level_GoTo):
    """
    Training variant of the GoTo level with floor dynamics.

    DynamicsLevel is set up with properties 0, 3 and 4 enabled over
    2 floor colors, passing ('green', 0) and ('blue', 4) as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel). Level_GoTo is configured with 3 rows and 3 columns
    of size-11 rooms, doors_open=False and num_dists=11.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [('green', 0), ('blue', 4)]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
718 | 
class Level_GoTo_Dynamics_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """
    GoTo level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` of green -> 'trap'
    and blue -> 'slippery'. Level_GoTo is configured with 3 rows and
    3 columns of size-11 rooms, doors_open=False and num_dists=11.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap'],
            'blue': ['slippery'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
729 | 
class Level_GoTo_Dynamics_Test(DynamicsLevel, Level_GoTo):
    """
    Test variant of the GoTo level with floor dynamics.

    Same configuration as the Train variant but with no held-out pairs
    and no explicit color/property map passed to DynamicsLevel.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_cfg = dict(
            enabled_properties=[0, 3, 4],
            n_floor_colors=2,
            with_instruction=with_instruction,
        )
        maze_cfg = dict(
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
        DynamicsLevel.__init__(self, **dynamics_cfg)
        Level_GoTo.__init__(self, **maze_cfg)
739 | 
740 | 
# Register the GoTo dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_GoTo_Dynamics_Train,
        Level_GoTo_Dynamics_TargetPairOnly,
        Level_GoTo_Dynamics_Test,
    )
})
745 | 
746 | 
747 | # Goto Dynamic Hard
class Level_GoTo_Dynamics_Hard_Train(DynamicsLevel, Level_GoTo):
    """
    Training variant of the hard GoTo level with floor dynamics.

    DynamicsLevel is set up with properties 0-5 enabled over 3 floor
    colors and six (color, property index) pairs passed as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel). Level_GoTo is configured with 3 rows and 3 columns
    of size-11 rooms, doors_open=False and num_dists=11.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
759 | 
class Level_GoTo_Dynamics_Hard_TargetPairOnly(DynamicsLevel, Level_GoTo):
    """
    Hard GoTo level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` listing two property
    names for each of green, grey and blue. Level_GoTo is configured with
    3 rows and 3 columns of size-11 rooms, doors_open=False and
    num_dists=11.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
771 | 
class Level_GoTo_Dynamics_Hard_Test(DynamicsLevel, Level_GoTo):
    """
    Test variant of the hard GoTo level with floor dynamics.

    Same configuration as the hard Train variant but with no held-out
    pairs and no explicit color/property map passed to DynamicsLevel.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_cfg = dict(
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        maze_cfg = dict(
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
        DynamicsLevel.__init__(self, **dynamics_cfg)
        Level_GoTo.__init__(self, **maze_cfg)
781 | 
class Level_GoTo_Dynamics_Hard_Fixed(DynamicsLevel, Level_GoTo):
    """
    Hard GoTo level with a fixed one-property-per-color map.

    DynamicsLevel receives a ``color_property_map`` assigning a single
    property name to each of six colors. Level_GoTo is configured with
    3 rows and 3 columns of size-11 rooms, doors_open=False and
    num_dists=11.
    """

    def __init__(self, seed=None, with_instruction=True):
        # NOTE(review): the map names six colors while n_floor_colors=3 --
        # confirm how DynamicsLevel reconciles the two.
        fixed_map = {
            'green': ['slippery'],
            'grey': ['flipud'],
            'blue': ['fliplr'],
            'red': ['trap'],
            'purple': ['magic'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=fixed_map,
            with_instruction=with_instruction,
        )
        Level_GoTo.__init__(
            self,
            room_size=11,
            num_rows=3,
            num_dists=11,
            num_cols=3,
            doors_open=False,
            seed=seed,
        )
798 | 
# Register the hard GoTo dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_GoTo_Dynamics_Hard_Train,
        Level_GoTo_Dynamics_Hard_TargetPairOnly,
        Level_GoTo_Dynamics_Hard_Test,
        Level_GoTo_Dynamics_Hard_Fixed,
    )
})
804 | 
805 | # Unlock Dynamic
class Level_Unlock_Dynamic_Train(DynamicsLevel, Level_Unlock):
    """
    Training variant of the Unlock level with floor dynamics.

    DynamicsLevel is set up with properties 0-5 enabled over 3 floor
    colors and six (color, property index) pairs passed as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel). Level_Unlock is configured with room_size=11,
    num_rows=3 and num_cols=3.
    """

    def __init__(self, seed=None, with_instruction=True):
        held_out = [
            ('green', 0), ('green', 2),
            ('grey', 3), ('grey', 4),
            ('blue', 1), ('blue', 5),
        ]
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            held_out_cp_pairs=held_out,
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(
            self, room_size=11, num_rows=3, num_cols=3, seed=seed
        )
816 | 
class Level_Unlock_Dynamic_TargetPairOnly(DynamicsLevel, Level_Unlock):
    """
    Unlock level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` listing two property
    names for each of green, grey and blue. Level_Unlock is configured
    with room_size=11, num_rows=3 and num_cols=3.
    """

    def __init__(self, seed=None, with_instruction=True):
        target_pairs = {
            'green': ['trap', 'flipud'],
            'grey': ['fliplr', 'slippery'],
            'blue': ['sticky', 'magic'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=target_pairs,
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(
            self, room_size=11, num_rows=3, num_cols=3, seed=seed
        )
827 | 
class Level_Unlock_Dynamic_Test(DynamicsLevel, Level_Unlock):
    """
    Test variant of the Unlock level with floor dynamics.

    Same configuration as the Train variant but with no held-out pairs
    and no explicit color/property map passed to DynamicsLevel.
    """

    def __init__(self, seed=None, with_instruction=True):
        dynamics_cfg = dict(
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            with_instruction=with_instruction,
        )
        DynamicsLevel.__init__(self, **dynamics_cfg)
        Level_Unlock.__init__(
            self, room_size=11, num_rows=3, num_cols=3, seed=seed
        )
836 | 
class Level_Unlock_Dynamic_Fixed(DynamicsLevel, Level_Unlock):
    """
    Unlock level with a fixed one-property-per-color map.

    DynamicsLevel receives a ``color_property_map`` assigning a single
    property name to each of six colors. Level_Unlock is configured with
    room_size=11, num_rows=3 and num_cols=3.
    """

    def __init__(self, seed=None, with_instruction=True):
        # NOTE(review): the map names six colors while n_floor_colors=3 --
        # confirm how DynamicsLevel reconciles the two.
        fixed_map = {
            'green': ['slippery'],
            'grey': ['flipud'],
            'blue': ['fliplr'],
            'red': ['trap'],
            'purple': ['magic'],
            'yellow': ['sticky'],
        }
        DynamicsLevel.__init__(
            self,
            enabled_properties=[0, 1, 2, 3, 4, 5],
            n_floor_colors=3,
            color_property_map=fixed_map,
            with_instruction=with_instruction,
        )
        Level_Unlock.__init__(
            self, room_size=11, num_rows=3, num_cols=3, seed=seed
        )
852 | 
# Register the Unlock dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_Unlock_Dynamic_Train,
        Level_Unlock_Dynamic_TargetPairOnly,
        Level_Unlock_Dynamic_Test,
        Level_Unlock_Dynamic_Fixed,
    )
})
858 | 
859 | 
860 | # Pickup Location Dynamic
class Level_PickupLoc_Dynamic_Train(DynamicsLevel, Level_PickupLoc):
    """
    Training variant of the PickupLoc level with floor dynamics.

    DynamicsLevel is set up with properties 0, 3 and 4 enabled over
    2 floor colors, passing ('green', 0) and ('blue', 4) as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel).

    :param seed: forwarded to Level_PickupLoc.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               held_out_cp_pairs=[('green', 0), ('blue', 4)],
                               with_instruction=with_instruction)
        Level_PickupLoc.__init__(self, seed=seed)
868 | 
869 | 
class Level_PickupLoc_Dynamic_TargetPairOnly(DynamicsLevel, Level_PickupLoc):
    """
    PickupLoc level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` of green -> 'trap'
    and blue -> 'slippery'.

    :param seed: forwarded to Level_PickupLoc.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map={'green': ['trap', ],
                                                   'blue': ['slippery', ]},
                               with_instruction=with_instruction)
        Level_PickupLoc.__init__(self, seed=seed)
878 | 
879 | 
class Level_PickupLoc_Dynamic_Test(DynamicsLevel, Level_PickupLoc):
    """
    Test variant of the PickupLoc level with floor dynamics.

    DynamicsLevel is set up with properties 0, 3 and 4 enabled over
    2 floor colors, with no held-out pairs and no explicit map.

    :param seed: forwarded to Level_PickupLoc.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               with_instruction=with_instruction)
        Level_PickupLoc.__init__(self, seed=seed)
886 | 
class Level_PickupLoc_Dynamic_Fixed(DynamicsLevel, Level_PickupLoc):
    """
    PickupLoc level with a fixed one-property-per-color map.

    DynamicsLevel receives a ``color_property_map`` of green ->
    'slippery', red -> 'trap' and yellow -> 'sticky'.

    :param seed: forwarded to Level_PickupLoc.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        # NOTE(review): the map names three colors while n_floor_colors=2,
        # and includes 'sticky' although enabled_properties is [0, 3, 4] --
        # confirm against DynamicsLevel that this combination is intended.
        DynamicsLevel.__init__(self, enabled_properties=[0, 3, 4], n_floor_colors=2,
                               color_property_map={'green': ['slippery', ],
                                                   'red': ['trap'],
                                                   'yellow': ['sticky'],
                                                   },
                               with_instruction=with_instruction)
        Level_PickupLoc.__init__(self, seed=seed)
898 | 
# Register the PickupLoc dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_PickupLoc_Dynamic_Train,
        Level_PickupLoc_Dynamic_TargetPairOnly,
        Level_PickupLoc_Dynamic_Test,
        Level_PickupLoc_Dynamic_Fixed,
    )
})
904 | 
905 | 
906 | # Goto Sequential Dynamic
class Level_GotoSeq_Dynamic_Train(DynamicsLevel, Level_GoToSeq):
    """
    Training variant of the GoToSeq level with floor dynamics.

    DynamicsLevel is set up with properties 0-5 enabled over 3 floor
    colors and six (color, property index) pairs passed as
    ``held_out_cp_pairs`` (presumably withheld from training -- confirm
    in DynamicsLevel). Level_GoToSeq is configured with room_size=11,
    num_rows=3 and num_cols=3.

    :param seed: forwarded to Level_GoToSeq.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=3,
                               held_out_cp_pairs=[('green', 0), ('green', 2),
                                                  ('grey', 3), ('grey', 4),
                                                  ('blue', 1), ('blue', 5)],
                               with_instruction=with_instruction)
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)
916 | 
class Level_GotoSeq_Dynamic_TargetPairOnly(DynamicsLevel, Level_GoToSeq):
    """
    GoToSeq level whose color/property map is given explicitly.

    DynamicsLevel receives a ``color_property_map`` listing two property
    names for each of green, grey and blue. Level_GoToSeq is configured
    with room_size=11, num_rows=3 and num_cols=3.

    :param seed: forwarded to Level_GoToSeq.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=3,
                               color_property_map={'green': ['trap', 'flipud'],
                                                   'grey': ['fliplr', 'slippery'],
                                                   'blue': ['sticky', 'magic']},
                               with_instruction=with_instruction)
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)
926 | 
class Level_GotoSeq_Dynamic_Test(DynamicsLevel, Level_GoToSeq):
    """
    Test variant of the GoToSeq level with floor dynamics.

    DynamicsLevel is set up with properties 0-5 enabled over 3 floor
    colors, with no held-out pairs and no explicit map. Level_GoToSeq is
    configured with room_size=11, num_rows=3 and num_cols=3.

    :param seed: forwarded to Level_GoToSeq.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=3,
                               with_instruction=with_instruction)
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)
933 | 
class Level_GotoSeq_Dynamic_Fixed(DynamicsLevel, Level_GoToSeq):
    """
    GoToSeq level with a fixed one-property-per-color map.

    DynamicsLevel receives a ``color_property_map`` assigning a single
    property name to each of six colors. Level_GoToSeq is configured with
    room_size=11, num_rows=3 and num_cols=3.

    :param seed: forwarded to Level_GoToSeq.
    :param with_instruction: forwarded to DynamicsLevel, matching the
        signature of the other dynamics levels in this module. The
        default of True assumes DynamicsLevel uses the same default --
        TODO confirm.
    """

    def __init__(self,
                 seed=None,
                 with_instruction=True
                 ):
        # NOTE(review): the map names six colors while n_floor_colors=3 --
        # confirm how DynamicsLevel reconciles the two.
        DynamicsLevel.__init__(self, enabled_properties=[0, 1, 2, 3, 4, 5], n_floor_colors=3,
                               color_property_map={'green': ['slippery', ],
                                                   'grey': ['flipud'],
                                                   'blue': ['fliplr'],
                                                   'red': ['trap'],
                                                   'purple': ['magic'],
                                                   'yellow': ['sticky'],
                                                   },
                               with_instruction=with_instruction)
        Level_GoToSeq.__init__(self, room_size=11, num_rows=3, num_cols=3, seed=seed)
948 | 
# Register the GoToSeq dynamics levels, keyed by class name.
register_levels(__name__, {
    level.__name__: level
    for level in (
        Level_GotoSeq_Dynamic_Train,
        Level_GotoSeq_Dynamic_TargetPairOnly,
        Level_GotoSeq_Dynamic_Test,
        Level_GotoSeq_Dynamic_Fixed,
    )
})
954 | 
955 | 


--------------------------------------------------------------------------------