├── .gitignore ├── LICENSE.md ├── README.md ├── babyai-text ├── .gitignore ├── README.md ├── babyai │ ├── .gitignore │ ├── .travis.yml │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── babyai │ │ ├── QA.py │ │ ├── QA_simple.py │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── base.py │ │ ├── batchsampler.py │ │ ├── bot.py │ │ ├── evaluate.py │ │ ├── l_class.py │ │ ├── levels │ │ │ ├── __init__.py │ │ │ ├── bonus_levels.py │ │ │ ├── iclr19_levels.py │ │ │ ├── levelgen.py │ │ │ ├── test_levels.py │ │ │ └── verifier.py │ │ ├── model.py │ │ ├── paral_env_simple.py │ │ ├── plotting.py │ │ ├── plotting_paper.py │ │ ├── rl │ │ │ ├── LICENSE │ │ │ ├── __init__.py │ │ │ ├── algos │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── ppo.py │ │ │ ├── format.py │ │ │ ├── model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── dictlist.py │ │ │ │ ├── penv.py │ │ │ │ └── supervised_losses.py │ │ ├── shaped_env.py │ │ ├── shaped_env_paral.py │ │ ├── test_paral.py │ │ ├── trainer_l_class.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── agent.py │ │ │ ├── demos.py │ │ │ ├── format.py │ │ │ ├── log.py │ │ │ ├── model.py │ │ │ └── viz.py │ ├── docs │ │ ├── bonus_levels.md │ │ ├── codebase.md │ │ ├── iclr19_levels.md │ │ ├── train-eval.md │ │ └── troubleshooting.md │ ├── environment.yaml │ ├── nn │ │ ├── GPTJ_with_value_head.py │ │ ├── __init__.py │ │ ├── dec_QA.py │ │ ├── enc_lang.py │ │ ├── enc_lang_QA.py │ │ ├── enc_visual.py │ │ ├── enc_vl.py │ │ ├── encodings.py │ │ ├── model_util.py │ │ └── transforms.py │ ├── run_tests.py │ ├── scripts │ │ ├── .gitignore │ │ ├── GPR.py │ │ ├── __init__.py │ │ ├── compute_possible_instructions.py │ │ ├── enjoy.py │ │ ├── eval_bot.py │ │ ├── evaluate.py │ │ ├── evaluate_all_demos.py │ │ ├── evaluate_all_models.py │ │ ├── instruction_handler.py │ │ ├── learn_baseline.py │ │ ├── learn_baseline_model.py │ │ ├── make_agent_demos.py │ │ ├── make_subtask_recipe_demos.py │ │ ├── manual_control.py │ │ ├── result_l_class_study.py │ │ ├── show_level_instructions.py │ │ ├── subtask_prediction.py │ │ ├── subtask_prediction_model.py │ │ ├── test_PPO.py │ │ ├── test_rl.py │ │ ├── trace_agent_traj.py │ │ ├── train_il.py │ │ ├── train_intelligent_expert.py │ │ ├── train_l_class.py │ │ ├── train_learn_baseline_model.py │ │ ├── train_rl.py │ │ ├── train_rl_paral.py │ │ └── train_subtask_prediction_model.py │ └── setup.py ├── babyai_text │ ├── __init__.py │ └── levels │ │ ├── __init__.py │ │ └── mixed_seq_levels.py ├── gym-minigrid │ ├── .gitignore │ ├── .travis.yml │ ├── LICENSE │ ├── README.md │ ├── benchmark.py │ ├── figures │ │ ├── BlockedUnlockPickup.png │ │ ├── DistShift1.png │ │ ├── DistShift2.png │ │ ├── KeyCorridorS3R1.png │ │ ├── KeyCorridorS3R2.png │ │ ├── KeyCorridorS3R3.png │ │ ├── KeyCorridorS4R3.png │ │ ├── KeyCorridorS5R3.png │ │ ├── KeyCorridorS6R3.png │ │ ├── LavaCrossingS11N5.png │ │ ├── LavaCrossingS9N1.png │ │ ├── LavaCrossingS9N2.png │ │ ├── LavaCrossingS9N3.png │ │ ├── LavaGapS6.png │ │ ├── ObstructedMaze-1Dl.png │ │ ├── ObstructedMaze-1Dlh.png │ │ ├── ObstructedMaze-1Dlhb.png │ │ ├── ObstructedMaze-1Q.png │ │ ├── ObstructedMaze-2Dl.png │ │ ├── ObstructedMaze-2Dlh.png │ │ ├── ObstructedMaze-2Dlhb.png │ │ ├── ObstructedMaze-2Q.png │ │ ├── ObstructedMaze-4Q.png │ │ ├── SimpleCrossingS11N5.png │ │ ├── SimpleCrossingS9N1.png │ │ ├── SimpleCrossingS9N2.png │ │ ├── SimpleCrossingS9N3.png │ │ ├── Unlock.png │ │ ├── UnlockPickup.png │ │ ├── door-key-curriculum.gif │ │ ├── door-key-env.png │ │ ├── dynamic_obstacles.gif │ │ ├── empty-env.png │ │ ├── 
fetch-env.png │ │ ├── four-rooms-env.png │ │ ├── gotodoor-6x6.mp4 │ │ ├── gotodoor-6x6.png │ │ └── multi-room.gif │ ├── gym_minigrid │ │ ├── __init__.py │ │ ├── envs │ │ │ ├── __init__.py │ │ │ ├── blockedunlockpickup.py │ │ │ ├── crossing.py │ │ │ ├── distshift.py │ │ │ ├── doorkey.py │ │ │ ├── dynamicobstacles.py │ │ │ ├── empty.py │ │ │ ├── fetch.py │ │ │ ├── fourrooms.py │ │ │ ├── gotodoor.py │ │ │ ├── gotoobject.py │ │ │ ├── keycorridor.py │ │ │ ├── lavagap.py │ │ │ ├── lockedroom.py │ │ │ ├── memory.py │ │ │ ├── multiroom.py │ │ │ ├── obstructedmaze.py │ │ │ ├── playground_v0.py │ │ │ ├── putnear.py │ │ │ ├── redbluedoors.py │ │ │ ├── unlock.py │ │ │ └── unlockpickup.py │ │ ├── minigrid.py │ │ ├── register.py │ │ ├── rendering.py │ │ ├── roomgrid.py │ │ ├── window.py │ │ └── wrappers.py │ ├── manual_control.py │ ├── run_tests.py │ └── setup.py ├── images │ └── babyai-text_schema.png └── setup.py ├── docs └── images │ ├── generalization_tests.png │ └── main_schema.png ├── experiments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── main.cpython-310.pyc │ └── test_llm.cpython-310.pyc ├── agents │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── base_agent.py │ ├── bot │ │ ├── __pycache__ │ │ │ └── bot.cpython-310.pyc │ │ └── bot.py │ ├── drrn │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── drrn.cpython-310.pyc │ │ │ └── model.cpython-310.pyc │ │ ├── drrn.py │ │ ├── model.py │ │ ├── spm_models │ │ │ ├── unigram_8k.model │ │ │ └── unigram_8k.vocab │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── memory.cpython-310.pyc │ │ │ └── pad_sequences.cpython-310.pyc │ │ │ ├── memory.py │ │ │ └── pad_sequences.py │ ├── ppo │ │ ├── __init__.py │ │ ├── base_ppo_agent.py │ │ ├── llm_ppo_agent.py │ │ └── symbolic_ppo_agent.py │ └── random_agent │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── random_agent.cpython-310.pyc │ │ └── random_agent.py ├── campaign │ ├── Mixed_BC_finetuning │ │ ├── bc_finetuning_Flan-T5_large.slurm │ │ └── bc_finetuning_from-bot_Flan-T5_large.slurm │ ├── Mixed_tests_no-change │ │ └── GFlan-T5_large.slurm │ └── Mixed_training │ │ ├── DRRN.slurm │ │ ├── GFlan-T5_large.slurm │ │ ├── NPAE-Flan-T5_large.slurm │ │ └── Symbolic-PPO.slurm ├── clm_behavioral-cloning.py ├── configs │ ├── accelerate │ │ └── default_config.yaml │ ├── local_gpu_config.yaml │ ├── multi-node_slurm_cluster_config.yaml │ └── multi-node_slurm_cluster_config_test.yaml ├── plot_utils │ ├── __init__.py │ ├── plotting_paper.py │ └── plotting_results.py ├── post-training_tests.py ├── slurm │ ├── accelerate_launcher.sh │ ├── lamorel_launcher.sh │ └── train_symbolic_ppo.sh ├── test_results.py ├── train_language_agent.py └── train_symbolic_ppo.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | useless/ 2 | storage/ 3 | slurm_logs/ 4 | plots/ 5 | outputs/ 6 | notebooks/* 7 | !notebooks/*.ipynb 8 | /old_slurms/ 9 | .idea 10 | .DS_Store 11 | *.pyc 12 | .hydra/* -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Flowers Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /babyai-text/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # pycharm 104 | .idea/ 105 | 106 | # storage 107 | storage/ 108 | 109 | # pytorch weights, training history 110 | *.csv 111 | *.pt 112 | *.json 113 | -------------------------------------------------------------------------------- /babyai-text/README.md: -------------------------------------------------------------------------------- 1 | # BabyAI-Text 2 | BabyAI-Text is a wrapper on top of BabyAI to make it a text-only environment returning a textual description of the agent's observation. 3 | ![Main schema](images/babyai-text_schema.png) 4 | 5 | ## Installation 6 | 1. Install BabyAI 7 | ``` 8 | pip install blosc; cd babyai-text/babyai; pip install -e .; cd .. 9 | ``` 10 | 2. Install gym-minigrid 11 | ``` 12 | cd gym-minigrid; pip install -e.; cd .. 13 | ``` 14 | 3. 
Install BabyAI-Text 15 | ``` 16 | pip install -e . 17 | ``` 18 | 19 | ## New environment 20 | We introduce two new environments containing a mix of BabyAI's tasks (*Go to*, *Pick up*, *Put next to*, *Unlock*, *Pick up go to*, *Pick up pick up*): 21 | - **BabyAI-MixedTrainLocal**: Training tasks in a single room setup (without some objects and the *Pick up pick up* task) 22 | - **BabyAI-MixedTestLocal**: Test tasks in a single room setup (including never seen objects and the *Pick up pick up* task) 23 | 24 | To use them, import our package and create the GYM environment: 25 | ```python 26 | import gym 27 | import babyai_text 28 | 29 | env = gym.make("BabyAI-MixedTrainLocal") 30 | ``` -------------------------------------------------------------------------------- /babyai-text/babyai/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # pycharm 104 | .idea/ 105 | 106 | # storage 107 | storage/ 108 | 109 | # pytorch weights, training history 110 | *.csv 111 | *.pt 112 | *.json 113 | -------------------------------------------------------------------------------- /babyai-text/babyai/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | python: 4 | - "3.5" 5 | 6 | before_install: 7 | - pip3 install --upgrade pip 8 | 9 | # command to install dependencies 10 | install: 11 | - pip3 install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl 12 | - pip3 install flake8 13 | - pip3 install scikit-build 14 | - pip3 install --editable . 15 | 16 | # command to run tests 17 | script: 18 | # Check the source code for obvious errors 19 | - python3 -m flake8 . 
--count --show-source --statistics --select=E901,E999,F821,F822,F823 20 | 21 | # Test the BabyAI levels 22 | - ./run_tests.py 23 | 24 | # Quickly exercise the RL training code 25 | - time python3 -m scripts.train_rl --env BabyAI-GoToObj-v0 --algo ppo --procs 4 --batch-size 80 --log-interval 1 --save-interval 2 --val-episodes 10 --frames 300 --arch cnn1 --instr-dim 16 --image-dim 16 --memory-dim 16 26 | 27 | # Check that the bot works on a few episodes of Boss Level 28 | - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 29 | - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3 30 | # Check that the bot works on a single episode from each level 31 | - python3 -m scripts.eval_bot --num_runs 1 32 | 33 | # Quickly test the generation of bot demos 34 | - python3 -m scripts.make_agent_demos --env BabyAI-GoToRedBallGrey-v0 --episodes 100 --valid-episodes 32 35 | 36 | # Quickly test the evaluation of bot demos 37 | - python3 -m scripts.evaluate --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent 38 | 39 | # Quick test for imitation learning 40 | - python3 -m scripts.train_il --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent --model GoToRedBallGrey-il --val-interval 1 --patience 0 --episodes 100 --val-episodes 50 41 | 42 | # Quickly test the evaluation of models 43 | - python3 -m scripts.evaluate --env BabyAI-GoToRedBallGrey-v0 --model GoToRedBallGrey-il 44 | 45 | # Quick test for imitation learning with multi env 46 | - python3 -m scripts.train_il --multi-env BabyAI-GoToRedBall-v0 BabyAI-GoToRedBallGrey-v0 --multi-demos BabyAI-GoToRedBallGrey-v0_agent BabyAI-GoToRedBallGrey-v0_agent --val-interval 1 --patience 0 --multi-episodes 100 100 --val-episodes 50 47 | 48 | # Quick test for train_intelligent_expert 49 | - python3 -m scripts.train_intelligent_expert --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent --val-interval 1 --patience 0 --val-episodes 50 --start-demos 10 --num-eval-demos 5 --phases 2 50 | -------------------------------------------------------------------------------- /babyai-text/babyai/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Instructions for Contributors 2 | 3 | To contribute to this project, you should first create your own fork, and remember to periodically [sync changes from this repository](https://stackoverflow.com/questions/7244321/how-do-i-update-a-github-forked-repository). You can then create [pull requests](https://yangsu.github.io/pull-request-tutorial/) for modifications you have made. Your changes will be tested and reviewed before they are merged into this repository. If you are not familiar with forks and pull requests, we recommend doing a Google or YouTube search to find many useful tutorials on the topic. 4 | 5 | Also, you can have a look at the [codebase structure](docs/codebase.md) before getting started. 6 | 7 | A suggested flow for contributing would be: 8 | First, open up a new feature branch to solve an existing bug/issue 9 | ```bash 10 | $ git checkout -b upstream/master 11 | ``` 12 | This ensures that the branch is up-to-date with the `master` branch of the main repository, irrespective of the status of your forked repository. 
13 | 14 | Once you are done making commits of your changes / adding the feature, you can: 15 | (In case this is the first set of commits from this _new_ local branch) 16 | ```bash 17 | git push --set-upstream origin 18 | ``` 19 | (Assuming the name of your forked repository remote is `origin`), which will create a new branch `` 20 | tracking your local ``, in case it hasn't been created already. 21 | 22 | Then, create a [pull request](https://help.github.com/en/articles/about-pull-requests) in this repository. -------------------------------------------------------------------------------- /babyai-text/babyai/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Maxime Chevalier-Boisvert 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/__init__.py: -------------------------------------------------------------------------------- 1 | # Import levels so that the OpenAI Gym environments get registered 2 | # when the babyai package is imported 3 | from . import levels 4 | from . 
import utils 5 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/base.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | # from alfred.utils import data_util 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self, args, emb_ann_size, numb_action, pad): 8 | ''' 9 | Abstract model 10 | ''' 11 | nn.Module.__init__(self) 12 | self.args = args 13 | self.numb_action = numb_action 14 | self.pad = pad 15 | # shape manually given TO IMPROVE as in ET 16 | # self.visual_tensor_shape = data_util.read_dataset_info( 17 | # args.data['train'][0])['feat_shape'][1:] 18 | self.visual_tensor_shape = [128, 2, 2] 19 | # self.visual_tensor_shape = [512, 7, 7] 20 | # create language and action embeddings 21 | 22 | self.emb_ann = nn.Embedding(emb_ann_size, args.demb) 23 | 24 | # dropouts 25 | self.dropout_vis = nn.Dropout(args.dropout['vis'], inplace=True) 26 | self.dropout_lang = nn.Dropout2d(args.dropout['lang']) 27 | 28 | def init_weights(self, init_range=0.1): 29 | ''' 30 | init linear layers in embeddings 31 | ''' 32 | self.emb_ann.weight.data.uniform_(-init_range, init_range) 33 | 34 | def compute_metrics(self, model_out, gt_dict, metrics_dict, verbose): 35 | ''' 36 | compute model-specific metrics and put it to metrics dict 37 | ''' 38 | raise NotImplementedError 39 | 40 | def forward(self, vocab, **inputs): 41 | ''' 42 | forward the model for multiple time-steps (used for training) 43 | ''' 44 | raise NotImplementedError() 45 | 46 | def compute_batch_loss(self, model_out, gt_dict): 47 | ''' 48 | compute the loss function for a single batch 49 | ''' 50 | raise NotImplementedError() 51 | 52 | def compute_loss(self, model_outs, gt_dicts): 53 | ''' 54 | compute the loss function for several batches 55 | ''' 56 | # compute losses for each batch 57 | losses = {} 58 | for dataset_key in model_outs.keys(): 59 | losses[dataset_key] = self.compute_batch_loss( 60 | model_outs[dataset_key], gt_dicts[dataset_key]) 61 | return losses 62 | 63 | def compute_batch_DOE(self, model_out, gt_dict): 64 | ''' 65 | compute the DOE for a single batch 66 | ''' 67 | raise NotImplementedError() 68 | 69 | def compute_DOE(self, model_outs): 70 | ''' 71 | compute the DOE for several batches 72 | ''' 73 | # compute losses for each batch 74 | DOE= {} 75 | for dataset_key in model_outs.keys(): 76 | DOE[dataset_key] = self.compute_batch_DOE(model_outs[dataset_key]) 77 | return DOE 78 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/batchsampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | class BatchSampler(object): 5 | """ 6 | Class used to sample a batch of demonstrations from demonstrations of multiple 7 | environments based on a distribution. 8 | Used for Teacher Student Curriculum setting in imitation learning. 
9 | """ 10 | 11 | def __init__(self, demos, batch_size, seed, no_mem=False): 12 | self.num_task = len(demos) 13 | self.dist_task = np.ones(self.num_task) / self.num_task * 1.0 14 | self.demos = demos 15 | self.batch_size = batch_size 16 | self.no_mem = no_mem 17 | self.rng = np.random.RandomState(seed) 18 | 19 | self.total_demos = 0 20 | self.num_used_demos = 0 21 | self.current_demos = [None] * self.num_task 22 | self.current_ids = [None] * self.num_task 23 | for tid in range(self.num_task): 24 | self.total_demos += self.reset(tid) 25 | 26 | self.tracking_total_demos = self.total_demos 27 | 28 | def setDist(self, dist_task): 29 | self.dist_task = dist_task 30 | 31 | def reset(self, tid): 32 | np.random.shuffle(self.demos[tid]) 33 | self.current_demos[tid] = self.demos[tid] 34 | self.current_ids[tid] = 0 35 | 36 | return len(self.demos[tid]) 37 | 38 | def sample(self): 39 | 40 | batch = [] 41 | for i in range(self.batch_size): 42 | tid = self.rng.choice(range(len(self.dist_task)), p=self.dist_task) 43 | cid = self.current_ids[tid] 44 | if cid >= len(self.current_demos[tid]): 45 | self.reset(tid) 46 | cid = self.current_ids[tid] 47 | 48 | batch += [self.current_demos[tid][cid]] 49 | self.current_ids[tid] += 1 50 | 51 | if self.no_mem: 52 | batch = np.array(batch) 53 | 54 | self.num_used_demos += self.batch_size 55 | should_evaluate = self.num_used_demos >= self.tracking_total_demos 56 | if should_evaluate: 57 | self.tracking_total_demos += self.total_demos 58 | return batch, should_evaluate -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/l_class.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | from babyai import base 7 | from nn.enc_lang_QA import EncoderLang_QA 8 | from nn.enc_visual import FeatureFlat, SimpleEncoder 9 | from nn.enc_vl import EncoderVL 10 | # from alfred.nn.encodings import DatasetLearnedEncoding 11 | from nn.dec_QA import QAClassifier 12 | 13 | class Model(base.Model): 14 | def __init__(self, args, emb_ann_size, numb_action, pad): 15 | ''' 16 | transformer agent 17 | ''' 18 | super().__init__(args, emb_ann_size, numb_action, pad) 19 | 20 | # pre-encoder for language tokens 21 | self.encoder_lang = EncoderLang_QA(args.encoder_lang['layers'], args) 22 | 23 | # dataset id learned encoding (applied after the encoder_lang) 24 | self.dataset_enc = None 25 | 26 | # decoder parts 27 | encoder_output_size = args.demb 28 | self.dec_QA = QAClassifier(encoder_output_size, args['vocab_path']) 29 | 30 | # final touch 31 | self.init_weights() 32 | self.reset() 33 | 34 | def forward(self, vocab, **inputs): 35 | ''' 36 | forward the model for multiple time-steps (used for training) 37 | ''' 38 | # embed language 39 | indexes = torch.squeeze((inputs['questions'] == 1).nonzero(as_tuple=False)[:, 1:], dim=1) 40 | indexes_3d = torch.unsqueeze(torch.unsqueeze(indexes, dim=1), dim=1) 41 | output = {} 42 | emb_lang, lengths_lang = self.embed_lang(inputs['questions'], vocab) 43 | emb_lang = self.dataset_enc(emb_lang, vocab) if self.dataset_enc else emb_lang 44 | 45 | decoder_input = emb_lang.reshape(-1, self.args.demb) 46 | answer_flat = self.dec_QA(decoder_input) # B*language_seq x voc_size 47 | answers = answer_flat.view( 48 | *emb_lang.shape[:2], *answer_flat.shape[1:]) # B x language_seq x voc_size 49 | 50 | indices = torch.mul(indexes_3d, torch.ones((answers.shape[0], 1, answers.shape[2]), 
device=torch.device("cuda"))).type(torch.LongTensor).cuda() # B x 1 x voc_size 51 | answers = torch.gather(answers, 1, indices) # B x 1 x voc_size 52 | answers = answers.reshape(-1, answers.shape[2]) # B x voc_size 53 | 54 | output.update({'answers': answers}) 55 | return output 56 | 57 | def embed_lang(self, lang_pad, vocab): 58 | ''' 59 | take a list of annotation tokens and extract embeddings with EncoderLang 60 | ''' 61 | assert lang_pad.max().item() < len(vocab) 62 | embedder_lang = self.emb_ann 63 | emb_lang, lengths_lang = self.encoder_lang( 64 | lang_pad, embedder_lang, vocab, self.pad) 65 | if self.args.detach_lang_emb: 66 | emb_lang = emb_lang.clone().detach() 67 | return emb_lang, lengths_lang 68 | 69 | 70 | def reset(self): 71 | ''' 72 | reset internal states (used for real-time execution during eval) 73 | ''' 74 | self.frames_traj = torch.zeros(1, 0, *self.visual_tensor_shape) 75 | self.action_traj = torch.zeros(1, 0).long() 76 | 77 | 78 | 79 | def compute_batch_loss(self, model_out, gt_dict): 80 | ''' 81 | loss function for Seq2Seq agent 82 | ''' 83 | losses = dict() 84 | 85 | # answer classes loss 86 | answer_pred = model_out['answers'].view(-1, model_out['answers'].shape[-1]) 87 | answer_gt = gt_dict['answers'].view(-1) 88 | answer_loss = F.cross_entropy(answer_pred, answer_gt, reduction='mean') 89 | losses['answers'] = answer_loss 90 | 91 | # prediction of <> loss 92 | no_answer_pred = model_out['no_answers'].view(-1, model_out['no_answers'].shape[-1]) 93 | no_answer_gt = gt_dict['no_answers'].view(-1) 94 | no_answer_loss = F.cross_entropy(no_answer_pred, no_answer_gt, reduction='mean') 95 | losses['no_answers'] = no_answer_loss 96 | 97 | return losses 98 | 99 | 100 | def init_weights(self, init_range=0.1): 101 | ''' 102 | init embeddings uniformly 103 | ''' 104 | super().init_weights(init_range) 105 | 106 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/levels/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from . import iclr19_levels 4 | from . import bonus_levels 5 | from . import test_levels 6 | 7 | from .levelgen import test, level_dict 8 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Lucas Willems 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.algos.ppo import PPOAlgo 2 | from babyai.rl.utils import DictList 3 | from babyai.rl.model import ACModel, RecurrentACModel, ETModel 4 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/algos/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.algos.ppo import PPOAlgo 2 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/format.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def default_preprocess_obss(obss, device=None): 4 | return torch.tensor(obss, device=device) -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/model.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod, abstractproperty 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class ACModel: 6 | recurrent = False 7 | 8 | @abstractmethod 9 | def __init__(self, obs_space, action_space): 10 | pass 11 | 12 | @abstractmethod 13 | def forward(self, obs): 14 | pass 15 | 16 | class RecurrentACModel(ACModel): 17 | recurrent = True 18 | 19 | @abstractmethod 20 | def forward(self, obs, memory): 21 | pass 22 | 23 | @property 24 | @abstractmethod 25 | def memory_size(self): 26 | pass 27 | 28 | class ETModel(nn.Module): 29 | def __init__(self, args, embs_ann, vocab_out, pad, seg): 30 | ''' 31 | Abstract model 32 | ''' 33 | nn.Module.__init__(self) 34 | self.args = args 35 | self.vocab_out = vocab_out 36 | self.pad, self.seg = pad, seg 37 | self.visual_tensor_shape = data_util.read_dataset_info( 38 | args.data['train'][0])['feat_shape'][1:] 39 | 40 | # create language and action embeddings 41 | self.embs_ann = nn.ModuleDict({}) 42 | for emb_name, emb_size in embs_ann.items(): 43 | self.embs_ann[emb_name] = nn.Embedding(emb_size, args.demb) 44 | 45 | # dropouts 46 | self.dropout_vis = nn.Dropout(args.dropout['vis'], inplace=True) 47 | self.dropout_lang = nn.Dropout2d(args.dropout['lang']) 48 | 49 | def init_weights(self, init_range=0.1): 50 | ''' 51 | init linear layers in embeddings 52 | ''' 53 | for emb_ann in self.embs_ann.values(): 54 | emb_ann.weight.data.uniform_(-init_range, init_range) 55 | 56 | 57 | def forward(self, vocab, **inputs): 58 | ''' 59 | forward the model for multiple time-steps (used for training) 60 | ''' 61 | raise NotImplementedError() -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.utils.dictlist import DictList 2 | from babyai.rl.utils.penv import ParallelEnv -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/dictlist.py: 
-------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class DictList(dict): 5 | """A dictionnary of lists of same size. Dictionnary items can be 6 | accessed using `.` notation and list items using `[]` notation. 7 | 8 | Example: 9 | >>> d = DictList({"a": [[1, 2], [3, 4]], "b": [[5], [6]]}) 10 | >>> d.a 11 | [[1, 2], [3, 4]] 12 | >>> d[0] 13 | DictList({"a": [1, 2], "b": [5]}) 14 | """ 15 | 16 | __getattr__ = dict.__getitem__ 17 | __setattr__ = dict.__setitem__ 18 | 19 | def __len__(self): 20 | return len(next(iter(dict.values(self)))) 21 | 22 | def __getitem__(self, index): 23 | return DictList({key: value[index] for key, value in dict.items(self)}) 24 | 25 | def __setitem__(self, index, d): 26 | for key, value in d.items(): 27 | dict.__getitem__(self, key)[index] = value 28 | 29 | def shuffle_lists_same_order(self): 30 | """ 31 | return the dictionnary with each list of the dictionnary shuffled such that: 32 | list_1[i]=list_2[i]=list_1[i_shuffle]=list_2[i_shuffle] 33 | 34 | Example: 35 | >>> d = DictList({"a":[1, 2, 3], "b":[4, 5, 6]}) 36 | >>> d.shuffle_lists_same_order() 37 | DictList({"a":[3, 1, 2], "b":[6, 4, 5]}) 38 | """ 39 | keys = list(dict.keys(self)) 40 | len_keys = len(keys) 41 | map_list = list(zip(*[v for v in dict.values(self)])) 42 | random.shuffle(map_list) 43 | l = list(zip(*map_list)) 44 | return DictList({keys[i]: list(l[i]) for i in range(len_keys)}) 45 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/penv.py: -------------------------------------------------------------------------------- 1 | from torch.multiprocessing import Process, Pipe 2 | import gym 3 | from tqdm import tqdm 4 | import logging 5 | import torch 6 | from tqdm import tqdm 7 | logger = logging.getLogger(__name__) 8 | import concurrent.futures 9 | 10 | # For multiprocessing 11 | def worker(conn, env): 12 | while True: 13 | cmd, data = conn.recv() 14 | if cmd == "step": 15 | obs, reward, done, info = env.step(data) 16 | if done: 17 | obs = env.reset() 18 | conn.send((obs, reward, done, info)) 19 | elif cmd == "reset": 20 | obs = env.reset() 21 | conn.send(obs) 22 | else: 23 | raise NotImplementedError 24 | 25 | # For multithreading 26 | def thread(env, cmd, *args): 27 | if cmd == "step": 28 | obs, reward, done, info = env.step(args[0]) 29 | if done: 30 | obs = env.reset() 31 | return obs, reward, done, info 32 | elif cmd == "reset": 33 | obs = env.reset() 34 | return obs 35 | else: 36 | raise NotImplementedError 37 | 38 | class ParallelEnv(gym.Env): 39 | """A concurrent execution of environments in multiple processes.""" 40 | 41 | def __init__(self, envs, use_procs=False): 42 | assert len(envs) >= 1, "No environment given." 
43 | 44 | self.envs = envs 45 | self.observation_space = self.envs[0].observation_space 46 | self.action_space = self.envs[0].action_space 47 | self.use_procs = use_procs 48 | 49 | if self.use_procs: 50 | self.locals = [] 51 | self.processes = [] 52 | for env in tqdm(self.envs[1:]): 53 | local, remote = Pipe() 54 | self.locals.append(local) 55 | p = Process(target=worker, args=(remote, env)) 56 | p.daemon = True 57 | p.start() 58 | remote.close() 59 | self.processes.append(p) 60 | 61 | def reset(self): 62 | if self.use_procs: 63 | for local in self.locals: 64 | local.send(("reset", None)) 65 | proc_results = [] 66 | for local in self.locals: 67 | proc_results.append(local.recv()) 68 | results = [self.envs[0].reset()] + proc_results 69 | # results = [self.envs[0].reset()] + [local.recv() for local in self.locals] 70 | else: 71 | with concurrent.futures.ThreadPoolExecutor() as executor: 72 | futures = [executor.submit(thread, self.envs[i], "reset") for i in range(len(self.envs))] 73 | results = [f.result() for f in futures] 74 | return results 75 | 76 | def step(self, actions): 77 | if self.use_procs: 78 | for local, action in zip(self.locals, actions[1:]): 79 | local.send(("step", action)) 80 | obs, reward, done, info = self.envs[0].step(actions[0]) 81 | if done: 82 | obs = self.envs[0].reset() 83 | results = zip(*[(obs, reward, done, info)] + [local.recv() for local in self.locals]) 84 | else: 85 | with concurrent.futures.ThreadPoolExecutor(max_workers=64) as executor: 86 | futures = [executor.submit(thread, self.envs[i], "step", actions[i]) for i in range(len(self.envs))] 87 | results = [f.result() for f in futures] 88 | results = zip(*results) 89 | return results 90 | 91 | def render(self): 92 | raise NotImplementedError 93 | 94 | def __del__(self): 95 | if self.use_procs: 96 | for p in self.processes: 97 | p.terminate() -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy 4 | import torch 5 | from babyai.utils.agent import load_agent, ModelAgent, DemoAgent, BotAgent 6 | from babyai.utils.demos import ( 7 | load_demos, load_voc, save_demos, synthesize_demos, get_demos_path, get_demos_QG_path, get_demos_QG_voc_path) 8 | from babyai.utils.format import ObssPreprocessor, ObssContPreprocessor, ObssDirPreprocessor, IntObssPreprocessor, InstructionOnlyPreprocessor, get_vocab_path 9 | from babyai.utils.log import ( 10 | get_log_path, get_log_dir, synthesize, configure_logging) 11 | from babyai.utils.model import get_model_dir, load_model, save_model, load_stactpredictor_model, save_stactpredictor_model 12 | from babyai.utils.viz import watch, viz, info, clear 13 | 14 | def storage_dir(): 15 | # defines the storage directory to be in the root (Same level as babyai folder) 16 | print(os.environ) 17 | return os.environ.get("DLP_STORAGE", '') 18 | 19 | 20 | def create_folders_if_necessary(path): 21 | dirname = os.path.dirname(path) 22 | if not(os.path.isdir(dirname)): 23 | os.makedirs(dirname) 24 | 25 | 26 | def seed(seed): 27 | random.seed(seed) 28 | numpy.random.seed(seed) 29 | torch.manual_seed(seed) 30 | if torch.cuda.is_available(): 31 | torch.cuda.manual_seed_all(seed) 32 | 33 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/log.py: -------------------------------------------------------------------------------- 1 | import os 
2 | import sys 3 | import numpy 4 | import logging 5 | 6 | from .. import utils 7 | 8 | 9 | def get_log_dir(log_name): 10 | return os.path.join(utils.storage_dir(), "logs", log_name) 11 | 12 | 13 | def get_log_path(log_name): 14 | return os.path.join(get_log_dir(log_name), "log.log") 15 | 16 | 17 | def synthesize(array): 18 | import collections 19 | d = collections.OrderedDict() 20 | d["mean"] = numpy.mean(array) 21 | d["std"] = numpy.std(array) 22 | if len(array) > 0: 23 | d["min"] = numpy.amin(array) 24 | d["max"] = numpy.amax(array) 25 | else: 26 | d["min"] = numpy.nan 27 | d["max"] = numpy.nan 28 | return d 29 | 30 | 31 | def configure_logging(log_name): 32 | path = get_log_path(log_name) 33 | utils.create_folders_if_necessary(path) 34 | 35 | logging.basicConfig( 36 | level=logging.INFO, 37 | format="%(name)s: %(asctime)s: %(message)s", 38 | handlers=[ 39 | logging.FileHandler(filename=path), 40 | logging.StreamHandler(sys.stdout) 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from .. import utils 5 | 6 | 7 | def get_model_dir(model_name): 8 | return os.path.join(utils.storage_dir(), "models", model_name) 9 | 10 | 11 | def get_model_path(model_name): 12 | return os.path.join(get_model_dir(model_name), "model.pt") 13 | 14 | def get_stactpredictor_model_path(model_name): 15 | return os.path.join(get_model_dir(model_name), "stactpredictor_model.pt") 16 | 17 | 18 | def load_model(model_name, raise_not_found=True): 19 | path = get_model_path(model_name) 20 | try: 21 | model = torch.load(path) 22 | model.eval() 23 | return model 24 | except FileNotFoundError: 25 | if raise_not_found: 26 | raise FileNotFoundError("No model found at {}".format(path)) 27 | 28 | def load_stactpredictor_model(model_name, raise_not_found=True): 29 | path = get_stactpredictor_model_path(model_name) 30 | try: 31 | model = torch.load(path) 32 | model.eval() 33 | return model 34 | except FileNotFoundError: 35 | if raise_not_found: 36 | raise FileNotFoundError("No model found at {}".format(path)) 37 | 38 | 39 | def save_model(model, model_name, writer): 40 | path = get_model_path(model_name) 41 | utils.create_folders_if_necessary(path) 42 | torch.save(model, path) 43 | if writer: 44 | writer.save(path) 45 | 46 | def save_stactpredictor_model(model, model_name, writer): 47 | path = get_stactpredictor_model_path(model_name) 48 | utils.create_folders_if_necessary(path) 49 | torch.save(model, path) 50 | if writer: 51 | writer.save(path) 52 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/codebase.md: -------------------------------------------------------------------------------- 1 | # Structure of the Codebase 2 | In `babyai`: 3 | - `levels` contains the code for all levels 4 | - `bot.py` is a heuristic stack-based bot that can solve all levels 5 | - `imitation.py` is an imitation learning implementation 6 | - `rl` contains an implementation of the Proximal Policy Optimization (PPO) RL algorithm 7 | - `model.py` contains the neural network code 8 | 9 | In `scripts`: 10 | - use `train_il.py` to train an agent with imitation learning, using demonstrations from the bot, from another agent or even provided by a human 11 | - use `train_rl.py` to train an agent with reinforcement learning 12 | - use `make_agent_demos.py` to generate demonstrations with the 
bot or with another agent 13 | - use `make_human_demos.py` to make and save human demonstrations 14 | - use `train_intelligent_expert.py` to train an agent with an interactive imitation learning algorithm that incrementally grows the training set by adding demonstrations for the missions that the agent currently fails 15 | - use `evaluate.py` to evaluate a trained agent 16 | - use `enjoy.py` to visualize an agent's behavior 17 | - use `manual_control.py` to visualize example missions from BabyAI levels 18 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/iclr19_levels.md: -------------------------------------------------------------------------------- 1 | # ICLR19 Levels 2 | 3 | The levels described in this file were created for the ICLR19 submission. 4 | These form a curriculum that is subdivided according to specific competencies. 5 | 6 | ## GoToObj 7 | 8 | Go to an object, inside a single room with no doors, no distractors. 9 | 10 | 
11 | 12 | ## GoToRedBall 13 | 14 | Go to the red ball, single room, with obstacles. 15 | The obstacles/distractors are all the same, to eliminate 16 | perceptual complexity. 17 | 18 |
19 | 20 | ## GoToRedBallGrey 21 | 22 | Go to the red ball, single room, with obstacles. 23 | The obstacles/distractors are all grey boxes, to eliminate 24 | perceptual complexity. No unblocking required. 25 | 26 |
27 | 28 | ## GoToLocal 29 | 30 | Go to an object, inside a single room with no doors, no distractors. 31 | 32 |
33 | 34 | ## PutNextLocal 35 | 36 | Put an object next to another object, inside a single room 37 | with no doors, no distractors. 38 | 39 |
40 | 41 | ## PickUpLoc 42 | 43 | Pick up an object which may be described using its location. This is a 44 | single room environment. 45 | 46 | Competencies: PickUp, Loc. No unblocking. 47 | 48 |
49 | 50 | ## GoToObjMaze 51 | 52 | Go to an object, the object may be in another room. No distractors. 53 | 54 |
55 | 56 | ## GoTo 57 | 58 | Go to an object, the object may be in another room. Many distractors. 59 | 60 |
61 | 62 | ## Pickup 63 | 64 | Pick up an object, the object may be in another room. 65 | 66 |
67 | 68 | ## UnblockPickup 69 | 70 | Pick up an object, the object may be in another room. The path may 71 | be blocked by one or more obstructors. 72 | 73 |
74 | 75 | ## Open 76 | 77 | Open a door, which may be in another room. 78 | 79 |
80 | 81 | ## Unlock 82 | 83 | Maze environment where the agent has to retrieve a key to open a locked door. 84 | 85 | Competencies: Maze, Open, Unlock. No unblocking. 86 | 87 |
88 | 89 | ## PutNext 90 | 91 | Put an object next to another object. Either of these may be in another room. 92 | 93 |
94 | 95 | ## Synth 96 | 97 | Union of all instructions from PutNext, Open, Goto and PickUp. The agent 98 | may need to move objects around. The agent may have to unlock the door, 99 | but only if it is explicitly referred to by the instruction. 100 | 101 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open 102 | 103 | 
104 | 105 | ## SynthLoc 106 | 107 | Like Synth, but a significant share of object descriptions involves 108 | location language like in PickUpLoc. No implicit unlocking. 109 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc 110 | 111 |
112 | 113 | ## GoToSeq 114 | 115 | Sequencing of go-to-object commands. 116 | 117 | Competencies: Maze, GoTo, Seq. No locked room. No locations. No unblocking. 118 | 119 |
120 | 121 | ## SynthSeq 122 | 123 | Like SynthLoc, but now with multiple commands, combined just like in GoToSeq. 124 | 125 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq. No implicit unlocking. 126 | 127 |
128 | 129 | ## GoToImpUnlock 130 | 131 | Go to an object, which may be in a locked room. No unblocking. 132 | 133 | Competencies: Maze, GoTo, ImpUnlock 134 | 135 |
136 | 137 | ## BossLevel 138 | 139 | Command can be any sentence drawn from the Baby Language grammar. Union of 140 | all competencies. This level is a superset of all other levels. 141 | 142 |
143 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/train-eval.md: -------------------------------------------------------------------------------- 1 | # Training 2 | 3 | To train an RL agent run e.g. 4 | 5 | ``` 6 | scripts/train_rl.py --env BabyAI-GoToLocal-v0 7 | ``` 8 | 9 | Folders `logs/` and `models/` will be created in the current directory. The default name 10 | for the model is chosen based on the level name, the current time and the other settings (e.g. 11 | `BabyAI-GoToLocal-v0_ppo_expert_filmcnn_gru_mem_seed1_18-10-12-12-45-02`). You can also choose the model 12 | name by setting `--model`. After 5 hours of training you should be getting a success rate of 97-99\%. 13 | A machine readable log can be found in `logs/<model>/log.csv`, a human readable in `logs/<model>/log.log`. 14 | 15 | To train an agent with IL (imitation learning) first make sure that you have your demonstrations in 16 | `demos/` (Instructions to load the demos are present [here](demo-dataset.md)). Then run e.g. 17 | 18 | ``` 19 | scripts/train_il.py --env BabyAI-GoToLocal-v0 --demos <demos> 20 | ``` 21 | 22 | In the example above we run scripts from the root of the repository, but if you have installed BabyAI as 23 | described above, you can also run all scripts with commands like `<PATH-TO-BABYAI-REPO>/scripts/train_il.py`. 24 | 25 | # Evaluation 26 | 27 | In the same directory where you trained your model run e.g. 28 | 29 | ``` 30 | scripts/evaluate.py --env BabyAI-GoToLocal-v0 --model <model> 31 | ``` 32 | 33 | to evaluate the performance of your model named `<model>` on 1000 episodes. If you want to see 34 | your agent performing, run 35 | 36 | ``` 37 | scripts/enjoy.py --env BabyAI-GoToLocal-v0 --model <model> 38 | ``` -------------------------------------------------------------------------------- /babyai-text/babyai/docs/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | If you run into error messages relating to OpenAI gym or PyQT, it may be that the version of those libraries that you have installed is incompatible. You can try upgrading specific libraries with pip3, eg: `pip3 install --upgrade gym`. If the problem persists, please [open an issue](https://github.com/mila-iqia/babyai/issues/new) on this repository and paste a *complete* error message, along with some information about your platform (are you running Windows, Mac, Linux? Are you running this on a Mila machine?). 4 | 5 | ## If you cannot install PyQT 6 | 7 | If you cannot install PyQT using pip, another option is to install it using conda instead: 8 | 9 | ``` 10 | conda install -c anaconda pyqt 11 | ``` 12 | 13 | Alternatively, it is also possible to install PyQT5 manually: 14 | 15 | ``` 16 | wget https://files.pythonhosted.org/packages/98/61/fcd53201a23dd94a1264c29095821fdd55c58b4cd388dc7115e5288866db/PyQt5-5.12.1-5.12.2-cp35.cp36.cp37.cp38-abi3-manylinux1_x86_64.whl 17 | PYTHONPATH="" 18 | pip3 install --user PyQt5-5.12.1-5.12.2-cp35.cp36.cp37.cp38-abi3-manylinux1_x86_64.whl 19 | ``` 20 | 21 | Finally, if none of the above options work, note that PyQT is only needed to produce graphics for human viewing, and isn't needed during training. As such, it's possible to install BabyAI without PyQT and train a policy. To do so, you can comment out the `gym_minigrid` dependency in `setup.py`, clone the [gym-minigrid repository](https://github.com/maximecb/gym-minigrid) manually, and comment out the `pyqt5` dependency in the `setup.py` of the minigrid repository. 
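For reference, a minimal sketch of that manual route (the clone location and the exact way each `setup.py` lists its dependencies are assumptions; adapt them to your checkout):

```
# get gym-minigrid (URL from the link above) and install it without its PyQT dependency
git clone https://github.com/maximecb/gym-minigrid
cd gym-minigrid
# comment out the pyqt5 entry in this repository's setup.py, then:
pip3 install --user -e .
# likewise, comment out the gym_minigrid dependency in BabyAI's own setup.py before installing BabyAI
```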
22 | -------------------------------------------------------------------------------- /babyai-text/babyai/environment.yaml: -------------------------------------------------------------------------------- 1 | name: babyai 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - pytorch=1.4 8 | - numpy 9 | - blosc 10 | - pip 11 | - pip: 12 | - gym 13 | - scikit-build 14 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai/nn/__init__.py -------------------------------------------------------------------------------- /babyai-text/babyai/nn/dec_QA.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | class QAClassifier(nn.Module): 8 | ''' 9 | object classifier module (a single FF layer) 10 | ''' 11 | def __init__(self, input_size, vocab_path): 12 | super().__init__() 13 | with open(vocab_path, 'rb') as filehandle: 14 | # read the data as binary data stream 15 | vocab_list = pkl.load(filehandle)['answer'] 16 | num_classes = len(vocab_list) 17 | self.linear = nn.Linear(input_size, num_classes) 18 | 19 | def forward(self, x): 20 | out = self.linear(x) 21 | return out -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_lang.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils.rnn import pad_sequence 7 | 8 | from nn.encodings import PosLangEncoding, InstrLangEncoding 9 | 10 | 11 | class EncoderLang(nn.Module): 12 | def __init__(self, num_layers, args, 13 | subgoal_token='<>', goal_token='<>'): 14 | ''' 15 | transformer encoder for language inputs 16 | ''' 17 | super(EncoderLang, self).__init__() 18 | self.subgoal_token = subgoal_token 19 | self.goal_token = goal_token 20 | 21 | # transformer layers 22 | encoder_layer = nn.TransformerEncoderLayer( 23 | args.demb, args.encoder_heads, args.demb, 24 | args.dropout['transformer']['encoder']) 25 | if args.encoder_lang['shared']: 26 | enc_transformer = nn.TransformerEncoder( 27 | encoder_layer, num_layers) 28 | self.enc_transformers = enc_transformer 29 | else: 30 | self.enc_transformers = nn.TransformerEncoder( 31 | encoder_layer, num_layers) 32 | 33 | # encodings 34 | self.enc_pos = PosLangEncoding(args.demb) if args.encoder_lang['pos_enc'] else None 35 | self.enc_instr = InstrLangEncoding(args.demb) if args.encoder_lang['instr_enc'] else None 36 | self.enc_layernorm = nn.LayerNorm(args.demb) 37 | self.enc_dropout = nn.Dropout(args.dropout['lang'], inplace=True) 38 | 39 | def forward(self, lang_pad, embedder, vocab, pad): 40 | ''' 41 | pass embedded inputs through embeddings and encode them using a transformer 42 | ''' 43 | # pad the input language sequences and embed them with a linear layer 44 | mask_pad = (lang_pad == pad) 45 | emb_lang = embedder(lang_pad) 46 | # add positional encodings 47 | mask_token = EncoderLang.mask_token( 48 | lang_pad, vocab, {self.subgoal_token, self.goal_token}) 49 | emb_lang = self.encode_inputs(emb_lang, mask_token, mask_pad) 50 | 
# pass the inputs through the encoder 51 | hiddens = EncoderLang.encoder( 52 | self.enc_transformers, emb_lang, mask_pad, vocab) 53 | lengths = (lang_pad != pad).sum(dim=1) 54 | return hiddens, lengths 55 | 56 | @staticmethod 57 | def mask_token(lang_pad, vocab, tokens): 58 | ''' 59 | returns mask of the tokens 60 | ''' 61 | tokens_mask = torch.zeros_like(lang_pad).long() 62 | for token in tokens: 63 | tokens_mask += lang_pad == vocab.word2index(token) 64 | return tokens_mask.bool() 65 | 66 | @staticmethod 67 | def encoder(encoders, emb_lang, mask_pad, mask_attn=None): 68 | ''' 69 | compute encodings for all tokens using a normal flat encoder 70 | ''' 71 | # skip mask: mask padded words 72 | if mask_attn is None: 73 | # attention mask: all tokens can attend to all others 74 | mask_attn = torch.zeros( 75 | (mask_pad.shape[1], mask_pad.shape[1]), device=mask_pad.device).float() 76 | # encode the inputs 77 | output = encoders( 78 | emb_lang.transpose(0, 1), 79 | mask_attn, 80 | mask_pad).transpose(0, 1) 81 | return output 82 | 83 | def encode_inputs(self, emb_lang, mask_token, mask_pad): 84 | ''' 85 | add positional encodings, apply layernorm and dropout 86 | ''' 87 | emb_lang = self.enc_pos(emb_lang) if self.enc_pos else emb_lang 88 | emb_lang = self.enc_instr(emb_lang, mask_token) if self.enc_instr else emb_lang 89 | emb_lang = self.enc_dropout(emb_lang) 90 | emb_lang = self.enc_layernorm(emb_lang) 91 | return emb_lang 92 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_lang_QA.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils.rnn import pad_sequence 7 | 8 | from nn.enc_lang import EncoderLang 9 | from nn.encodings import PosLangEncoding, InstrLangEncoding 10 | 11 | class EncoderLang_QA(EncoderLang): 12 | def __init__(self, num_layers, args, 13 | subgoal_token='<>', goal_token='<>', question_token='<>'): 14 | ''' 15 | transformer encoder for language inputs 16 | ''' 17 | super(EncoderLang_QA, self).__init__(num_layers, args) 18 | self.subgoal_token = subgoal_token 19 | self.goal_token = goal_token 20 | self.question_token = question_token 21 | 22 | # transofmer layers 23 | encoder_layer = nn.TransformerEncoderLayer( 24 | args.demb, args.encoder_heads, args.demb, 25 | args.dropout['transformer']['encoder']) 26 | if args.encoder_lang['shared']: 27 | enc_transformer = nn.TransformerEncoder( 28 | encoder_layer, num_layers) 29 | self.enc_transformers = enc_transformer 30 | else: 31 | self.enc_transformers = nn.TransformerEncoder( 32 | encoder_layer, num_layers) 33 | 34 | # encodings 35 | self.enc_pos = PosLangEncoding(args.demb) if args.encoder_lang['pos_enc'] else None 36 | self.enc_instr = InstrLangEncoding(args.demb) if args.encoder_lang['instr_enc'] else None 37 | self.enc_layernorm = nn.LayerNorm(args.demb) 38 | self.enc_dropout = nn.Dropout(args.dropout['lang'], inplace=True) 39 | 40 | def forward(self, lang_pad, embedder, vocab, pad): 41 | ''' 42 | pass embedded inputs through embeddings and encode them using a transformer 43 | ''' 44 | # pad the input language sequences and embed them with a linear layer 45 | 46 | mask_pad = (lang_pad == pad) 47 | emb_lang = embedder(lang_pad) 48 | # add positional encodings 49 | mask_token = EncoderLang.mask_token( 50 | lang_pad, vocab, {self.question_token}) 51 | 52 | emb_lang = self.encode_inputs(emb_lang, 
mask_token, mask_pad) 53 | # pass the inputs through the encoder 54 | hiddens = EncoderLang.encoder( 55 | self.enc_transformers, emb_lang, mask_pad) 56 | 57 | lengths = (lang_pad != pad).sum(dim=1) 58 | 59 | return hiddens, lengths 60 | 61 | @staticmethod 62 | def mask_token(lang_pad, vocab, tokens): 63 | ''' 64 | returns mask of the tokens 65 | ''' 66 | tokens_mask = torch.zeros_like(lang_pad).long() 67 | for token in tokens: 68 | tokens_mask += lang_pad == vocab.word2index(token) 69 | return tokens_mask.bool() 70 | 71 | @staticmethod 72 | def encoder(encoders, emb_lang, mask_pad, mask_attn=None): 73 | ''' 74 | compute encodings for all tokens using a normal flat encoder 75 | ''' 76 | # skip mask: mask padded words 77 | if mask_attn is None: 78 | # attention mask: all tokens can attend to all others 79 | mask_attn = torch.zeros( 80 | (mask_pad.shape[1], mask_pad.shape[1]), device=mask_pad.device).float() 81 | # encode the inputs 82 | output = encoders( 83 | emb_lang.transpose(0, 1), 84 | mask_attn, 85 | mask_pad).transpose(0, 1) 86 | return output 87 | 88 | def encode_inputs(self, emb_lang, mask_token, mask_pad): 89 | ''' 90 | add positional encodings, apply layernorm and dropout 91 | ''' 92 | emb_lang = self.enc_pos(emb_lang) if self.enc_pos else emb_lang 93 | emb_lang = self.enc_instr(emb_lang, mask_token) if self.enc_instr else emb_lang 94 | emb_lang = self.enc_dropout(emb_lang) 95 | emb_lang = self.enc_layernorm(emb_lang) 96 | return emb_lang 97 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_visual.py: -------------------------------------------------------------------------------- 1 | import os 2 | import types 3 | import torch 4 | import contextlib 5 | import numpy as np 6 | import torch.nn as nn 7 | import PIL 8 | 9 | from PIL import Image 10 | from torchvision import models 11 | from torchvision.transforms import functional as F 12 | 13 | from nn.transforms import Transforms 14 | 15 | class Resnet18(nn.Module): 16 | ''' 17 | pretrained Resnet18 from torchvision 18 | ''' 19 | def __init__(self, 20 | device, 21 | checkpoint_path=None, 22 | share_memory=False): 23 | super().__init__() 24 | self.device = device 25 | self.model = models.resnet18(pretrained=True) 26 | self.model = nn.Sequential(*list(self.model.children())[:-3]) 27 | '''if checkpoint_path is not None: 28 | print('Loading ResNet checkpoint from {}'.format(checkpoint_path)) 29 | model_state_dict = torch.load(checkpoint_path, map_location=device) 30 | model_state_dict = { 31 | key: value for key, value in model_state_dict.items() 32 | if 'GU_' not in key and 'text_pooling' not in key} 33 | model_state_dict = { 34 | key: value for key, value in model_state_dict.items() 35 | if 'fc.' 
not in key} 36 | model_state_dict = { 37 | key.replace('resnet.', ''): value 38 | for key, value in model_state_dict.items()} 39 | self.model.load_state_dict(model_state_dict) 40 | self.model = self.model.to(torch.device(device))''' 41 | 42 | if self.device == 'cuda': 43 | self.model.cuda() 44 | self.model = self.model.eval() 45 | if share_memory: 46 | self.model.share_memory() 47 | self._transform = Transforms.get_transform('default') 48 | 49 | def extract(self, x): 50 | # small image returned by RGBImgPartialObsWrapper transform with resize not necessary 51 | x = torch.stack([self._transform(Image.fromarray(i.astype('uint8'), 'RGB')).to(torch.device(self.device)) for i in x]) 52 | # x_tensor = torch.tensor(x, dtype=torch.float32) 53 | return self.model(x) 54 | 55 | class FeatureFlat(nn.Module): 56 | ''' 57 | a few conv layers to flatten features that come out of ResNet 58 | ''' 59 | def __init__(self, input_shape, output_size): 60 | super().__init__() 61 | if input_shape[0] == -1: 62 | input_shape = input_shape[1:] 63 | layers, activation_shape = self.init_cnn( 64 | input_shape, channels=[256, 64], kernels=[1, 1], paddings=[0, 0]) 65 | layers += [ 66 | Flatten(), nn.Linear(np.prod(activation_shape), output_size)] 67 | self.layers = nn.Sequential(*layers) 68 | 69 | def init_cnn(self, input_shape, channels, kernels, paddings): 70 | layers = [] 71 | planes_in, spatial = input_shape[0], input_shape[-1] 72 | for planes_out, kernel, padding in zip(channels, kernels, paddings): 73 | # do not use striding 74 | stride = 1 75 | layers += [ 76 | nn.Conv2d(planes_in, planes_out, kernel_size=kernel, 77 | stride=stride, padding=padding), 78 | nn.BatchNorm2d(planes_out), nn.ReLU(inplace=True)] 79 | planes_in = planes_out 80 | 81 | spatial = ((spatial - kernel + 2 * padding) // stride) + 1 82 | activation_shape = (planes_in, spatial, spatial) 83 | 84 | return layers, activation_shape 85 | 86 | def forward(self, frames): 87 | activation = self.layers(frames) 88 | return activation 89 | 90 | 91 | class Flatten(nn.Module): 92 | def forward(self, x): 93 | return x.view(x.size(0), -1) 94 | 95 | class SimpleEncoder(nn.Module): 96 | ''' 97 | a simple image encoder that is not pretrained to replace the use of resnet18 98 | ''' 99 | def __init__(self): 100 | super().__init__() 101 | self.image_conv = nn.Sequential( 102 | nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2), padding=1), 103 | nn.BatchNorm2d(128), 104 | nn.ReLU(), 105 | nn.MaxPool2d(kernel_size=(2, 2), stride=2), 106 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1), 107 | nn.BatchNorm2d(128), 108 | nn.ReLU(), 109 | nn.MaxPool2d(kernel_size=(2, 2), stride=2) 110 | ) 111 | def forward(self, frame): 112 | frame_extracted = self.image_conv(frame) 113 | return frame_extracted 114 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_vl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import nn.model_util as model_util 4 | from nn.encodings import PosEncoding, PosLearnedEncoding, TokenLearnedEncoding 5 | 6 | 7 | class EncoderVL(nn.Module): 8 | def __init__(self, args): 9 | ''' 10 | transformer encoder for language, frames and action inputs 11 | ''' 12 | super(EncoderVL, self).__init__() 13 | 14 | # transofmer layers 15 | encoder_layer = nn.TransformerEncoderLayer( 16 | args.demb, args.encoder_heads, args.demb, 17 | args.dropout['transformer']['encoder']) 18 | self.enc_transformer 
= nn.TransformerEncoder( 19 | encoder_layer, args.encoder_layers) 20 | 21 | # how many last actions to attend to 22 | self.num_input_actions = args.num_input_actions 23 | 24 | # encodings 25 | self.enc_pos = PosEncoding(args.demb) if args.enc['pos'] else None 26 | self.enc_pos_learn = PosLearnedEncoding(args.demb) if args.enc['pos_learn'] else None 27 | self.enc_token = TokenLearnedEncoding(args.demb) if args.enc['token'] else None 28 | self.enc_layernorm = nn.LayerNorm(args.demb) 29 | self.enc_dropout = nn.Dropout(args.dropout['emb'], inplace=True) 30 | 31 | def forward(self, 32 | emb_lang, 33 | emb_frames, 34 | emb_actions, 35 | lengths_lang, 36 | lengths_frames, 37 | lengths_actions, 38 | length_frames_max, 39 | attn_masks=True): 40 | ''' 41 | pass embedded inputs through embeddings and encode them using a transformer 42 | ''' 43 | # emb_lang is processed on each GPU separately so they size can vary 44 | length_lang_max = lengths_lang.max().item() 45 | emb_lang = emb_lang[:, :length_lang_max] 46 | # create a mask for padded elements 47 | length_mask_pad = length_lang_max + length_frames_max * ( 48 | 2 if lengths_actions.max() > 0 else 1) 49 | mask_pad = torch.zeros( 50 | (len(emb_lang), length_mask_pad), device=emb_lang.device).bool() 51 | for i, (len_l, len_f, len_a) in enumerate( 52 | zip(lengths_lang, lengths_frames, lengths_actions)): 53 | # mask padded words 54 | mask_pad[i, len_l: length_lang_max] = True 55 | # mask padded frames 56 | mask_pad[i, length_lang_max + len_f: 57 | length_lang_max + length_frames_max] = True 58 | # mask padded actions 59 | mask_pad[i, length_lang_max + length_frames_max + len_a:] = True 60 | 61 | # encode the inputs 62 | emb_all = self.encode_inputs( 63 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames, mask_pad) 64 | 65 | # create a mask for attention (prediction at t should not see frames at >= t+1) 66 | if attn_masks: 67 | # assert length_frames_max == max(lengths_actions) 68 | mask_attn = model_util.generate_attention_mask( 69 | length_lang_max, length_frames_max, 70 | emb_all.device, self.num_input_actions) 71 | else: 72 | # allow every token to attend to all others 73 | mask_attn = torch.zeros( 74 | (mask_pad.shape[1], mask_pad.shape[1]), 75 | device=mask_pad.device).float() 76 | 77 | # encode the inputs 78 | output = self.enc_transformer( 79 | emb_all.transpose(0, 1), mask_attn, mask_pad).transpose(0, 1) 80 | return output, mask_pad 81 | 82 | def encode_inputs(self, emb_lang, emb_frames, emb_actions, 83 | lengths_lang, lengths_frames, mask_pad): 84 | ''' 85 | add encodings (positional, token and so on) 86 | ''' 87 | if self.enc_pos is not None: 88 | emb_lang, emb_frames, emb_actions = self.enc_pos( 89 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames) 90 | if self.enc_pos_learn is not None: 91 | emb_lang, emb_frames, emb_actions = self.enc_pos_learn( 92 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames) 93 | if self.enc_token is not None: 94 | emb_lang, emb_frames, emb_actions = self.enc_token( 95 | emb_lang, emb_frames, emb_actions) 96 | emb_cat = torch.cat((emb_lang, emb_frames, emb_actions), dim=1) 97 | emb_cat = self.enc_layernorm(emb_cat) 98 | emb_cat = self.enc_dropout(emb_cat) 99 | return emb_cat 100 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/transforms.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import random 3 | import math 4 | import torch 5 | 6 | from torchvision 
import transforms 7 | 8 | 9 | class Transforms(object): 10 | @staticmethod 11 | def resize(img_size=224): 12 | # expects a PIL Image 13 | return transforms.Resize((img_size, img_size)) 14 | 15 | @staticmethod 16 | def affine(degree=5, translate=0.04, scale=0.02): 17 | # expects a PIL Image 18 | return transforms.RandomAffine( 19 | degrees=(-degree, degree), 20 | translate=(translate, translate), 21 | scale=(1-scale, 1+scale), 22 | shear=None) 23 | 24 | @staticmethod 25 | def random_crop(img_size=224): 26 | # expects a PIL Image 27 | return transforms.RandomCrop((img_size, img_size)) 28 | 29 | @staticmethod 30 | def normalize(): 31 | # expects a PIL Image 32 | return transforms.Compose([ 33 | transforms.ToTensor(), 34 | transforms.Normalize( 35 | mean=[0.485, 0.456, 0.406], 36 | std=[0.229, 0.224, 0.225], 37 | ) 38 | ]) 39 | 40 | @staticmethod 41 | def cutout(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0.): 42 | # expects a tensor 43 | return transforms.RandomErasing( 44 | p=p, scale=scale, ratio=ratio, value=value) 45 | 46 | @staticmethod 47 | def get_transform(transform='default'): 48 | if transform == 'default': 49 | return transforms.Compose([ 50 | Transforms.resize(224), 51 | Transforms.normalize()]) 52 | 53 | elif transform == 'none': 54 | return transforms.ToTensor() 55 | elif transform == 'crops': 56 | return transforms.Compose([ 57 | Transforms.resize(240), 58 | Transforms.random_crop(224), 59 | Transforms.normalize()]) 60 | elif transform == 'cutout': 61 | return transforms.Compose([ 62 | Transforms.resize(224), 63 | Transforms.normalize(), 64 | Transforms.cutout()]) 65 | elif transform == 'affine': 66 | return transforms.Compose([ 67 | Transforms.resize(224), 68 | Transforms.affine(), 69 | Transforms.normalize()]) 70 | elif transform == 'affine_crops': 71 | return transforms.Compose([ 72 | Transforms.resize(240), 73 | Transforms.random_crop(224), 74 | Transforms.affine(), 75 | Transforms.normalize()]) 76 | elif transform == 'affine_crops_cutout': 77 | return transforms.Compose([ 78 | Transforms.resize(240), 79 | Transforms.random_crop(224), 80 | Transforms.affine(), 81 | Transforms.normalize(), 82 | Transforms.cutout()]) 83 | elif transform == 'affine_cutout': 84 | return transforms.Compose([ 85 | Transforms.resize(224), 86 | Transforms.affine(), 87 | Transforms.normalize(), 88 | Transforms.cutout()]) 89 | else: 90 | raise ValueError('Image augmentation {} is not implemented'.format(transform)) 91 | -------------------------------------------------------------------------------- /babyai-text/babyai/run_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Run basic BabyAI level tests 5 | Note: there are other automated tests in .circleci/config.yml 6 | """ 7 | 8 | from babyai import levels 9 | 10 | # NOTE: please make sure that tests are always deterministic 11 | 12 | print('Testing levels, mission generation') 13 | levels.test() 14 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | *egg-info 3 | *.sh 4 | !run_slurm.sh -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/GPR.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.gaussian_process import 
GaussianProcessRegressor 4 | from sklearn.gaussian_process.kernels import RBF 5 | 6 | SR = np.array([0.73, 0.73, 0.73, 0.73, 7 | 0.66, 0.66, 0.66, 0.66, 8 | 0.56, 0.56, 0.56, 0.56, 9 | 0.41, 0.41, 0.41, 0.41, 10 | 0.250, 0.250, 0.250, 0.250]).reshape(-1, 1) 11 | SE = np.array([0.560, 0.548, 0.573, 0.556, 12 | 0.555, 0.556, 0.568, 0.577, 13 | 0.557, 0.563, 0.529, 0.538, 14 | 0.501, 0.488, 0.452, 0.481, 15 | 0.192, 0.214, 0.206, 0.132]).reshape(-1, 1) 16 | print(SR) 17 | print(SE) 18 | kernel = RBF(length_scale_bounds=(1e-05, 100000.0)) 19 | 20 | """alpha_step = np.arange(1e-4, 5e-3, 2e-4) 21 | score = np.zeros_like(alpha_step) 22 | for a in range(len(alpha_step)): 23 | gpr = GaussianProcessRegressor(alpha=alpha_step[a], kernel=kernel, random_state=0).fit(SR, SE) 24 | score[a] = gpr.score(SR, SE) 25 | 26 | plt.plot(alpha_step, score) 27 | plt.show()""" 28 | 29 | gpr = GaussianProcessRegressor(alpha=0.0005, kernel=kernel, random_state=0).fit(SR, SE) 30 | print(gpr.score(SR, SE)) 31 | SR_pred = np.arange(0.250, 0.73, 0.001) 32 | SE_mean, SE_std = gpr.predict(SR_pred.reshape(-1, 1), return_std=True) 33 | SE_mean = SE_mean.reshape(480, ) 34 | plt.scatter(SR, SE, label="Observations") 35 | plt.plot(SR_pred, SE_mean) 36 | plt.fill_between(SR_pred, 37 | SE_mean + 1.96 * SE_std, 38 | SE_mean - 1.96 * SE_std, 39 | alpha=0.5, 40 | label=r"95% confidence interval") 41 | plt.ylabel("Sample Efficiency") 42 | plt.xlabel("Success rate of the QA") 43 | plt.legend() 44 | plt.show() 45 | 46 | high_curve = SE_mean + 1.96 * SE_std 47 | valid_idx = np.where(high_curve >= 0.5)[0][0] 48 | print(SR_pred[valid_idx]) 49 | 50 | low_curve = SE_mean - 1.96 * SE_std 51 | valid_idx = np.where(low_curve >= 0.5)[0][0] 52 | print(SR_pred[valid_idx]) 53 | """print(gpr.get_params(deep=True)) 54 | SR_min = np.arange(0.250, 0.73, 0.001) 55 | proba_SR_min = [] 56 | len_SR_min = len(SR_min) 57 | 58 | print("proba inferior: {}".format(len(SE_pred[SE_pred < 0.5])/len(SE_pred))) 59 | 60 | for i in range(1, 481): 61 | proba_inferior = (len(SE_pred[SE_pred < 0.5])/len(SE_pred))**(i-1) 62 | proba_superior = len(SE_pred[SE_pred > 0.5])/len(SE_pred) 63 | proba_SR_min.append(proba_inferior*proba_superior) 64 | len(SR_min) 65 | print(len(proba_SR_min)) 66 | plt.plot(SR_min, np.array(proba_SR_min)) 67 | plt.show()""" 68 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai/scripts/__init__.py -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/compute_possible_instructions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute the number of possible instructions in the BabyAI grammar. 
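The count_* functions below mirror the grammar productions (Sent, Sent1, Clause, object descriptors, colors, locations) and sum or multiply the number of choices at each production.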
5 | """ 6 | 7 | from gym_minigrid.minigrid import COLOR_NAMES 8 | 9 | def count_Sent(): 10 | return ( 11 | count_Sent1() + 12 | # Sent1, then Sent1 13 | count_Sent1() * count_Sent1() + 14 | # Sent1 after you Sent1 15 | count_Sent1() * count_Sent1() 16 | ) 17 | 18 | def count_Sent1(): 19 | return ( 20 | count_Clause() + 21 | # Clause and Clause 22 | count_Clause() * count_Clause() 23 | ) 24 | 25 | def count_Clause(): 26 | return ( 27 | # go to 28 | count_Descr() + 29 | # pick up 30 | count_DescrNotDoor() + 31 | # open 32 | count_DescrDoor() + 33 | # put next 34 | count_DescrNotDoor() * count_Descr() 35 | ) 36 | 37 | def count_DescrDoor(): 38 | # (the|a) Color door Location 39 | return 2 * count_Color() * count_LocSpec() 40 | def count_DescrBall(): 41 | return count_DescrDoor() 42 | def count_DescrBox(): 43 | return count_DescrDoor() 44 | def count_DescrKey(): 45 | return count_DescrDoor() 46 | def count_Descr(): 47 | return count_DescrDoor() + count_DescrBall() + count_DescrBox() + count_DescrKey() 48 | def count_DescrNotDoor(): 49 | return count_DescrBall() + count_DescrBox() + count_DescrKey() 50 | 51 | def count_Color(): 52 | # Empty string or color 53 | return len([None] + COLOR_NAMES) 54 | 55 | def count_LocSpec(): 56 | # Empty string or location 57 | return len([None, 'left', 'right', 'front', 'behind']) 58 | 59 | print('DescrKey: ', count_DescrKey()) 60 | print('Descr: ', count_Descr()) 61 | print('DescrNotDoor: ', count_DescrNotDoor()) 62 | print('Clause: ', count_Clause()) 63 | print('Sent1: ', count_Sent1()) 64 | print('Sent: ', count_Sent()) 65 | print('Sent: {:.3g}'.format(count_Sent())) 66 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/enjoy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Visualize the performance of a model on a given environment. 5 | """ 6 | 7 | import argparse 8 | import gym 9 | import time 10 | 11 | import babyai.utils as utils 12 | 13 | # Parse arguments 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--env", required=True, 17 | help="name of the environment to be run (REQUIRED)") 18 | parser.add_argument("--model", default=None, 19 | help="name of the trained model (REQUIRED or --demos-origin or --demos REQUIRED)") 20 | parser.add_argument("--demos", default=None, 21 | help="demos filename (REQUIRED or --model demos-origin required)") 22 | parser.add_argument("--demos-origin", default=None, 23 | help="origin of the demonstrations: human | agent (REQUIRED or --model or --demos REQUIRED)") 24 | parser.add_argument("--seed", type=int, default=None, 25 | help="random seed (default: 0 if model agent, 1 if demo agent)") 26 | parser.add_argument("--argmax", action="store_true", default=False, 27 | help="action with highest probability is selected for model agent") 28 | parser.add_argument("--pause", type=float, default=0.1, 29 | help="the pause between two consequent actions of an agent") 30 | parser.add_argument("--manual-mode", action="store_true", default=False, 31 | help="Allows you to take control of the agent at any point of time") 32 | 33 | args = parser.parse_args() 34 | 35 | action_map = { 36 | "LEFT" : "left", 37 | "RIGHT" : "right", 38 | "UP" : "forward", 39 | "PAGE_UP": "pickup", 40 | "PAGE_DOWN": "drop", 41 | "SPACE": "toggle" 42 | } 43 | 44 | assert args.model is not None or args.demos is not None, "--model or --demos must be specified." 
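# Example invocation (hypothetical; flags as defined by the parser above):
#   python enjoy.py --env BabyAI-BossLevel-v0 --model <trained_model_name> --argmax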
45 | if args.seed is None: 46 | args.seed = 0 if args.model is not None else 1 47 | 48 | # Set seed for all randomness sources 49 | 50 | utils.seed(args.seed) 51 | 52 | # Generate environment 53 | 54 | env = gym.make(args.env) 55 | env.seed(args.seed) 56 | 57 | global obs 58 | obs = env.reset() 59 | print("Mission: {}".format(obs["mission"])) 60 | 61 | # Define agent 62 | agent = utils.load_agent(env, args.model, args.demos, args.demos_origin, args.argmax, args.env) 63 | 64 | # Run the agent 65 | 66 | done = True 67 | 68 | action = None 69 | 70 | def keyDownCb(keyName): 71 | global obs 72 | # Avoiding processing of observation by agent for wrong key clicks 73 | if keyName not in action_map and keyName != "RETURN": 74 | return 75 | 76 | agent_action = agent.act(obs)['action'] 77 | 78 | if keyName in action_map: 79 | action = env.actions[action_map[keyName]] 80 | 81 | elif keyName == "RETURN": 82 | action = agent_action 83 | 84 | obs, reward, done, _ = env.step(action) 85 | agent.analyze_feedback(reward, done) 86 | if done: 87 | print("Reward:", reward) 88 | obs = env.reset() 89 | print("Mission: {}".format(obs["mission"])) 90 | 91 | step = 0 92 | episode_num = 0 93 | while True: 94 | time.sleep(args.pause) 95 | renderer = env.render("human") 96 | if args.manual_mode and renderer.window is not None: 97 | renderer.window.setKeyDownCb(keyDownCb) 98 | else: 99 | result = agent.act(obs) 100 | obs, reward, done, _ = env.step(result['action']) 101 | agent.analyze_feedback(reward, done) 102 | if 'dist' in result and 'value' in result: 103 | dist, value = result['dist'], result['value'] 104 | dist_str = ", ".join("{:.4f}".format(float(p)) for p in dist.probs[0]) 105 | print("step: {}, mission: {}, dist: {}, entropy: {:.2f}, value: {:.2f}".format( 106 | step, obs["mission"], dist_str, float(dist.entropy()), float(value))) 107 | else: 108 | print("step: {}, mission: {}".format(step, obs['mission'])) 109 | if done: 110 | print("Reward:", reward) 111 | episode_num += 1 112 | env.seed(args.seed + episode_num) 113 | obs = env.reset() 114 | agent.on_reset() 115 | step = 0 116 | else: 117 | step += 1 118 | 119 | if renderer.window is None: 120 | break 121 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/evaluate_all_demos.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to evaluate all available demos. 3 | 4 | Assumes all demos (human and agent, except the "valid" ones) 5 | are generated with seed 1 6 | """ 7 | 8 | import os 9 | from subprocess import call 10 | import sys 11 | 12 | import babyai.utils as utils 13 | 14 | folder = os.path.join(utils.storage_dir(), "demos") 15 | for filename in sorted(os.listdir(folder)): 16 | if filename.endswith(".pkl") and 'valid' in filename: 17 | env = 'BabyAI-BossLevel-v0' # It doesn't really matter. The evaluation only considers the lengths of demos. 18 | demo = filename[:-4] # Remove the .pkl part of the name 19 | 20 | print("> Demos: {}".format(demo)) 21 | 22 | command = ["python evaluate.py --env {} --demos {} --worst-episodes-to-show 0".format(env, demo)] + sys.argv[1:] 23 | call(" ".join(command), shell=True) 24 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/evaluate_all_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate all models in a storage directory. 
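The levels each model is evaluated on are inferred from the level names embedded in the model's name (see get_levels_from_model_name below).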
3 | 4 | In order to use this script make sure to add baby-ai-game/scripts to the $PATH 5 | environment variable. 6 | 7 | Sample usage: 8 | evaluate_all_models.py --episodes 200 --argmax 9 | """ 10 | 11 | import os 12 | from subprocess import call 13 | import sys 14 | 15 | import babyai.utils as utils 16 | from babyai.levels import level_dict 17 | import re 18 | 19 | # List of all levels ordered by length of the level name from longest to shortest 20 | LEVELS = sorted(list(level_dict.keys()), key=len)[::-1] 21 | 22 | 23 | def get_levels_from_model_name(model): 24 | levels = [] 25 | # Assume that our model names are separated with _ or - 26 | model_name_parts = re.split('_|-', model) 27 | for part in model_name_parts: 28 | # Assume that each part contains at most one level name. 29 | # Sorting LEVELS using length of level name is to avoid scenarios like 30 | # extracting 'GoTo' from the model name 'GoToLocal-model' 31 | for level in LEVELS: 32 | if level in part: 33 | levels.append('BabyAI-{}-v0'.format(level)) 34 | break 35 | return list(set(levels)) 36 | 37 | 38 | folder = os.path.join(utils.storage_dir(), "models") 39 | 40 | for model in sorted(os.listdir(folder)): 41 | if model.startswith('.'): 42 | continue 43 | envs = get_levels_from_model_name(model) 44 | print("> Envs: {} > Model: {}".format(envs, model)) 45 | for env in envs: 46 | command = ["evaluate.py --env {} --model {}".format(env, model)] + sys.argv[1:] 47 | print("Command: {}".format(" ".join(command))) 48 | call(" ".join(command), shell=True) 49 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/learn_baseline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | A reimplmentation of the LEARN model (Goyal et al., 2019) 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | 10 | 11 | def initialize_parameters(m): 12 | classname = m.__class__.__name__ 13 | if classname.find('Linear') != -1: 14 | torch.nn.init.xavier_uniform_(m.weight) 15 | if m.bias is not None: 16 | m.bias.data.fill_(0.1) 17 | 18 | 19 | class LEARNBaselineModel(nn.Module): 20 | 21 | def __init__(self, obs_space, arch="learn", lang_model="gru", instr_dim=128, action_dim=128, hidden_dim=128, dropout=0): 22 | super().__init__() 23 | 24 | self.arch = arch 25 | self.lang_model = lang_model 26 | self.instr_dim = instr_dim 27 | self.action_dim = action_dim 28 | self.hidden_dim = hidden_dim 29 | 30 | if self.lang_model in ['gru']: 31 | self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim) 32 | gru_dim = self.instr_dim 33 | self.instr_rnn = nn.GRU( 34 | self.instr_dim, gru_dim, num_layers=2, 35 | batch_first=True, 36 | bidirectional=False 37 | ) 38 | 39 | action_input_sizes = [obs_space['num_actions'], self.hidden_dim, self.hidden_dim] 40 | action_output_sizes = [self.hidden_dim, self.hidden_dim, self.action_dim] 41 | self.action_mlp = self.mlp(action_input_sizes, action_output_sizes, dropout=dropout) 42 | 43 | cls_input_sizes = [self.action_dim + self.instr_dim, self.hidden_dim, self.hidden_dim] 44 | cls_output_sizes = [self.hidden_dim, self.hidden_dim, 2] 45 | self.classification_mlp = self.mlp(cls_input_sizes, cls_output_sizes, dropout=dropout) 46 | 47 | self.apply(initialize_parameters) 48 | 49 | def mlp(self, in_dim, out_dim, dropout=0, n_layers=3): 50 | layers = [] 51 | for l in range(n_layers - 1): 52 | layers.extend([nn.Linear(in_dim[l], out_dim[l]), 53 | 
nn.ReLU(), 54 | nn.BatchNorm1d(out_dim[l]), 55 | nn.Dropout(dropout)]) 56 | layers.extend([nn.Linear(in_dim[-1], out_dim[-1])]) 57 | return nn.Sequential(*layers) 58 | 59 | def forward(self, missions, action_frequencies): 60 | action_enc = self.action_mlp(action_frequencies) 61 | text_enc = self._get_instr_embedding(missions) 62 | action_text = torch.cat((action_enc, text_enc,), dim=-1) 63 | 64 | logits = self.classification_mlp(action_text) 65 | 66 | preds = torch.argmax(logits, axis=-1) 67 | return preds, logits 68 | 69 | def _get_instr_embedding(self, instr): 70 | lengths = (instr != 0).sum(1).long() 71 | if self.lang_model == 'gru': 72 | out, _ = self.instr_rnn(self.word_embedding(instr)) 73 | hidden = out[range(len(lengths)), lengths-1, :] 74 | return hidden -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/manual_control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import argparse 5 | import numpy as np 6 | import gym 7 | import gym_minigrid 8 | from gym_minigrid.wrappers import * 9 | from gym_minigrid.window import Window 10 | import babyai 11 | 12 | def redraw(img): 13 | if not args.agent_view: 14 | img = env.render('rgb_array', tile_size=args.tile_size) 15 | 16 | window.show_img(img) 17 | 18 | def reset(): 19 | if args.seed != -1: 20 | env.seed(args.seed) 21 | 22 | obs = env.reset() 23 | 24 | if hasattr(env, 'mission'): 25 | print('Mission: %s' % env.mission) 26 | window.set_caption(env.mission) 27 | 28 | redraw(obs) 29 | 30 | def step(action): 31 | obs, reward, done, info = env.step(action) 32 | print('step=%s, reward=%.2f' % (env.step_count, reward)) 33 | 34 | if done: 35 | print('done!') 36 | reset() 37 | else: 38 | redraw(obs) 39 | 40 | def key_handler(event): 41 | print('pressed', event.key) 42 | 43 | if event.key == 'escape': 44 | window.close() 45 | return 46 | 47 | if event.key == 'backspace': 48 | reset() 49 | return 50 | 51 | if event.key == 'left': 52 | step(env.actions.left) 53 | return 54 | if event.key == 'right': 55 | step(env.actions.right) 56 | return 57 | if event.key == 'up': 58 | step(env.actions.forward) 59 | return 60 | 61 | # Spacebar 62 | if event.key == ' ': 63 | step(env.actions.toggle) 64 | return 65 | if event.key == 'pageup': 66 | step(env.actions.pickup) 67 | return 68 | if event.key == 'pagedown': 69 | step(env.actions.drop) 70 | return 71 | 72 | if event.key == 'enter': 73 | step(env.actions.done) 74 | return 75 | 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument( 78 | "--env", 79 | help="gym environment to load", 80 | default='BabyAI-BossLevel-v0' 81 | ) 82 | parser.add_argument( 83 | "--seed", 84 | type=int, 85 | help="random seed to generate the environment with", 86 | default=-1 87 | ) 88 | parser.add_argument( 89 | "--tile_size", 90 | type=int, 91 | help="size at which to render tiles", 92 | default=32 93 | ) 94 | parser.add_argument( 95 | '--agent_view', 96 | default=False, 97 | help="draw the agent sees (partially observable view)", 98 | action='store_true' 99 | ) 100 | 101 | args = parser.parse_args() 102 | 103 | env = gym.make(args.env) 104 | 105 | if args.agent_view: 106 | env = RGBImgPartialObsWrapper(env) 107 | env = ImgObsWrapper(env) 108 | 109 | window = Window('gym_minigrid - ' + args.env) 110 | window.reg_key_handler(key_handler) 111 | 112 | reset() 113 | 114 | # Blocking event loop 115 | window.show(block=True) 116 | 
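# Example invocation (hypothetical; flags per the argparse definitions above):
#   python manual_control.py --env BabyAI-BossLevel-v0 --agent_view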
-------------------------------------------------------------------------------- /babyai-text/babyai/scripts/result_l_class_study.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle as pkl 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | 7 | def learning_curves(name_env, model_number): 8 | print("======== env:{} model:{}=======".format(name_env, model_number)) 9 | log = pkl.load(open('storage/models/' + name_env + '/' + 'model_{}'.format(model_number) + '/log.pkl', "rb")) 10 | 11 | train_error = np.array(log["loss_cross_entropy_train"]) 12 | success_rate_train = np.array(log["success_pred_train"]) 13 | valid_error = np.array(log["loss_cross_entropy_valid"]) 14 | success_rate_valid = np.array(log["success_pred_valid"]) 15 | 16 | print('At epoch {} the CE error for train reach the minimum value of {}'.format(np.argmin(train_error), 17 | min(train_error))) 18 | print(train_error) 19 | print(" ") 20 | print('At epoch {} the CE error for valid reach the minimum value of {}'.format(np.argmin(valid_error), 21 | min(valid_error))) 22 | print(valid_error) 23 | print(" ") 24 | print('At epoch {} the success rate for train reach the maximum value of {}'.format(np.argmax(success_rate_train), 25 | max(success_rate_train))) 26 | print(success_rate_train) 27 | print(" ") 28 | print('At epoch {} the success rate for valid reach the maximum value of {}'.format(np.argmax(success_rate_valid), 29 | max(success_rate_valid))) 30 | print(success_rate_valid) 31 | 32 | """plt.plot(np.arange(len(train_error)), train_error) 33 | plt.title("Train error") 34 | plt.grid(axis='both') 35 | plt.show() 36 | plt.plot(np.arange(len(valid_error)), valid_error) 37 | plt.title("Valid error") 38 | plt.grid(axis='both') 39 | plt.show() 40 | plt.plot(np.arange(len(success_rate_train)), success_rate_train) 41 | plt.title("Success rate train set") 42 | plt.grid(axis='both') 43 | plt.show() 44 | plt.plot(np.arange(len(success_rate_valid)), success_rate_valid) 45 | plt.title("Success rate valid set") 46 | plt.grid(axis='both') 47 | plt.show() 48 | """ 49 | 50 | 51 | 52 | 53 | learning_curves('BabyAI-PutNextLocal-v0_no_answer_l_class', 0) 54 | 55 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/show_level_instructions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Randomly sample and print out instructions from a level. 
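Example (hypothetical invocation): python show_level_instructions.py BabyAI-BossLevel-v0 --n-episodes 100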
3 | """ 4 | 5 | import argparse 6 | 7 | import babyai 8 | import gym 9 | 10 | 11 | parser = argparse.ArgumentParser("Show level instructions") 12 | parser.add_argument("--n-episodes", type=int, default=10000, 13 | help="Collect instructions from this many episodes") 14 | parser.add_argument("level", 15 | help="The level of interest") 16 | args = parser.parse_args() 17 | 18 | env = gym.make(args.level) 19 | instructions = set(env.reset()['mission'] for i in range(args.n_episodes)) 20 | for instr in sorted(instructions): 21 | print(instr) 22 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/subtask_prediction_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | 10 | def initialize_parameters(m): 11 | classname = m.__class__.__name__ 12 | if classname.find('Linear') != -1: 13 | m.weight.data.normal_(0, 1) 14 | m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True)) 15 | if m.bias is not None: 16 | m.bias.data.fill_(0) 17 | 18 | 19 | class SubtaskPredictionModel(nn.Module): 20 | 21 | def __init__(self, obs_space, arch="siamese", lang_model="gru", instr_dim=128): 22 | super().__init__() 23 | 24 | self.arch = arch 25 | self.lang_model = lang_model 26 | self.instr_dim = instr_dim 27 | 28 | if self.lang_model in ['gru']: 29 | self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim) 30 | gru_dim = self.instr_dim 31 | self.instr_rnn = nn.GRU( 32 | self.instr_dim, gru_dim, batch_first=True, 33 | bidirectional=False 34 | ) 35 | 36 | self.fc1 = nn.Linear(self.instr_dim, self.instr_dim // 2) 37 | self.fc2 = nn.Linear(self.instr_dim, self.instr_dim // 2) 38 | self.dropout1 = nn.Dropout(0.1) 39 | self.dropout2 = nn.Dropout(0.1) 40 | self.fc3 = nn.Linear(self.instr_dim, self.instr_dim // 2) 41 | self.fc4 = nn.Linear(self.instr_dim // 2, 1) 42 | 43 | self.sigmoid = nn.Sigmoid() 44 | 45 | self.apply(initialize_parameters) 46 | 47 | def forward(self, missions, subtasks): 48 | if self.arch == "siamese": 49 | mission_embedding = self._get_instr_embedding(missions) 50 | subtask_embedding = self._get_instr_embedding(subtasks) 51 | 52 | mission_embedding = self.dropout1(self.fc1(mission_embedding)) 53 | subtask_embedding = self.dropout2(self.fc2(subtask_embedding)) 54 | 55 | both_embeddings = torch.cat((mission_embedding, subtask_embedding), dim=-1) 56 | both_embeddings = self.fc3(both_embeddings) 57 | 58 | logits = self.fc4(both_embeddings) 59 | preds = self.sigmoid(logits).squeeze(-1) 60 | elif self.arch == "siamese-l1": 61 | mission_embedding = self._get_instr_embedding(missions) 62 | subtask_embedding = self._get_instr_embedding(subtasks) 63 | 64 | mission_embedding = self.fc1(mission_embedding) 65 | subtask_embedding = self.fc2(subtask_embedding) 66 | 67 | dist = torch.norm(mission_embedding - subtask_embedding, p=1, dim=1) 68 | 69 | preds = torch.exp(-dist) 70 | 71 | return preds 72 | 73 | def _get_instr_embedding(self, instr): 74 | lengths = (instr != 0).sum(1).long() 75 | if self.lang_model == 'gru': 76 | out, _ = self.instr_rnn(self.word_embedding(instr)) 77 | hidden = out[range(len(lengths)), lengths-1, :] 78 | return hidden -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/train_learn_baseline_model.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Training code for the LEARN model (Goyal et al., 2019) 5 | """ 6 | 7 | import os 8 | import csv 9 | import copy 10 | import gym 11 | import time 12 | import datetime 13 | import numpy as np 14 | import sys 15 | import logging 16 | import torch 17 | import wandb 18 | from babyai.arguments import ArgumentParser 19 | import babyai.utils as utils 20 | 21 | from learn_baseline import LEARNBaseline 22 | from babyai.arguments import ArgumentParser 23 | import babyai.utils as utils 24 | 25 | 26 | parser = ArgumentParser() 27 | 28 | parser.add_argument("--demos", default=None, 29 | help="demos filename (REQUIRED or demos-origin or multi-demos required)") 30 | parser.add_argument("--demos-origin", required=False, 31 | help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)") 32 | parser.add_argument("--episodes", type=int, default=0, 33 | help="number of high-level episodes of demonstrations to use" 34 | "(default: 0, meaning all demos)") 35 | parser.add_argument("--save-interval", type=int, default=1, 36 | help="number of epochs between two saves (default: 1, 0 means no saving)") 37 | 38 | 39 | def main(args): 40 | 41 | args.model = args.model or LEARNBaseline.default_model_name(args) 42 | utils.configure_logging(args.model) 43 | logger = logging.getLogger(__name__) 44 | 45 | learn_baseline = LEARNBaseline(args) 46 | 47 | header = (["update", "frames", "fps", "duration", "train_loss", "train_accuracy", "train_precision", "train_recall"] 48 | + ["validation_loss", "validation_accuracy", "validation_precision", "validation_recall"]) 49 | 50 | writer = None 51 | if args.wb: 52 | wandb.init(project="ella", name=args.model) 53 | wandb.config.update(args) 54 | writer = wandb 55 | 56 | # Define csv writer 57 | csv_writer = None 58 | csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv') 59 | first_created = not os.path.exists(csv_path) 60 | # we don't buffer data going in the csv log, cause we assume 61 | # that one update will take much longer that one write to the log 62 | csv_writer = csv.writer(open(csv_path, 'a', 1)) 63 | if first_created: 64 | csv_writer.writerow(header) 65 | 66 | # Get the status path 67 | status_path = os.path.join(utils.get_log_dir(args.model), 'status.json') 68 | 69 | # Log command, availability of CUDA, and model 70 | logger.info(args) 71 | logger.info("CUDA available: {}".format(torch.cuda.is_available())) 72 | logger.info(learn_baseline.model) 73 | 74 | learn_baseline.train(learn_baseline.train_demos, writer, csv_writer, status_path, header) 75 | 76 | 77 | if __name__ == "__main__": 78 | args = parser.parse_args() 79 | main(args) -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/train_subtask_prediction_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Pre-training code for the subtask prediction model (relevance classifier). 
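Example (hypothetical invocation; flags match the parser defined below):
train_subtask_prediction_model.py --demos <demos_filename> --episodes 1000 --denoise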
5 | """ 6 | 7 | import os 8 | import csv 9 | import copy 10 | import gym 11 | import time 12 | import datetime 13 | import numpy as np 14 | import sys 15 | import logging 16 | import torch 17 | from babyai.arguments import ArgumentParser 18 | import babyai.utils as utils 19 | 20 | from subtask_prediction import SubtaskPrediction 21 | from babyai.arguments import ArgumentParser 22 | import babyai.utils as utils 23 | 24 | 25 | parser = ArgumentParser() 26 | 27 | parser.add_argument("--demos", default=None, 28 | help="demos filename (REQUIRED or demos-origin or multi-demos required)") 29 | parser.add_argument("--demos-origin", required=False, 30 | help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)") 31 | parser.add_argument("--episodes", type=int, default=0, 32 | help="number of high-level episodes of demonstrations to use" 33 | "(default: 0, meaning all demos)") 34 | parser.add_argument("--low-level-demos", default=None, 35 | help="low-level demos filename") 36 | parser.add_argument("--ll-episodes", type=int, default=0, 37 | help="number of low-level episodes of demonstrations to use" 38 | "(default: 0, meaning all demos)") 39 | parser.add_argument("--save-interval", type=int, default=1, 40 | help="number of epochs between two saves (default: 1, 0 means no saving)") 41 | parser.add_argument("--denoise", action="store_true", 42 | help="whether or not to denoise the data") 43 | parser.add_argument("--denoise-k", type=int, default=1, 44 | help="how many examples of each instruction to use") 45 | parser.add_argument("--denoise-total", type=int, default=100, 46 | help="total number of instructions in the denoised dataset") 47 | parser.add_argument("--augment", action="store_true", 48 | help="whether or not to augment the data") 49 | parser.add_argument("--augment-total", type=int, default=100, 50 | help="total number of instructions in the augmented dataset") 51 | parser.add_argument("--wait-finetune", type=int, default=50, 52 | help="how long to wait to fine-tune") 53 | parser.add_argument("--ones", action="store_true", default=False, 54 | help="whether to ignore labels") 55 | 56 | def main(args): 57 | 58 | args.model = args.model or SubtaskPrediction.default_model_name(args) 59 | utils.configure_logging(args.model) 60 | logger = logging.getLogger(__name__) 61 | 62 | subtask_prediction = SubtaskPrediction(args) 63 | 64 | header = (["update", "frames", "fps", "duration", "train_loss", "train_accuracy", "train_precision", "train_recall"] 65 | + ["validation_loss", "validation_accuracy"] 66 | + ["ground_truth_validation_accuracy", "ground_truth_validation_precision", "ground_truth_validation_recall"]) 67 | 68 | writer = None 69 | if args.wb: 70 | import wandb 71 | wandb.init(project="ella") 72 | wandb.config.update(args) 73 | writer = wandb 74 | 75 | # Define csv writer 76 | csv_writer = None 77 | csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv') 78 | first_created = not os.path.exists(csv_path) 79 | # we don't buffer data going in the csv log, cause we assume 80 | # that one update will take much longer that one write to the log 81 | csv_writer = csv.writer(open(csv_path, 'a', 1)) 82 | if first_created: 83 | csv_writer.writerow(header) 84 | 85 | # Get the status path 86 | status_path = os.path.join(utils.get_log_dir(args.model), 'status.json') 87 | 88 | # Log command, availability of CUDA, and model 89 | logger.info(args) 90 | logger.info("CUDA available: {}".format(torch.cuda.is_available())) 91 | logger.info(subtask_prediction.model) 92 
| 93 | subtask_prediction.train(subtask_prediction.train_demos, writer, csv_writer, status_path, header) 94 | 95 | 96 | if __name__ == "__main__": 97 | args = parser.parse_args() 98 | main(args) -------------------------------------------------------------------------------- /babyai-text/babyai/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='babyai', 5 | version='0.1.0', 6 | license='BSD 3-clause', 7 | keywords='memory, environment, agent, rl, openaigym, openai-gym, gym', 8 | packages=['babyai', 'babyai.levels', 'babyai.utils', 'babyai.rl'], 9 | install_requires=[ 10 | 'gym>=0.9.6,<0.26.2', 11 | 'numpy>=1.17.0', 12 | "torch>=0.4.1", 13 | 'blosc>=1.5.1', 14 | # 'gym_minigrid @ https://github.com/maximecb/gym-minigrid/archive/master.zip' 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /babyai-text/babyai_text/__init__.py: -------------------------------------------------------------------------------- 1 | from .levels.mixed_seq_levels import * -------------------------------------------------------------------------------- /babyai-text/babyai_text/levels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai_text/levels/__init__.py -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *__pycache__ 3 | gym_minigrid.egg-info 4 | trained_models 5 | 6 | # PyPI 7 | build/* 8 | dist/* 9 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | 5 | # command to install dependencies 6 | install: 7 | - pip3 install -e . 
8 | 9 | # command to run tests 10 | script: ./run_tests.py 11 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import argparse 5 | from gym_minigrid.wrappers import * 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument( 9 | "--env-name", 10 | dest="env_name", 11 | help="gym environment to load", 12 | default='MiniGrid-LavaGapS7-v0' 13 | ) 14 | parser.add_argument("--num_resets", default=200) 15 | parser.add_argument("--num_frames", default=5000) 16 | args = parser.parse_args() 17 | 18 | env = gym.make(args.env_name) 19 | 20 | # Benchmark env.reset 21 | t0 = time.time() 22 | for i in range(args.num_resets): 23 | env.reset() 24 | t1 = time.time() 25 | dt = t1 - t0 26 | reset_time = (1000 * dt) / args.num_resets 27 | 28 | # Benchmark rendering 29 | t0 = time.time() 30 | for i in range(args.num_frames): 31 | env.render('rgb_array') 32 | t1 = time.time() 33 | dt = t1 - t0 34 | frames_per_sec = args.num_frames / dt 35 | 36 | # Create an environment with an RGB agent observation 37 | env = gym.make(args.env_name) 38 | env = RGBImgPartialObsWrapper(env) 39 | env = ImgObsWrapper(env) 40 | 41 | # Benchmark rendering 42 | t0 = time.time() 43 | for i in range(args.num_frames): 44 | obs, reward, done, info = env.step(0) 45 | t1 = time.time() 46 | dt = t1 - t0 47 | agent_view_fps = args.num_frames / dt 48 | 49 | print('Env reset time: {:.1f} ms'.format(reset_time)) 50 | print('Rendering FPS : {:.0f}'.format(frames_per_sec)) 51 | print('Agent view FPS: {:.0f}'.format(agent_view_fps)) 52 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/BlockedUnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/BlockedUnlockPickup.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/DistShift1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/DistShift1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/DistShift2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/DistShift2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS4R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS4R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS5R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS5R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS6R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS6R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS11N5.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaGapS6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaGapS6.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dl.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlh.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlhb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlhb.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dl.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlh.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlhb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlhb.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-4Q.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-4Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS11N5.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/Unlock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/Unlock.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/UnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/UnlockPickup.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/door-key-curriculum.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/door-key-curriculum.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/door-key-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/door-key-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/dynamic_obstacles.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/dynamic_obstacles.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/empty-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/empty-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/fetch-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/fetch-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/four-rooms-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/four-rooms-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/gotodoor-6x6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/gotodoor-6x6.mp4 -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/gotodoor-6x6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/gotodoor-6x6.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/multi-room.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/multi-room.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the envs module so that envs register themselves 2 | import gym_minigrid.envs 3 | 4 | # Import wrappers so it's accessible when installing with pip 5 | import gym_minigrid.wrappers 6 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.envs.empty import * 2 | from gym_minigrid.envs.doorkey import * 3 | from gym_minigrid.envs.multiroom import * 4 | from gym_minigrid.envs.fetch import * 5 | from gym_minigrid.envs.gotoobject import * 6 | from gym_minigrid.envs.gotodoor import * 7 | from gym_minigrid.envs.putnear import * 8 | from gym_minigrid.envs.lockedroom import * 9 | from gym_minigrid.envs.keycorridor import * 10 | from gym_minigrid.envs.unlock import * 11 | from 
gym_minigrid.envs.unlockpickup import * 12 | from gym_minigrid.envs.blockedunlockpickup import * 13 | from gym_minigrid.envs.playground_v0 import * 14 | from gym_minigrid.envs.redbluedoors import * 15 | from gym_minigrid.envs.obstructedmaze import * 16 | from gym_minigrid.envs.memory import * 17 | from gym_minigrid.envs.fourrooms import * 18 | from gym_minigrid.envs.crossing import * 19 | from gym_minigrid.envs.lavagap import * 20 | from gym_minigrid.envs.dynamicobstacles import * 21 | from gym_minigrid.envs.distshift import * 22 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/blockedunlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class BlockedUnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door blocked by a ball, then pick up a box 8 | in another room 9 | """ 10 | 11 | def __init__(self, seed=None): 12 | room_size = 6 13 | super().__init__( 14 | num_rows=1, 15 | num_cols=2, 16 | room_size=room_size, 17 | max_steps=16*room_size**2, 18 | seed=seed 19 | ) 20 | 21 | def _gen_grid(self, width, height): 22 | super()._gen_grid(width, height) 23 | 24 | # Add a box to the room on the right 25 | obj, _ = self.add_object(1, 0, kind="box") 26 | # Make sure the two rooms are directly connected by a locked door 27 | door, pos = self.add_door(0, 0, 0, locked=True) 28 | # Block the door with a ball 29 | color = self._rand_color() 30 | self.grid.set(pos[0]-1, pos[1], Ball(color)) 31 | # Add a key to unlock the door 32 | self.add_object(0, 0, 'key', door.color) 33 | 34 | self.place_agent(0, 0) 35 | 36 | self.obj = obj 37 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 38 | 39 | def step(self, action): 40 | obs, reward, done, info = super().step(action) 41 | 42 | if action == self.actions.pickup: 43 | if self.carrying and self.carrying == self.obj: 44 | reward = self._reward() 45 | done = True 46 | 47 | return obs, reward, done, info 48 | 49 | register( 50 | id='MiniGrid-BlockedUnlockPickup-v0', 51 | entry_point='gym_minigrid.envs:BlockedUnlockPickup' 52 | ) 53 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/distshift.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DistShiftEnv(MiniGridEnv): 5 | """ 6 | Distributional shift environment. 
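    The two registered variants, DistShift1 and DistShift2, differ only in strip2_row,
    i.e. in which row the second strip of lava is placed (row 2 vs. row 5).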
7 | """ 8 | 9 | def __init__( 10 | self, 11 | width=9, 12 | height=7, 13 | agent_start_pos=(1,1), 14 | agent_start_dir=0, 15 | strip2_row=2 16 | ): 17 | self.agent_start_pos = agent_start_pos 18 | self.agent_start_dir = agent_start_dir 19 | self.goal_pos = (width-2, 1) 20 | self.strip2_row = strip2_row 21 | 22 | super().__init__( 23 | width=width, 24 | height=height, 25 | max_steps=4*width*height, 26 | # Set this to True for maximum speed 27 | see_through_walls=True 28 | ) 29 | 30 | def _gen_grid(self, width, height): 31 | # Create an empty grid 32 | self.grid = Grid(width, height) 33 | 34 | # Generate the surrounding walls 35 | self.grid.wall_rect(0, 0, width, height) 36 | 37 | # Place a goal square in the bottom-right corner 38 | self.put_obj(Goal(), *self.goal_pos) 39 | 40 | # Place the lava rows 41 | for i in range(self.width - 6): 42 | self.grid.set(3+i, 1, Lava()) 43 | self.grid.set(3+i, self.strip2_row, Lava()) 44 | 45 | # Place the agent 46 | if self.agent_start_pos is not None: 47 | self.agent_pos = self.agent_start_pos 48 | self.agent_dir = self.agent_start_dir 49 | else: 50 | self.place_agent() 51 | 52 | self.mission = "get to the green goal square" 53 | 54 | class DistShift1(DistShiftEnv): 55 | def __init__(self): 56 | super().__init__(strip2_row=2) 57 | 58 | class DistShift2(DistShiftEnv): 59 | def __init__(self): 60 | super().__init__(strip2_row=5) 61 | 62 | register( 63 | id='MiniGrid-DistShift1-v0', 64 | entry_point='gym_minigrid.envs:DistShift1' 65 | ) 66 | 67 | register( 68 | id='MiniGrid-DistShift2-v0', 69 | entry_point='gym_minigrid.envs:DistShift2' 70 | ) 71 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/doorkey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DoorKeyEnv(MiniGridEnv): 5 | """ 6 | Environment with a door and key, sparse reward 7 | """ 8 | 9 | def __init__(self, size=8): 10 | super().__init__( 11 | grid_size=size, 12 | max_steps=10*size*size 13 | ) 14 | 15 | def _gen_grid(self, width, height): 16 | # Create an empty grid 17 | self.grid = Grid(width, height) 18 | 19 | # Generate the surrounding walls 20 | self.grid.wall_rect(0, 0, width, height) 21 | 22 | # Place a goal in the bottom-right corner 23 | self.put_obj(Goal(), width - 2, height - 2) 24 | 25 | # Create a vertical splitting wall 26 | splitIdx = self._rand_int(2, width-2) 27 | self.grid.vert_wall(splitIdx, 0) 28 | 29 | # Place the agent at a random position and orientation 30 | # on the left side of the splitting wall 31 | self.place_agent(size=(splitIdx, height)) 32 | 33 | # Place a door in the wall 34 | doorIdx = self._rand_int(1, width-2) 35 | self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 36 | 37 | # Place a yellow key on the left side 38 | self.place_obj( 39 | obj=Key('yellow'), 40 | top=(0, 0), 41 | size=(splitIdx, height) 42 | ) 43 | 44 | self.mission = "use the key to open the door and then get to the goal" 45 | 46 | class DoorKeyEnv5x5(DoorKeyEnv): 47 | def __init__(self): 48 | super().__init__(size=5) 49 | 50 | class DoorKeyEnv6x6(DoorKeyEnv): 51 | def __init__(self): 52 | super().__init__(size=6) 53 | 54 | class DoorKeyEnv16x16(DoorKeyEnv): 55 | def __init__(self): 56 | super().__init__(size=16) 57 | 58 | register( 59 | id='MiniGrid-DoorKey-5x5-v0', 60 | entry_point='gym_minigrid.envs:DoorKeyEnv5x5' 61 | ) 62 | 63 | register( 64 | 
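    # Each registered id can be created directly through Gym once gym_minigrid has
    # been imported (the import triggers registration), e.g.:
    #   import gym, gym_minigrid
    #   env = gym.make('MiniGrid-DoorKey-6x6-v0')
    #   obs = env.reset()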
id='MiniGrid-DoorKey-6x6-v0', 65 | entry_point='gym_minigrid.envs:DoorKeyEnv6x6' 66 | ) 67 | 68 | register( 69 | id='MiniGrid-DoorKey-8x8-v0', 70 | entry_point='gym_minigrid.envs:DoorKeyEnv' 71 | ) 72 | 73 | register( 74 | id='MiniGrid-DoorKey-16x16-v0', 75 | entry_point='gym_minigrid.envs:DoorKeyEnv16x16' 76 | ) 77 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/empty.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class EmptyEnv(MiniGridEnv): 5 | """ 6 | Empty grid environment, no obstacles, sparse reward 7 | """ 8 | 9 | def __init__( 10 | self, 11 | size=8, 12 | agent_start_pos=(1,1), 13 | agent_start_dir=0, 14 | ): 15 | self.agent_start_pos = agent_start_pos 16 | self.agent_start_dir = agent_start_dir 17 | 18 | super().__init__( 19 | grid_size=size, 20 | max_steps=4*size*size, 21 | # Set this to True for maximum speed 22 | see_through_walls=True 23 | ) 24 | 25 | def _gen_grid(self, width, height): 26 | # Create an empty grid 27 | self.grid = Grid(width, height) 28 | 29 | # Generate the surrounding walls 30 | self.grid.wall_rect(0, 0, width, height) 31 | 32 | # Place a goal square in the bottom-right corner 33 | self.put_obj(Goal(), width - 2, height - 2) 34 | 35 | # Place the agent 36 | if self.agent_start_pos is not None: 37 | self.agent_pos = self.agent_start_pos 38 | self.agent_dir = self.agent_start_dir 39 | else: 40 | self.place_agent() 41 | 42 | self.mission = "get to the green goal square" 43 | 44 | class EmptyEnv5x5(EmptyEnv): 45 | def __init__(self): 46 | super().__init__(size=5) 47 | 48 | class EmptyRandomEnv5x5(EmptyEnv): 49 | def __init__(self): 50 | super().__init__(size=5, agent_start_pos=None) 51 | 52 | class EmptyEnv6x6(EmptyEnv): 53 | def __init__(self): 54 | super().__init__(size=6) 55 | 56 | class EmptyRandomEnv6x6(EmptyEnv): 57 | def __init__(self): 58 | super().__init__(size=6, agent_start_pos=None) 59 | 60 | class EmptyEnv16x16(EmptyEnv): 61 | def __init__(self): 62 | super().__init__(size=16) 63 | 64 | register( 65 | id='MiniGrid-Empty-5x5-v0', 66 | entry_point='gym_minigrid.envs:EmptyEnv5x5' 67 | ) 68 | 69 | register( 70 | id='MiniGrid-Empty-Random-5x5-v0', 71 | entry_point='gym_minigrid.envs:EmptyRandomEnv5x5' 72 | ) 73 | 74 | register( 75 | id='MiniGrid-Empty-6x6-v0', 76 | entry_point='gym_minigrid.envs:EmptyEnv6x6' 77 | ) 78 | 79 | register( 80 | id='MiniGrid-Empty-Random-6x6-v0', 81 | entry_point='gym_minigrid.envs:EmptyRandomEnv6x6' 82 | ) 83 | 84 | register( 85 | id='MiniGrid-Empty-8x8-v0', 86 | entry_point='gym_minigrid.envs:EmptyEnv' 87 | ) 88 | 89 | register( 90 | id='MiniGrid-Empty-16x16-v0', 91 | entry_point='gym_minigrid.envs:EmptyEnv16x16' 92 | ) 93 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/fetch.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class FetchEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent has to fetch a random object 7 | named using English text strings 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=8, 13 | numObjs=3 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size**2, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | 
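            # (with see_through_walls=True the occlusion mask for the agent's partial
            # view is skipped, which is where the speed gain comes from)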
) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.horz_wall(0, 0) 29 | self.grid.horz_wall(0, height-1) 30 | self.grid.vert_wall(0, 0) 31 | self.grid.vert_wall(width-1, 0) 32 | 33 | types = ['key', 'ball'] 34 | 35 | objs = [] 36 | 37 | # For each object to be generated 38 | while len(objs) < self.numObjs: 39 | objType = self._rand_elem(types) 40 | objColor = self._rand_elem(COLOR_NAMES) 41 | 42 | if objType == 'key': 43 | obj = Key(objColor) 44 | elif objType == 'ball': 45 | obj = Ball(objColor) 46 | 47 | self.place_obj(obj) 48 | objs.append(obj) 49 | 50 | # Randomize the player start position and orientation 51 | self.place_agent() 52 | 53 | # Choose a random object to be picked up 54 | target = objs[self._rand_int(0, len(objs))] 55 | self.targetType = target.type 56 | self.targetColor = target.color 57 | 58 | descStr = '%s %s' % (self.targetColor, self.targetType) 59 | 60 | # Generate the mission string 61 | idx = self._rand_int(0, 5) 62 | if idx == 0: 63 | self.mission = 'get a %s' % descStr 64 | elif idx == 1: 65 | self.mission = 'go get a %s' % descStr 66 | elif idx == 2: 67 | self.mission = 'fetch a %s' % descStr 68 | elif idx == 3: 69 | self.mission = 'go fetch a %s' % descStr 70 | elif idx == 4: 71 | self.mission = 'you must fetch a %s' % descStr 72 | assert hasattr(self, 'mission') 73 | 74 | def step(self, action): 75 | obs, reward, done, info = MiniGridEnv.step(self, action) 76 | 77 | if self.carrying: 78 | if self.carrying.color == self.targetColor and \ 79 | self.carrying.type == self.targetType: 80 | reward = self._reward() 81 | done = True 82 | else: 83 | reward = 0 84 | done = True 85 | 86 | return obs, reward, done, info 87 | 88 | class FetchEnv5x5N2(FetchEnv): 89 | def __init__(self): 90 | super().__init__(size=5, numObjs=2) 91 | 92 | class FetchEnv6x6N2(FetchEnv): 93 | def __init__(self): 94 | super().__init__(size=6, numObjs=2) 95 | 96 | register( 97 | id='MiniGrid-Fetch-5x5-N2-v0', 98 | entry_point='gym_minigrid.envs:FetchEnv5x5N2' 99 | ) 100 | 101 | register( 102 | id='MiniGrid-Fetch-6x6-N2-v0', 103 | entry_point='gym_minigrid.envs:FetchEnv6x6N2' 104 | ) 105 | 106 | register( 107 | id='MiniGrid-Fetch-8x8-N3-v0', 108 | entry_point='gym_minigrid.envs:FetchEnv' 109 | ) 110 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/fourrooms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from gym_minigrid.minigrid import * 5 | from gym_minigrid.register import register 6 | 7 | 8 | class FourRoomsEnv(MiniGridEnv): 9 | """ 10 | Classic 4 rooms gridworld environment. 11 | Can specify agent and goal position, if not it set at random. 
12 | """ 13 | 14 | def __init__(self, agent_pos=None, goal_pos=None): 15 | self._agent_default_pos = agent_pos 16 | self._goal_default_pos = goal_pos 17 | super().__init__(grid_size=19, max_steps=100) 18 | 19 | def _gen_grid(self, width, height): 20 | # Create the grid 21 | self.grid = Grid(width, height) 22 | 23 | # Generate the surrounding walls 24 | self.grid.horz_wall(0, 0) 25 | self.grid.horz_wall(0, height - 1) 26 | self.grid.vert_wall(0, 0) 27 | self.grid.vert_wall(width - 1, 0) 28 | 29 | room_w = width // 2 30 | room_h = height // 2 31 | 32 | # For each row of rooms 33 | for j in range(0, 2): 34 | 35 | # For each column 36 | for i in range(0, 2): 37 | xL = i * room_w 38 | yT = j * room_h 39 | xR = xL + room_w 40 | yB = yT + room_h 41 | 42 | # Bottom wall and door 43 | if i + 1 < 2: 44 | self.grid.vert_wall(xR, yT, room_h) 45 | pos = (xR, self._rand_int(yT + 1, yB)) 46 | self.grid.set(*pos, None) 47 | 48 | # Bottom wall and door 49 | if j + 1 < 2: 50 | self.grid.horz_wall(xL, yB, room_w) 51 | pos = (self._rand_int(xL + 1, xR), yB) 52 | self.grid.set(*pos, None) 53 | 54 | # Randomize the player start position and orientation 55 | if self._agent_default_pos is not None: 56 | self.agent_pos = self._agent_default_pos 57 | self.grid.set(*self._agent_default_pos, None) 58 | self.agent_dir = self._rand_int(0, 4) # assuming random start direction 59 | else: 60 | self.place_agent() 61 | 62 | if self._goal_default_pos is not None: 63 | goal = Goal() 64 | self.put_obj(goal, *self._goal_default_pos) 65 | goal.init_pos, goal.cur_pos = self._goal_default_pos 66 | else: 67 | self.place_obj(Goal()) 68 | 69 | self.mission = 'Reach the goal' 70 | 71 | def step(self, action): 72 | obs, reward, done, info = MiniGridEnv.step(self, action) 73 | return obs, reward, done, info 74 | 75 | register( 76 | id='MiniGrid-FourRooms-v0', 77 | entry_point='gym_minigrid.envs:FourRoomsEnv' 78 | ) 79 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/gotodoor.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class GoToDoorEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to go to a given object 7 | named using an English text string 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=5 13 | ): 14 | assert size >= 5 15 | 16 | super().__init__( 17 | grid_size=size, 18 | max_steps=5*size**2, 19 | # Set this to True for maximum speed 20 | see_through_walls=True 21 | ) 22 | 23 | def _gen_grid(self, width, height): 24 | # Create the grid 25 | self.grid = Grid(width, height) 26 | 27 | # Randomly vary the room width and height 28 | width = self._rand_int(5, width+1) 29 | height = self._rand_int(5, height+1) 30 | 31 | # Generate the surrounding walls 32 | self.grid.wall_rect(0, 0, width, height) 33 | 34 | # Generate the 4 doors at random positions 35 | doorPos = [] 36 | doorPos.append((self._rand_int(2, width-2), 0)) 37 | doorPos.append((self._rand_int(2, width-2), height-1)) 38 | doorPos.append((0, self._rand_int(2, height-2))) 39 | doorPos.append((width-1, self._rand_int(2, height-2))) 40 | 41 | # Generate the door colors 42 | doorColors = [] 43 | while len(doorColors) < len(doorPos): 44 | color = self._rand_elem(COLOR_NAMES) 45 | if color in doorColors: 46 | continue 47 | doorColors.append(color) 48 | 49 | # Place the doors in the grid 50 | for idx, pos in enumerate(doorPos): 51 | color = 
doorColors[idx] 52 | self.grid.set(*pos, Door(color)) 53 | 54 | # Randomize the agent start position and orientation 55 | self.place_agent(size=(width, height)) 56 | 57 | # Select a random target door 58 | doorIdx = self._rand_int(0, len(doorPos)) 59 | self.target_pos = doorPos[doorIdx] 60 | self.target_color = doorColors[doorIdx] 61 | 62 | # Generate the mission string 63 | self.mission = 'go to the %s door' % self.target_color 64 | 65 | def step(self, action): 66 | obs, reward, done, info = super().step(action) 67 | 68 | ax, ay = self.agent_pos 69 | tx, ty = self.target_pos 70 | 71 | # Don't let the agent open any of the doors 72 | if action == self.actions.toggle: 73 | done = True 74 | 75 | # Reward performing done action in front of the target door 76 | if action == self.actions.done: 77 | if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1): 78 | reward = self._reward() 79 | done = True 80 | 81 | return obs, reward, done, info 82 | 83 | class GoToDoor8x8Env(GoToDoorEnv): 84 | def __init__(self): 85 | super().__init__(size=8) 86 | 87 | class GoToDoor6x6Env(GoToDoorEnv): 88 | def __init__(self): 89 | super().__init__(size=6) 90 | 91 | register( 92 | id='MiniGrid-GoToDoor-5x5-v0', 93 | entry_point='gym_minigrid.envs:GoToDoorEnv' 94 | ) 95 | 96 | register( 97 | id='MiniGrid-GoToDoor-6x6-v0', 98 | entry_point='gym_minigrid.envs:GoToDoor6x6Env' 99 | ) 100 | 101 | register( 102 | id='MiniGrid-GoToDoor-8x8-v0', 103 | entry_point='gym_minigrid.envs:GoToDoor8x8Env' 104 | ) 105 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/gotoobject.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class GoToObjectEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to go to a given object 7 | named using an English text string 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=6, 13 | numObjs=2 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size**2, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | ) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.wall_rect(0, 0, width, height) 29 | 30 | # Types and colors of objects we can generate 31 | types = ['key', 'ball', 'box'] 32 | 33 | objs = [] 34 | objPos = [] 35 | 36 | # Until we have generated all the objects 37 | while len(objs) < self.numObjs: 38 | objType = self._rand_elem(types) 39 | objColor = self._rand_elem(COLOR_NAMES) 40 | 41 | # If this object already exists, try again 42 | if (objType, objColor) in objs: 43 | continue 44 | 45 | if objType == 'key': 46 | obj = Key(objColor) 47 | elif objType == 'ball': 48 | obj = Ball(objColor) 49 | elif objType == 'box': 50 | obj = Box(objColor) 51 | 52 | pos = self.place_obj(obj) 53 | objs.append((objType, objColor)) 54 | objPos.append(pos) 55 | 56 | # Randomize the agent start position and orientation 57 | self.place_agent() 58 | 59 | # Choose a random object to be picked up 60 | objIdx = self._rand_int(0, len(objs)) 61 | self.targetType, self.target_color = objs[objIdx] 62 | self.target_pos = objPos[objIdx] 63 | 64 | descStr = '%s %s' % (self.target_color, self.targetType) 65 | self.mission = 'go to the %s' % descStr 66 | #print(self.mission) 67 | 68 | def step(self, action): 69 | obs, reward, done, 
info = MiniGridEnv.step(self, action) 70 | 71 | ax, ay = self.agent_pos 72 | tx, ty = self.target_pos 73 | 74 | # Toggle/pickup action terminates the episode 75 | if action == self.actions.toggle: 76 | done = True 77 | 78 | # Reward performing the done action next to the target object 79 | if action == self.actions.done: 80 | if abs(ax - tx) <= 1 and abs(ay - ty) <= 1: 81 | reward = self._reward() 82 | done = True 83 | 84 | return obs, reward, done, info 85 | 86 | class GotoEnv8x8N2(GoToObjectEnv): 87 | def __init__(self): 88 | super().__init__(size=8, numObjs=2) 89 | 90 | register( 91 | id='MiniGrid-GoToObject-6x6-N2-v0', 92 | entry_point='gym_minigrid.envs:GoToObjectEnv' 93 | ) 94 | 95 | register( 96 | id='MiniGrid-GoToObject-8x8-N2-v0', 97 | entry_point='gym_minigrid.envs:GotoEnv8x8N2' 98 | ) 99 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/keycorridor.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.roomgrid import RoomGrid 2 | from gym_minigrid.register import register 3 | 4 | class KeyCorridor(RoomGrid): 5 | """ 6 | A ball is behind a locked door, the key is placed in a 7 | random room. 8 | """ 9 | 10 | def __init__( 11 | self, 12 | num_rows=3, 13 | obj_type="ball", 14 | room_size=6, 15 | seed=None 16 | ): 17 | self.obj_type = obj_type 18 | 19 | super().__init__( 20 | room_size=room_size, 21 | num_rows=num_rows, 22 | max_steps=30*room_size**2, 23 | seed=seed, 24 | ) 25 | 26 | def _gen_grid(self, width, height): 27 | super()._gen_grid(width, height) 28 | 29 | # Connect the middle column rooms into a hallway 30 | for j in range(1, self.num_rows): 31 | self.remove_wall(1, j, 3) 32 | 33 | # Add a locked door on the bottom right 34 | # Add an object behind the locked door 35 | room_idx = self._rand_int(0, self.num_rows) 36 | door, _ = self.add_door(2, room_idx, 2, locked=True) 37 | obj, _ = self.add_object(2, room_idx, kind=self.obj_type) 38 | 39 | # Add a key in a random room on the left side 40 | self.add_object(0, self._rand_int(0, self.num_rows), 'key', door.color) 41 | 42 | # Place the agent in the middle 43 | self.place_agent(1, self.num_rows // 2) 44 | 45 | # Make sure all rooms are accessible 46 | self.connect_all() 47 | 48 | self.obj = obj 49 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 50 | 51 | def step(self, action): 52 | obs, reward, done, info = super().step(action) 53 | 54 | if action == self.actions.pickup: 55 | if self.carrying and self.carrying == self.obj: 56 | reward = self._reward() 57 | done = True 58 | 59 | return obs, reward, done, info 60 | 61 | class KeyCorridorS3R1(KeyCorridor): 62 | def __init__(self, seed=None): 63 | super().__init__( 64 | room_size=3, 65 | num_rows=1, 66 | seed=seed 67 | ) 68 | 69 | class KeyCorridorS3R2(KeyCorridor): 70 | def __init__(self, seed=None): 71 | super().__init__( 72 | room_size=3, 73 | num_rows=2, 74 | seed=seed 75 | ) 76 | 77 | class KeyCorridorS3R3(KeyCorridor): 78 | def __init__(self, seed=None): 79 | super().__init__( 80 | room_size=3, 81 | num_rows=3, 82 | seed=seed 83 | ) 84 | 85 | class KeyCorridorS4R3(KeyCorridor): 86 | def __init__(self, seed=None): 87 | super().__init__( 88 | room_size=4, 89 | num_rows=3, 90 | seed=seed 91 | ) 92 | 93 | class KeyCorridorS5R3(KeyCorridor): 94 | def __init__(self, seed=None): 95 | super().__init__( 96 | room_size=5, 97 | num_rows=3, 98 | seed=seed 99 | ) 100 | 101 | class KeyCorridorS6R3(KeyCorridor): 102 | def __init__(self, 
seed=None): 103 | super().__init__( 104 | room_size=6, 105 | num_rows=3, 106 | seed=seed 107 | ) 108 | 109 | register( 110 | id='MiniGrid-KeyCorridorS3R1-v0', 111 | entry_point='gym_minigrid.envs:KeyCorridorS3R1' 112 | ) 113 | 114 | register( 115 | id='MiniGrid-KeyCorridorS3R2-v0', 116 | entry_point='gym_minigrid.envs:KeyCorridorS3R2' 117 | ) 118 | 119 | register( 120 | id='MiniGrid-KeyCorridorS3R3-v0', 121 | entry_point='gym_minigrid.envs:KeyCorridorS3R3' 122 | ) 123 | 124 | register( 125 | id='MiniGrid-KeyCorridorS4R3-v0', 126 | entry_point='gym_minigrid.envs:KeyCorridorS4R3' 127 | ) 128 | 129 | register( 130 | id='MiniGrid-KeyCorridorS5R3-v0', 131 | entry_point='gym_minigrid.envs:KeyCorridorS5R3' 132 | ) 133 | 134 | register( 135 | id='MiniGrid-KeyCorridorS6R3-v0', 136 | entry_point='gym_minigrid.envs:KeyCorridorS6R3' 137 | ) 138 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/lavagap.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class LavaGapEnv(MiniGridEnv): 5 | """ 6 | Environment with one wall of lava with a small gap to cross through 7 | This environment is similar to LavaCrossing but simpler in structure. 8 | """ 9 | 10 | def __init__(self, size, obstacle_type=Lava, seed=None): 11 | self.obstacle_type = obstacle_type 12 | super().__init__( 13 | grid_size=size, 14 | max_steps=4*size*size, 15 | # Set this to True for maximum speed 16 | see_through_walls=False, 17 | seed=None 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | assert width >= 5 and height >= 5 22 | 23 | # Create an empty grid 24 | self.grid = Grid(width, height) 25 | 26 | # Generate the surrounding walls 27 | self.grid.wall_rect(0, 0, width, height) 28 | 29 | # Place the agent in the top-left corner 30 | self.agent_pos = (1, 1) 31 | self.agent_dir = 0 32 | 33 | # Place a goal square in the bottom-right corner 34 | self.goal_pos = np.array((width - 2, height - 2)) 35 | self.put_obj(Goal(), *self.goal_pos) 36 | 37 | # Generate and store random gap position 38 | self.gap_pos = np.array(( 39 | self._rand_int(2, width - 2), 40 | self._rand_int(1, height - 1), 41 | )) 42 | 43 | # Place the obstacle wall 44 | self.grid.vert_wall(self.gap_pos[0], 1, height - 2, self.obstacle_type) 45 | 46 | # Put a hole in the wall 47 | self.grid.set(*self.gap_pos, None) 48 | 49 | self.mission = ( 50 | "avoid the lava and get to the green goal square" 51 | if self.obstacle_type == Lava 52 | else "find the opening and get to the green goal square" 53 | ) 54 | 55 | class LavaGapS5Env(LavaGapEnv): 56 | def __init__(self): 57 | super().__init__(size=5) 58 | 59 | class LavaGapS6Env(LavaGapEnv): 60 | def __init__(self): 61 | super().__init__(size=6) 62 | 63 | class LavaGapS7Env(LavaGapEnv): 64 | def __init__(self): 65 | super().__init__(size=7) 66 | 67 | register( 68 | id='MiniGrid-LavaGapS5-v0', 69 | entry_point='gym_minigrid.envs:LavaGapS5Env' 70 | ) 71 | 72 | register( 73 | id='MiniGrid-LavaGapS6-v0', 74 | entry_point='gym_minigrid.envs:LavaGapS6Env' 75 | ) 76 | 77 | register( 78 | id='MiniGrid-LavaGapS7-v0', 79 | entry_point='gym_minigrid.envs:LavaGapS7Env' 80 | ) 81 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/lockedroom.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | from 
gym_minigrid.minigrid import * 3 | from gym_minigrid.register import register 4 | 5 | class Room: 6 | def __init__(self, 7 | top, 8 | size, 9 | doorPos 10 | ): 11 | self.top = top 12 | self.size = size 13 | self.doorPos = doorPos 14 | self.color = None 15 | self.locked = False 16 | 17 | def rand_pos(self, env): 18 | topX, topY = self.top 19 | sizeX, sizeY = self.size 20 | return env._rand_pos( 21 | topX + 1, topX + sizeX - 1, 22 | topY + 1, topY + sizeY - 1 23 | ) 24 | 25 | class LockedRoom(MiniGridEnv): 26 | """ 27 | Environment in which the agent is instructed to go to a given object 28 | named using an English text string 29 | """ 30 | 31 | def __init__( 32 | self, 33 | size=19 34 | ): 35 | super().__init__(grid_size=size, max_steps=10*size) 36 | 37 | def _gen_grid(self, width, height): 38 | # Create the grid 39 | self.grid = Grid(width, height) 40 | 41 | # Generate the surrounding walls 42 | for i in range(0, width): 43 | self.grid.set(i, 0, Wall()) 44 | self.grid.set(i, height-1, Wall()) 45 | for j in range(0, height): 46 | self.grid.set(0, j, Wall()) 47 | self.grid.set(width-1, j, Wall()) 48 | 49 | # Hallway walls 50 | lWallIdx = width // 2 - 2 51 | rWallIdx = width // 2 + 2 52 | for j in range(0, height): 53 | self.grid.set(lWallIdx, j, Wall()) 54 | self.grid.set(rWallIdx, j, Wall()) 55 | 56 | self.rooms = [] 57 | 58 | # Room splitting walls 59 | for n in range(0, 3): 60 | j = n * (height // 3) 61 | for i in range(0, lWallIdx): 62 | self.grid.set(i, j, Wall()) 63 | for i in range(rWallIdx, width): 64 | self.grid.set(i, j, Wall()) 65 | 66 | roomW = lWallIdx + 1 67 | roomH = height // 3 + 1 68 | self.rooms.append(Room( 69 | (0, j), 70 | (roomW, roomH), 71 | (lWallIdx, j + 3) 72 | )) 73 | self.rooms.append(Room( 74 | (rWallIdx, j), 75 | (roomW, roomH), 76 | (rWallIdx, j + 3) 77 | )) 78 | 79 | # Choose one random room to be locked 80 | lockedRoom = self._rand_elem(self.rooms) 81 | lockedRoom.locked = True 82 | goalPos = lockedRoom.rand_pos(self) 83 | self.grid.set(*goalPos, Goal()) 84 | 85 | # Assign the door colors 86 | colors = set(COLOR_NAMES) 87 | for room in self.rooms: 88 | color = self._rand_elem(sorted(colors)) 89 | colors.remove(color) 90 | room.color = color 91 | if room.locked: 92 | self.grid.set(*room.doorPos, Door(color, is_locked=True)) 93 | else: 94 | self.grid.set(*room.doorPos, Door(color)) 95 | 96 | # Select a random room to contain the key 97 | while True: 98 | keyRoom = self._rand_elem(self.rooms) 99 | if keyRoom != lockedRoom: 100 | break 101 | keyPos = keyRoom.rand_pos(self) 102 | self.grid.set(*keyPos, Key(lockedRoom.color)) 103 | 104 | # Randomize the player start position and orientation 105 | self.agent_pos = self.place_agent( 106 | top=(lWallIdx, 0), 107 | size=(rWallIdx-lWallIdx, height) 108 | ) 109 | 110 | # Generate the mission string 111 | self.mission = ( 112 | 'get the %s key from the %s room, ' 113 | 'unlock the %s door and ' 114 | 'go to the goal' 115 | ) % (lockedRoom.color, keyRoom.color, lockedRoom.color) 116 | 117 | def step(self, action): 118 | obs, reward, done, info = MiniGridEnv.step(self, action) 119 | return obs, reward, done, info 120 | 121 | register( 122 | id='MiniGrid-LockedRoom-v0', 123 | entry_point='gym_minigrid.envs:LockedRoom' 124 | ) 125 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/playground_v0.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register 
import register 3 | 4 | class PlaygroundV0(MiniGridEnv): 5 | """ 6 | Environment with multiple rooms and random objects. 7 | This environment has no specific goals or rewards. 8 | """ 9 | 10 | def __init__(self): 11 | super().__init__(grid_size=19, max_steps=100) 12 | 13 | def _gen_grid(self, width, height): 14 | # Create the grid 15 | self.grid = Grid(width, height) 16 | 17 | # Generate the surrounding walls 18 | self.grid.horz_wall(0, 0) 19 | self.grid.horz_wall(0, height-1) 20 | self.grid.vert_wall(0, 0) 21 | self.grid.vert_wall(width-1, 0) 22 | 23 | roomW = width // 3 24 | roomH = height // 3 25 | 26 | # For each row of rooms 27 | for j in range(0, 3): 28 | 29 | # For each column 30 | for i in range(0, 3): 31 | xL = i * roomW 32 | yT = j * roomH 33 | xR = xL + roomW 34 | yB = yT + roomH 35 | 36 | # Bottom wall and door 37 | if i+1 < 3: 38 | self.grid.vert_wall(xR, yT, roomH) 39 | pos = (xR, self._rand_int(yT+1, yB-1)) 40 | color = self._rand_elem(COLOR_NAMES) 41 | self.grid.set(*pos, Door(color)) 42 | 43 | # Bottom wall and door 44 | if j+1 < 3: 45 | self.grid.horz_wall(xL, yB, roomW) 46 | pos = (self._rand_int(xL+1, xR-1), yB) 47 | color = self._rand_elem(COLOR_NAMES) 48 | self.grid.set(*pos, Door(color)) 49 | 50 | # Randomize the player start position and orientation 51 | self.place_agent() 52 | 53 | # Place random objects in the world 54 | types = ['key', 'ball', 'box'] 55 | for i in range(0, 12): 56 | objType = self._rand_elem(types) 57 | objColor = self._rand_elem(COLOR_NAMES) 58 | if objType == 'key': 59 | obj = Key(objColor) 60 | elif objType == 'ball': 61 | obj = Ball(objColor) 62 | elif objType == 'box': 63 | obj = Box(objColor) 64 | self.place_obj(obj) 65 | 66 | # No explicit mission in this environment 67 | self.mission = '' 68 | 69 | def step(self, action): 70 | obs, reward, done, info = MiniGridEnv.step(self, action) 71 | return obs, reward, done, info 72 | 73 | register( 74 | id='MiniGrid-Playground-v0', 75 | entry_point='gym_minigrid.envs:PlaygroundV0' 76 | ) 77 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/putnear.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class PutNearEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to place an object near 7 | another object through a natural language string. 
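    A generated mission reads, e.g., "put the red ball near the green box".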
8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=6, 13 | numObjs=2 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | ) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.horz_wall(0, 0) 29 | self.grid.horz_wall(0, height-1) 30 | self.grid.vert_wall(0, 0) 31 | self.grid.vert_wall(width-1, 0) 32 | 33 | # Types and colors of objects we can generate 34 | types = ['key', 'ball', 'box'] 35 | 36 | objs = [] 37 | objPos = [] 38 | 39 | def near_obj(env, p1): 40 | for p2 in objPos: 41 | dx = p1[0] - p2[0] 42 | dy = p1[1] - p2[1] 43 | if abs(dx) <= 1 and abs(dy) <= 1: 44 | return True 45 | return False 46 | 47 | # Until we have generated all the objects 48 | while len(objs) < self.numObjs: 49 | objType = self._rand_elem(types) 50 | objColor = self._rand_elem(COLOR_NAMES) 51 | 52 | # If this object already exists, try again 53 | if (objType, objColor) in objs: 54 | continue 55 | 56 | if objType == 'key': 57 | obj = Key(objColor) 58 | elif objType == 'ball': 59 | obj = Ball(objColor) 60 | elif objType == 'box': 61 | obj = Box(objColor) 62 | 63 | pos = self.place_obj(obj, reject_fn=near_obj) 64 | 65 | objs.append((objType, objColor)) 66 | objPos.append(pos) 67 | 68 | # Randomize the agent start position and orientation 69 | self.place_agent() 70 | 71 | # Choose a random object to be moved 72 | objIdx = self._rand_int(0, len(objs)) 73 | self.move_type, self.moveColor = objs[objIdx] 74 | self.move_pos = objPos[objIdx] 75 | 76 | # Choose a target object (to put the first object next to) 77 | while True: 78 | targetIdx = self._rand_int(0, len(objs)) 79 | if targetIdx != objIdx: 80 | break 81 | self.target_type, self.target_color = objs[targetIdx] 82 | self.target_pos = objPos[targetIdx] 83 | 84 | self.mission = 'put the %s %s near the %s %s' % ( 85 | self.moveColor, 86 | self.move_type, 87 | self.target_color, 88 | self.target_type 89 | ) 90 | 91 | def step(self, action): 92 | preCarrying = self.carrying 93 | 94 | obs, reward, done, info = super().step(action) 95 | 96 | u, v = self.dir_vec 97 | ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v) 98 | tx, ty = self.target_pos 99 | 100 | # If we picked up the wrong object, terminate the episode 101 | if action == self.actions.pickup and self.carrying: 102 | if self.carrying.type != self.move_type or self.carrying.color != self.moveColor: 103 | done = True 104 | 105 | # If successfully dropping an object near the target 106 | if action == self.actions.drop and preCarrying: 107 | if self.grid.get(ox, oy) is preCarrying: 108 | if abs(ox - tx) <= 1 and abs(oy - ty) <= 1: 109 | reward = self._reward() 110 | done = True 111 | 112 | return obs, reward, done, info 113 | 114 | class PutNear8x8N3(PutNearEnv): 115 | def __init__(self): 116 | super().__init__(size=8, numObjs=3) 117 | 118 | register( 119 | id='MiniGrid-PutNear-6x6-N2-v0', 120 | entry_point='gym_minigrid.envs:PutNearEnv' 121 | ) 122 | 123 | register( 124 | id='MiniGrid-PutNear-8x8-N3-v0', 125 | entry_point='gym_minigrid.envs:PutNear8x8N3' 126 | ) 127 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/redbluedoors.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class 
RedBlueDoorEnv(MiniGridEnv): 5 | """ 6 | Single room with red and blue doors on opposite sides. 7 | The red door must be opened before the blue door to 8 | obtain a reward. 9 | """ 10 | 11 | def __init__(self, size=8): 12 | self.size = size 13 | 14 | super().__init__( 15 | width=2*size, 16 | height=size, 17 | max_steps=20*size*size 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | # Create an empty grid 22 | self.grid = Grid(width, height) 23 | 24 | # Generate the grid walls 25 | self.grid.wall_rect(0, 0, 2*self.size, self.size) 26 | self.grid.wall_rect(self.size//2, 0, self.size, self.size) 27 | 28 | # Place the agent in the top-left corner 29 | self.place_agent(top=(self.size//2, 0), size=(self.size, self.size)) 30 | 31 | # Add a red door at a random position in the left wall 32 | pos = self._rand_int(1, self.size - 1) 33 | self.red_door = Door("red") 34 | self.grid.set(self.size//2, pos, self.red_door) 35 | 36 | # Add a blue door at a random position in the right wall 37 | pos = self._rand_int(1, self.size - 1) 38 | self.blue_door = Door("blue") 39 | self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door) 40 | 41 | # Generate the mission string 42 | self.mission = "open the red door then the blue door" 43 | 44 | def step(self, action): 45 | red_door_opened_before = self.red_door.is_open 46 | blue_door_opened_before = self.blue_door.is_open 47 | 48 | obs, reward, done, info = MiniGridEnv.step(self, action) 49 | 50 | red_door_opened_after = self.red_door.is_open 51 | blue_door_opened_after = self.blue_door.is_open 52 | 53 | if blue_door_opened_after: 54 | if red_door_opened_before: 55 | reward = self._reward() 56 | done = True 57 | else: 58 | reward = 0 59 | done = True 60 | 61 | elif red_door_opened_after: 62 | if blue_door_opened_before: 63 | reward = 0 64 | done = True 65 | 66 | return obs, reward, done, info 67 | 68 | class RedBlueDoorEnv6x6(RedBlueDoorEnv): 69 | def __init__(self): 70 | super().__init__(size=6) 71 | 72 | register( 73 | id='MiniGrid-RedBlueDoors-6x6-v0', 74 | entry_point='gym_minigrid.envs:RedBlueDoorEnv6x6' 75 | ) 76 | 77 | register( 78 | id='MiniGrid-RedBlueDoors-8x8-v0', 79 | entry_point='gym_minigrid.envs:RedBlueDoorEnv' 80 | ) 81 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/unlock.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class Unlock(RoomGrid): 6 | """ 7 | Unlock a door 8 | """ 9 | 10 | def __init__(self, seed=None): 11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Make sure the two rooms are directly connected by a locked door 24 | door, _ = self.add_door(0, 0, 0, locked=True) 25 | # Add a key to unlock the door 26 | self.add_object(0, 0, 'key', door.color) 27 | 28 | self.place_agent(0, 0) 29 | 30 | self.door = door 31 | self.mission = "open the door" 32 | 33 | def step(self, action): 34 | obs, reward, done, info = super().step(action) 35 | 36 | if action == self.actions.toggle: 37 | if self.door.is_open: 38 | reward = self._reward() 39 | done = True 40 | 41 | return obs, reward, done, info 42 | 43 | register( 44 | id='MiniGrid-Unlock-v0', 45 | 
entry_point='gym_minigrid.envs:Unlock' 46 | ) 47 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/unlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class UnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door, then pick up a box in another room 8 | """ 9 | 10 | def __init__(self, seed=None): 11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Add a box to the room on the right 24 | obj, _ = self.add_object(1, 0, kind="box") 25 | # Make sure the two rooms are directly connected by a locked door 26 | door, _ = self.add_door(0, 0, 0, locked=True) 27 | # Add a key to unlock the door 28 | self.add_object(0, 0, 'key', door.color) 29 | 30 | self.place_agent(0, 0) 31 | 32 | self.obj = obj 33 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 34 | 35 | def step(self, action): 36 | obs, reward, done, info = super().step(action) 37 | 38 | if action == self.actions.pickup: 39 | if self.carrying and self.carrying == self.obj: 40 | reward = self._reward() 41 | done = True 42 | 43 | return obs, reward, done, info 44 | 45 | register( 46 | id='MiniGrid-UnlockPickup-v0', 47 | entry_point='gym_minigrid.envs:UnlockPickup' 48 | ) 49 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/register.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register as gym_register 2 | 3 | env_list = [] 4 | 5 | def register( 6 | id, 7 | entry_point, 8 | reward_threshold=0.95 9 | ): 10 | assert id.startswith("MiniGrid-") 11 | assert id not in env_list 12 | 13 | # Register the environment with OpenAI gym 14 | gym_register( 15 | id=id, 16 | entry_point=entry_point, 17 | reward_threshold=reward_threshold 18 | ) 19 | 20 | # Add the environment to the set 21 | env_list.append(id) 22 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/rendering.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def downsample(img, factor): 5 | """ 6 | Downsample an image along both dimensions by some factor 7 | """ 8 | 9 | assert img.shape[0] % factor == 0 10 | assert img.shape[1] % factor == 0 11 | 12 | img = img.reshape([img.shape[0]//factor, factor, img.shape[1]//factor, factor, 3]) 13 | img = img.mean(axis=3) 14 | img = img.mean(axis=1) 15 | 16 | return img 17 | 18 | def fill_coords(img, fn, color): 19 | """ 20 | Fill pixels of an image with coordinates matching a filter function 21 | """ 22 | 23 | for y in range(img.shape[0]): 24 | for x in range(img.shape[1]): 25 | yf = (y + 0.5) / img.shape[0] 26 | xf = (x + 0.5) / img.shape[1] 27 | if fn(xf, yf): 28 | img[y, x] = color 29 | 30 | return img 31 | 32 | def rotate_fn(fin, cx, cy, theta): 33 | def fout(x, y): 34 | x = x - cx 35 | y = y - cy 36 | 37 | x2 = cx + x * math.cos(-theta) - y * math.sin(-theta) 38 | y2 = cy + y * math.cos(-theta) + x * math.sin(-theta) 39 | 40 | return fin(x2, y2) 41 | 42 | return fout 43 | 44 | def point_in_line(x0, 
y0, x1, y1, r): 45 | p0 = np.array([x0, y0]) 46 | p1 = np.array([x1, y1]) 47 | dir = p1 - p0 48 | dist = np.linalg.norm(dir) 49 | dir = dir / dist 50 | 51 | xmin = min(x0, x1) - r 52 | xmax = max(x0, x1) + r 53 | ymin = min(y0, y1) - r 54 | ymax = max(y0, y1) + r 55 | 56 | def fn(x, y): 57 | # Fast, early escape test 58 | if x < xmin or x > xmax or y < ymin or y > ymax: 59 | return False 60 | 61 | q = np.array([x, y]) 62 | pq = q - p0 63 | 64 | # Closest point on line 65 | a = np.dot(pq, dir) 66 | a = np.clip(a, 0, dist) 67 | p = p0 + a * dir 68 | 69 | dist_to_line = np.linalg.norm(q - p) 70 | return dist_to_line <= r 71 | 72 | return fn 73 | 74 | def point_in_circle(cx, cy, r): 75 | def fn(x, y): 76 | return (x-cx)*(x-cx) + (y-cy)*(y-cy) <= r * r 77 | return fn 78 | 79 | def point_in_rect(xmin, xmax, ymin, ymax): 80 | def fn(x, y): 81 | return x >= xmin and x <= xmax and y >= ymin and y <= ymax 82 | return fn 83 | 84 | def point_in_triangle(a, b, c): 85 | a = np.array(a) 86 | b = np.array(b) 87 | c = np.array(c) 88 | 89 | def fn(x, y): 90 | v0 = c - a 91 | v1 = b - a 92 | v2 = np.array((x, y)) - a 93 | 94 | # Compute dot products 95 | dot00 = np.dot(v0, v0) 96 | dot01 = np.dot(v0, v1) 97 | dot02 = np.dot(v0, v2) 98 | dot11 = np.dot(v1, v1) 99 | dot12 = np.dot(v1, v2) 100 | 101 | # Compute barycentric coordinates 102 | inv_denom = 1 / (dot00 * dot11 - dot01 * dot01) 103 | u = (dot11 * dot02 - dot01 * dot12) * inv_denom 104 | v = (dot00 * dot12 - dot01 * dot02) * inv_denom 105 | 106 | # Check if point is in triangle 107 | return (u >= 0) and (v >= 0) and (u + v) < 1 108 | 109 | return fn 110 | 111 | def highlight_img(img, color=(255, 255, 255), alpha=0.30): 112 | """ 113 | Add highlighting to an image 114 | """ 115 | 116 | blend_img = img + alpha * (np.array(color, dtype=np.uint8) - img) 117 | blend_img = blend_img.clip(0, 255).astype(np.uint8) 118 | img[:, :, :] = blend_img 119 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/window.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | # Only ask users to install matplotlib if they actually need it 5 | try: 6 | import matplotlib.pyplot as plt 7 | except: 8 | print('To display the environment in a window, please install matplotlib, eg:') 9 | print('pip3 install --user matplotlib') 10 | sys.exit(-1) 11 | 12 | class Window: 13 | """ 14 | Window to draw a gridworld instance using Matplotlib 15 | """ 16 | 17 | def __init__(self, title): 18 | self.fig = None 19 | 20 | self.imshow_obj = None 21 | 22 | # Create the figure and axes 23 | self.fig, self.ax = plt.subplots() 24 | 25 | # Show the env name in the window title 26 | self.fig.canvas.set_window_title(title) 27 | 28 | # Turn off x/y axis numbering/ticks 29 | self.ax.set_xticks([], []) 30 | self.ax.set_yticks([], []) 31 | 32 | # Flag indicating the window was closed 33 | self.closed = False 34 | 35 | def close_handler(evt): 36 | self.closed = True 37 | 38 | self.fig.canvas.mpl_connect('close_event', close_handler) 39 | 40 | def show_img(self, img): 41 | """ 42 | Show an image or update the image being shown 43 | """ 44 | 45 | # Show the first image of the environment 46 | if self.imshow_obj is None: 47 | self.imshow_obj = self.ax.imshow(img, interpolation='bilinear') 48 | 49 | self.imshow_obj.set_data(img) 50 | self.fig.canvas.draw() 51 | 52 | # Let matplotlib process UI events 53 | # This is needed for interactive mode to work properly 54 | 
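        # A brief pause hands control to the matplotlib GUI event loop so the canvas
        # actually repaints, even when show_img() is called from a tight loop.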
plt.pause(0.001) 55 | 56 | def set_caption(self, text): 57 | """ 58 | Set/update the caption text below the image 59 | """ 60 | 61 | plt.xlabel(text) 62 | 63 | def reg_key_handler(self, key_handler): 64 | """ 65 | Register a keyboard event handler 66 | """ 67 | 68 | # Keyboard handler 69 | self.fig.canvas.mpl_connect('key_press_event', key_handler) 70 | 71 | def show(self, block=True): 72 | """ 73 | Show the window, and start an event loop 74 | """ 75 | 76 | # If not blocking, trigger interactive mode 77 | if not block: 78 | plt.ion() 79 | 80 | # Show the plot 81 | # In non-interative mode, this enters the matplotlib event loop 82 | # In interactive mode, this call does not block 83 | plt.show() 84 | 85 | def close(self): 86 | """ 87 | Close the window 88 | """ 89 | 90 | plt.close() 91 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/manual_control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | from gym_minigrid.wrappers import * 5 | from gym_minigrid.window import Window 6 | 7 | def redraw(img): 8 | if not args.agent_view: 9 | img = env.render('rgb_array', tile_size=args.tile_size) 10 | 11 | window.show_img(img) 12 | 13 | def reset(): 14 | if args.seed != -1: 15 | env.seed(args.seed) 16 | 17 | obs = env.reset() 18 | 19 | if hasattr(env, 'mission'): 20 | print('Mission: %s' % env.mission) 21 | window.set_caption(env.mission) 22 | 23 | redraw(obs) 24 | 25 | def step(action): 26 | obs, reward, done, info = env.step(action) 27 | print('step=%s, reward=%.2f' % (env.step_count, reward)) 28 | 29 | if done: 30 | print('done!') 31 | reset() 32 | else: 33 | redraw(obs) 34 | 35 | def key_handler(event): 36 | print('pressed', event.key) 37 | 38 | if event.key == 'escape': 39 | window.close() 40 | return 41 | 42 | if event.key == 'backspace': 43 | reset() 44 | return 45 | 46 | if event.key == 'left': 47 | step(env.actions.left) 48 | return 49 | if event.key == 'right': 50 | step(env.actions.right) 51 | return 52 | if event.key == 'up': 53 | step(env.actions.forward) 54 | return 55 | 56 | # Spacebar 57 | if event.key == ' ': 58 | step(env.actions.toggle) 59 | return 60 | if event.key == 'pageup': 61 | step(env.actions.pickup) 62 | return 63 | if event.key == 'pagedown': 64 | step(env.actions.drop) 65 | return 66 | 67 | if event.key == 'enter': 68 | step(env.actions.done) 69 | return 70 | 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument( 73 | "--env", 74 | help="gym environment to load", 75 | default='MiniGrid-MultiRoom-N6-v0' 76 | ) 77 | parser.add_argument( 78 | "--seed", 79 | type=int, 80 | help="random seed to generate the environment with", 81 | default=-1 82 | ) 83 | parser.add_argument( 84 | "--tile_size", 85 | type=int, 86 | help="size at which to render tiles", 87 | default=32 88 | ) 89 | parser.add_argument( 90 | '--agent_view', 91 | default=False, 92 | help="draw the agent sees (partially observable view)", 93 | action='store_true' 94 | ) 95 | 96 | args = parser.parse_args() 97 | 98 | env = gym.make(args.env) 99 | 100 | if args.agent_view: 101 | env = RGBImgPartialObsWrapper(env) 102 | env = ImgObsWrapper(env) 103 | 104 | window = Window('gym_minigrid - ' + args.env) 105 | window.reg_key_handler(key_handler) 106 | 107 | reset() 108 | 109 | # Blocking event loop 110 | window.show(block=True) 111 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='gym_minigrid', 5 | version='1.0.1', 6 | keywords='memory, environment, agent, rl, openaigym, openai-gym, gym', 7 | url='https://github.com/maximecb/gym-minigrid', 8 | description='Minimalistic gridworld package for OpenAI Gym', 9 | packages=['gym_minigrid', 'gym_minigrid.envs'], 10 | install_requires=[ 11 | 'gym>=0.9.6', 12 | 'numpy>=1.15.0' 13 | ] 14 | ) 15 | -------------------------------------------------------------------------------- /babyai-text/images/babyai-text_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/images/babyai-text_schema.png -------------------------------------------------------------------------------- /babyai-text/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='babyai_text', 5 | version='0.1.0', 6 | keywords='babyai, text environment', 7 | description='A text-only extension of BabyAI', 8 | packages=['babyai_text', 'babyai_text.levels'], 9 | install_requires=[ 10 | 'colorama', 11 | 'termcolor', 12 | 'matplotlib', 13 | 'ipython', 14 | 'numpy==1.23.1' 15 | ] 16 | ) -------------------------------------------------------------------------------- /docs/images/generalization_tests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/docs/images/generalization_tests.png -------------------------------------------------------------------------------- /docs/images/main_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/docs/images/main_schema.png -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/__pycache__/main.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/main.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/__pycache__/test_llm.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/test_llm.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/__init__.py -------------------------------------------------------------------------------- /experiments/agents/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/base_agent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | class BaseAgent(ABC): 4 | def __init__(self, envs): 5 | self.env = envs 6 | self.dict_translation_actions = {'turn left': "tourner à gauche", 7 | "turn right": "tourner à droite", 8 | "go forward": "aller tout droit", 9 | "pick up": "attraper", 10 | "drop": "lâcher", 11 | "toggle": "basculer", 12 | "eat": "manger", 13 | "dance": "danser", 14 | "sleep": "dormir", 15 | "do nothing": "ne rien faire", 16 | "cut": "couper", 17 | "think": "penser"} 18 | 19 | @abstractmethod 20 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 21 | raise NotImplementedError() 22 | 23 | @abstractmethod 24 | def update_parameters(self): 25 | raise NotImplementedError() 26 | 27 | def generate_prompt(self, goal, subgoals, deque_obs, deque_actions): 28 | ldo = len(deque_obs) 29 | lda = len(deque_actions) 30 | 31 | head_prompt = "Possible action of the agent:" 32 | for sg in subgoals: 33 | head_prompt += " {},".format(sg) 34 | head_prompt = head_prompt[:-1] 35 | 36 | g = " \n Goal of the agent: {}".format(goal) 37 | obs = "" 38 | for i in range(ldo): 39 | obs += " \n Observation {}: ".format(i) 40 | for d_obs in deque_obs[i]: 41 | obs += "{}, ".format(d_obs) 42 | obs += "\n Action {}: ".format(i) 43 | if i < lda: 44 | obs += "{}".format(deque_actions[i]) 45 | return head_prompt + g + obs 46 | 47 | def generate_prompt_french(self, goal, subgoals, deque_obs, deque_actions): 48 | ldo = len(deque_obs) 49 | lda = len(deque_actions) 50 | head_prompt = "Actions possibles pour l'agent:" 51 | for sg in subgoals: 52 | head_prompt += " {},".format(sg) 53 | head_prompt = head_prompt[:-1] 54 | 55 | # translate the goal into French 56 | dict_translation_det = {"the": "la", 57 | 'a': 'une'} 58 | dict_translation_names = {"box": "boîte", 59 | "ball": "balle", 60 | "key": "clef"} 61 | dict_translation_adjs = {'red': 'rouge', 62 | 'green': 'verte', 63 | 'blue': 'bleue', 64 | 'purple': 'violette', 65 | 'yellow': 'jaune', 66 | 'grey': 'grise'} 67 | 68 | det = '' 69 | name = '' 70 | adj = '' 71 | 72 | for k in dict_translation_det.keys(): 73 | if k in goal: 74 | det = dict_translation_det[k] 75 | for k in dict_translation_names.keys(): 76 | if k in goal: 77 | name = dict_translation_names[k] 78 | for k in dict_translation_adjs.keys(): 79 | if k in goal: 80 | adj = dict_translation_adjs[k] 81 | translation_goal = 'aller à ' + det + ' ' + name + ' ' + adj 82 | 83 | g = " \n But de l'agent: 
{}".format(translation_goal) 84 | obs = "" 85 | for i in range(ldo): 86 | obs += " \n Observation {}: ".format(i) 87 | for d_obs in deque_obs[i]: 88 | obs += "{}, ".format(d_obs) 89 | obs += "\n Action {}: ".format(i) 90 | if i < lda: 91 | obs += "{}".format(deque_actions[i]) 92 | return head_prompt + g + obs 93 | 94 | def prompt_modifier(self, prompt: str, dict_changes: dict) -> str: 95 | """use a dictionary of equivalence to modify the prompt accordingly 96 | ex: 97 | prompt= 'green box red box', dict_changes={'box':'tree'} 98 | promp_modifier(prompt, dict_changes)='green tree red tree' """ 99 | 100 | for key, value in dict_changes.items(): 101 | prompt = prompt.replace(key, value) 102 | return prompt 103 | 104 | -------------------------------------------------------------------------------- /experiments/agents/bot/__pycache__/bot.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/bot/__pycache__/bot.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/bot/bot.py: -------------------------------------------------------------------------------- 1 | from babyai.bot import Bot 2 | from babyai.rl.utils import DictList 3 | from collections import deque 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | from agents.base_agent import BaseAgent 8 | 9 | class BotAgent(BaseAgent): 10 | def __init__(self, envs, subgoals): 11 | """An agent based on BabyAI's GOFAI bot.""" 12 | self.env = envs.envs[0] 13 | self.subgoals = subgoals[0] 14 | self.logs = { 15 | "return_per_episode": [], 16 | } 17 | self.obs, self.infos = self.env.reset() 18 | self.bot = Bot(self.env) 19 | 20 | self.obs_queue = deque([], maxlen=3) 21 | self.acts_queue = deque([], maxlen=2) 22 | 23 | self.obs_queue.append(self.infos['descriptions']) 24 | 25 | self.prompts = [] 26 | self.actions = [] 27 | 28 | self.log_done_counter = 0 29 | 30 | def act(self, action_choosen=None): 31 | actions = self.bot.replan(action_choosen) 32 | return actions 33 | 34 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 35 | assert language == "english" 36 | 37 | nbr_frames = 1 38 | pbar = tqdm(range(n_tests), ascii=" " * 9 + ">", ncols=100) 39 | previous_action = None 40 | while self.log_done_counter < n_tests: 41 | nbr_frames += 1 42 | prompt = self.prompt_modifier(self.generate_prompt(goal=self.obs['mission'], subgoals=self.subgoals, 43 | deque_obs=self.obs_queue, 44 | deque_actions=self.acts_queue), dict_modifier) 45 | 46 | action = self.act(previous_action) 47 | # previous_action = action 48 | self.actions.append(self.subgoals[int(action)]) 49 | self.acts_queue.append(self.subgoals[int(action)]) 50 | self.prompts.append(prompt) 51 | 52 | self.obs, reward, done, self.infos = self.env.step(action) 53 | 54 | if done: 55 | self.log_done_counter += 1 56 | pbar.update(1) 57 | self.logs["return_per_episode"].append(reward) 58 | self.obs_queue.clear() 59 | self.acts_queue.clear() 60 | self.obs, infos = self.env.reset() 61 | self.bot = Bot(self.env) 62 | self.obs_queue.append(self.infos['descriptions']) 63 | pbar.close() 64 | 65 | exps = DictList() 66 | exps.prompts = np.array(self.prompts) 67 | exps.actions = np.array(self.actions) 68 | 69 | self.logs["episodes_done"] = self.log_done_counter 70 | self.logs["nbr_frames"] = nbr_frames 71 | self.log_done_counter = 0 72 | return exps, 
self.logs 73 | 74 | def update_parameters(self): 75 | pass 76 | 77 | -------------------------------------------------------------------------------- /experiments/agents/drrn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__init__.py -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/drrn.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/drrn.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This code has been taken from https://github.com/microsoft/tdqn and modified to match our needs 3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import itertools 8 | from .utils.pad_sequences import pad_sequences 9 | from .utils.memory import State 10 | 11 | from accelerate import Accelerator 12 | 13 | accelerator = Accelerator() 14 | device = accelerator.state.device 15 | 16 | class DRRN(torch.nn.Module): 17 | """ 18 | Deep Reinforcement Relevance Network - He et al. '16 19 | 20 | """ 21 | def __init__(self, vocab_size, embedding_dim, hidden_dim): 22 | super(DRRN, self).__init__() 23 | self.embedding = nn.Embedding(vocab_size, embedding_dim) 24 | self.obs_encoder = nn.GRU(embedding_dim, hidden_dim) 25 | self.act_encoder = nn.GRU(embedding_dim, hidden_dim) 26 | self.hidden = nn.Linear(2*hidden_dim, hidden_dim) 27 | self.act_scorer = nn.Linear(hidden_dim, 1) 28 | 29 | 30 | def packed_rnn(self, x, rnn): 31 | """ Runs the provided rnn on the input x. Takes care of packing/unpacking. 
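A minimal usage sketch (hypothetical values): self.packed_rnn([[3, 7, 2], [5]], self.obs_encoder) encodes two unpadded token-id sequences of different lengths and would return a tensor of shape (2, hidden_dim).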
32 | 33 | x: list of unpadded input sequences 34 | Returns a tensor of size: len(x) x hidden_dim 35 | """ 36 | lengths = torch.tensor([len(n) for n in x], dtype=torch.long, device=device) 37 | # Sort this batch in descending order by seq length 38 | lengths, idx_sort = torch.sort(lengths, dim=0, descending=True) 39 | _, idx_unsort = torch.sort(idx_sort, dim=0) 40 | idx_sort = torch.autograd.Variable(idx_sort) 41 | idx_unsort = torch.autograd.Variable(idx_unsort) 42 | padded_x = pad_sequences(x) 43 | x_tt = torch.from_numpy(padded_x).type(torch.long).to(device) 44 | x_tt = x_tt.index_select(0, idx_sort) 45 | # Run the embedding layer 46 | embed = self.embedding(x_tt).permute(1,0,2) # Time x Batch x EncDim 47 | # Pack padded batch of sequences for RNN module 48 | packed = nn.utils.rnn.pack_padded_sequence(embed, lengths.cpu()) 49 | # Run the RNN 50 | out, _ = rnn(packed) 51 | # Unpack 52 | out, _ = nn.utils.rnn.pad_packed_sequence(out) 53 | # Get the last step of each sequence 54 | idx = (lengths-1).view(-1,1).expand(len(lengths), out.size(2)).unsqueeze(0) 55 | out = out.gather(0, idx).squeeze(0) 56 | # Unsort 57 | out = out.index_select(0, idx_unsort) 58 | return out 59 | 60 | 61 | def forward(self, state_batch, act_batch): 62 | """ 63 | Batched forward pass. 64 | obs_id_batch: iterable of unpadded sequence ids 65 | act_batch: iterable of lists of unpadded admissible command ids 66 | 67 | Returns a tuple of tensors containing q-values for each item in the batch 68 | """ 69 | # Zip the state_batch into an easy access format 70 | state = State(*zip(*state_batch)) 71 | # This is number of admissible commands in each element of the batch 72 | act_sizes = [len(a) for a in act_batch] 73 | # Combine next actions into one long list 74 | act_batch = list(itertools.chain.from_iterable(act_batch)) 75 | act_out = self.packed_rnn(act_batch, self.act_encoder) 76 | # Encode the various aspects of the state 77 | state_out = self.packed_rnn(state.obs, self.obs_encoder) 78 | # Expand the state to match the batches of actions 79 | state_out = torch.cat([state_out[i].repeat(j,1) for i,j in enumerate(act_sizes)], dim=0) 80 | z = torch.cat((state_out, act_out), dim=1) # Concat along hidden_dim 81 | z = F.relu(self.hidden(z)) 82 | act_values = self.act_scorer(z).squeeze(-1) 83 | # Split up the q-values by batch 84 | return act_values.split(act_sizes) -------------------------------------------------------------------------------- /experiments/agents/drrn/spm_models/unigram_8k.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/spm_models/unigram_8k.model -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__init__.py -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/memory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/memory.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/pad_sequences.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/pad_sequences.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/memory.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | import json 4 | import sys 5 | 6 | State = namedtuple('State', ('obs')) #, 'description', 'inventory')) 7 | Transition = namedtuple('Transition', ('state', 'act', 'reward', 'next_state', 'next_acts', 'done')) 8 | 9 | 10 | def sample(rng: np.random.RandomState, data: list, k: int): 11 | """ Chooses k unique random elements from a list. """ 12 | return [data[i] for i in rng.choice(len(data), k, replace=False)] 13 | 14 | 15 | class ReplayMemory(object): 16 | def __init__(self, capacity, seed=20210824): 17 | self.capacity = capacity 18 | self.memory = [] 19 | self.position = 0 20 | self.rng = np.random.RandomState(seed) 21 | 22 | def push(self, *args): 23 | if len(self.memory) < self.capacity: 24 | self.memory.append(None) 25 | self.memory[self.position] = Transition(*args) 26 | self.position = (self.position + 1) % self.capacity 27 | 28 | def sample(self, batch_size): 29 | return sample(self.rng, self.memory, batch_size) 30 | 31 | def __len__(self): 32 | return len(self.memory) 33 | 34 | 35 | 36 | class PrioritizedReplayMemory(object): 37 | def __init__(self, capacity=100000, priority_fraction=0.0, seed=20210824): 38 | # Stored 39 | self.capacity = capacity 40 | self.priority_fraction = priority_fraction 41 | self.seed = seed 42 | 43 | # Calculated at init 44 | self.alpha_capacity = int(capacity * priority_fraction) 45 | self.beta_capacity = capacity - self.alpha_capacity 46 | 47 | # Declared 48 | self.alpha_memory, self.beta_memory = [], [] 49 | self.alpha_position, self.beta_position = 0, 0 50 | 51 | # Initialized 52 | self.rng = np.random.RandomState(seed) 53 | 54 | def push(self, is_prior=False, *args): 55 | """Saves a transition.""" 56 | if self.priority_fraction == 0.0: 57 | is_prior = False 58 | if is_prior: 59 | if len(self.alpha_memory) < self.alpha_capacity: 60 | self.alpha_memory.append(None) 61 | self.alpha_memory[self.alpha_position] = Transition(*args) 62 | self.alpha_position = (self.alpha_position + 1) % self.alpha_capacity 63 | else: 64 | if len(self.beta_memory) < self.beta_capacity: 65 | self.beta_memory.append(None) 66 | self.beta_memory[self.beta_position] = Transition(*args) 67 | self.beta_position = (self.beta_position + 1) % self.beta_capacity 68 | 69 | def sample(self, batch_size): 70 | if self.priority_fraction == 0.0: 71 | from_beta = min(batch_size, len(self.beta_memory)) 72 | res = sample(self.rng, self.beta_memory, from_beta) 73 | else: 74 | from_alpha = 
min(int(self.priority_fraction * batch_size), len(self.alpha_memory)) 75 | from_beta = min(batch_size - int(self.priority_fraction * batch_size), len(self.beta_memory)) 76 | res = sample(self.rng, self.alpha_memory, from_alpha) + sample(self.rng, self.beta_memory, from_beta) 77 | 78 | self.rng.shuffle(res) 79 | return res 80 | 81 | def __len__(self): 82 | return len(self.alpha_memory) + len(self.beta_memory) 83 | 84 | def serializeToJSON(self, filenameOut): 85 | print("Serializing to JSON... ") 86 | sys.stdout.flush() 87 | 88 | packed = { 89 | "capacity": self.capacity, 90 | "priority_fraction": self.priority_fraction, 91 | "alpha_memory": self.alpha_memory, 92 | "alpha_position": self.alpha_position, 93 | "beta_memory": self.beta_memory, 94 | "beta_position": self.beta_position, 95 | } 96 | 97 | print(packed) 98 | sys.stdout.flush() 99 | 100 | with open(filenameOut, 'w') as outfile: 101 | outfile.write(json.dumps(packed, cls=NpEncoder, indent=2)) 102 | 103 | print("Completed...") 104 | sys.stdout.flush() 105 | 106 | 107 | class NpEncoder(json.JSONEncoder): 108 | def default(self, obj): 109 | if isinstance(obj, np.integer): 110 | return int(obj) 111 | if isinstance(obj, np.floating): 112 | return float(obj) 113 | if isinstance(obj, np.ndarray): 114 | return obj.tolist() 115 | if isinstance(obj, np.bool_): 116 | return bool(obj) 117 | return super(NpEncoder, self).default(obj) -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/pad_sequences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pad_sequences(sequences, maxlen=None, dtype='int32', value=0.): 5 | ''' 6 | Partially borrowed from Keras 7 | # Arguments 8 | sequences: list of lists where each element is a sequence 9 | maxlen: int, maximum length 10 | dtype: type to cast the resulting sequence. 11 | value: float, value to pad the sequences to the desired value. 12 | # Returns 13 | x: numpy array with dimensions (number_of_sequences, maxlen) 14 | ''' 15 | lengths = [len(s) for s in sequences] 16 | nb_samples = len(sequences) 17 | if maxlen is None: 18 | maxlen = np.max(lengths) 19 | # take the sample shape from the first non empty sequence 20 | # checking for consistency in the main loop below. 
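# e.g. (hypothetical call): pad_sequences([[1, 2, 3], [7]]) -> array([[1, 2, 3], [7, 0, 0]], dtype=int32); each sequence is post-padded with `value` up to the longest length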
21 | sample_shape = tuple() 22 | for s in sequences: 23 | if len(s) > 0: 24 | sample_shape = np.asarray(s).shape[1:] 25 | break 26 | x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype) 27 | for idx, s in enumerate(sequences): 28 | if len(s) == 0: 29 | continue # empty list was found 30 | # pre truncating 31 | trunc = s[-maxlen:] 32 | # check `trunc` has expected shape 33 | trunc = np.asarray(trunc, dtype=dtype) 34 | if trunc.shape[1:] != sample_shape: 35 | raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % 36 | (trunc.shape[1:], idx, sample_shape)) 37 | # post padding 38 | x[idx, :len(trunc)] = trunc 39 | return x 40 | -------------------------------------------------------------------------------- /experiments/agents/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/ppo/__init__.py -------------------------------------------------------------------------------- /experiments/agents/ppo/base_ppo_agent.py: -------------------------------------------------------------------------------- 1 | from agents.base_agent import BaseAgent 2 | 3 | from babyai.rl.utils.supervised_losses import ExtraInfoCollector 4 | 5 | import torch 6 | 7 | class BasePPOAgent(BaseAgent): 8 | def __init__(self, envs, num_frames_per_proc, discount, lr, gae_lambda, entropy_coef, value_loss_coef, 9 | max_grad_norm, reshape_reward, aux_info, device): 10 | """ 11 | Initializes a `BaseAlgo` instance. 12 | 13 | Parameters: 14 | ---------- 15 | envs : list 16 | a list of environments that will be run in parallel 17 | num_frames_per_proc : int 18 | the number of frames collected by every process for an update 19 | discount : float 20 | the discount for future rewards 21 | lr : float 22 | the learning rate for optimizers 23 | gae_lambda : float 24 | the lambda coefficient in the GAE formula 25 | ([Schulman et al., 2015](https://arxiv.org/abs/1506.02438)) 26 | entropy_coef : float 27 | the weight of the entropy cost in the final objective 28 | value_loss_coef : float 29 | the weight of the value loss in the final objective 30 | max_grad_norm : float 31 | gradient will be clipped to be at most this value 32 | reshape_reward : function 33 | a function that shapes the reward, takes an 34 | (observation, action, reward, done) tuple as an input 35 | aux_info : list 36 | a list of strings corresponding to the name of the extra information 37 | retrieved from the environment for supervised auxiliary losses 38 | 39 | """ 40 | super().__init__(envs) 41 | self.num_frames_per_proc = num_frames_per_proc 42 | self.discount = discount 43 | self.lr = lr 44 | self.gae_lambda = gae_lambda 45 | self.entropy_coef = entropy_coef 46 | self.value_loss_coef = value_loss_coef 47 | self.max_grad_norm = max_grad_norm 48 | self.reshape_reward = reshape_reward 49 | self.aux_info = aux_info 50 | 51 | # Store helpers values 52 | self.device = device 53 | self.num_procs = len(envs) 54 | self.num_frames = self.num_frames_per_proc * self.num_procs 55 | 56 | # Initialize experience values 57 | shape = (self.num_frames_per_proc, self.num_procs) 58 | self.obss = [None] * (shape[0]) 59 | 60 | self.mask = torch.ones(shape[1], device=self.device) 61 | self.masks = torch.zeros(*shape, device=self.device) 62 | 63 | self.values = torch.zeros(*shape, device=self.device) 64 | self.rewards = torch.zeros(*shape, 
device=self.device) 65 | self.rewards_bonus = torch.zeros(*shape, device=self.device) 66 | self.advantages = torch.zeros(*shape, device=self.device) 67 | self.log_probs = torch.zeros(*shape, device=self.device) 68 | 69 | if self.aux_info: 70 | self.aux_info_collector = ExtraInfoCollector(self.aux_info, shape, self.device) 71 | 72 | # Initialize log values 73 | self.log_episode_return = torch.zeros(self.num_procs, device=self.device) 74 | self.log_episode_reshaped_return = torch.zeros(self.num_procs, device=self.device) 75 | self.log_episode_reshaped_return_bonus = torch.zeros(self.num_procs, device=self.device) 76 | self.log_episode_num_frames = torch.zeros(self.num_procs, device=self.device) 77 | 78 | self.log_done_counter = 0 79 | self.log_return = [0] * self.num_procs 80 | self.log_reshaped_return = [0] * self.num_procs 81 | self.log_reshaped_return_bonus = [0] * self.num_procs 82 | self.log_num_frames = [0] * self.num_procs 83 | -------------------------------------------------------------------------------- /experiments/agents/random_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__init__.py -------------------------------------------------------------------------------- /experiments/agents/random_agent/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/random_agent/__pycache__/random_agent.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__pycache__/random_agent.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/random_agent/random_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | 4 | from agents.base_agent import BaseAgent 5 | 6 | class Random_agent(BaseAgent): 7 | def __init__(self, envs, subgoals): 8 | super().__init__(envs) 9 | self.env.reset() 10 | self.subgoals = subgoals 11 | self.returns = [0 for _ in range(self.env.num_envs)] 12 | self.logs = { 13 | "return_per_episode": [], 14 | } 15 | 16 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 17 | episodes_done = 0 18 | pbar = tqdm(range(n_tests), ascii=" " * 9 + ">", ncols=100) 19 | while episodes_done < n_tests: 20 | actions = np.random.randint(low=0, high=len(self.subgoals[0]), size=(self.env.num_envs,)) 21 | 22 | if len(self.subgoals[0]) > 6: 23 | # only useful when we test the impact of the number of actions 24 | real_a = np.copy(actions) 25 | real_a[real_a > 6] = 6 26 | obs, rewards, dones, infos = self.env.step(real_a) 27 | else: 28 | obs, rewards, dones, infos = self.env.step(actions) 29 | 30 | for j in range(self.env.num_envs): 31 | self.returns[j] += rewards[j] 32 | if dones[j]: 33 | episodes_done += 1 34 | pbar.update(1) 35 | self.logs["return_per_episode"].append(self.returns[j]) 36 | 
self.returns[j] = 0 37 | pbar.close() 38 | 39 | self.logs["episodes_done"] = episodes_done 40 | return None, self.logs 41 | 42 | def update_parameters(self): 43 | pass 44 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_BC_finetuning/bc_finetuning_Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=bc_finetuning_Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=04:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/bc_finetuning_Flan-T5_large_seed_%a-%j.out # output 5 | #SBATCH --error=slurm_logs/bc_finetuning_Flan-T5_large_seed_%a-%j.err # err 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=64 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/accelerate_launcher.sh 22 | 23 | srun experiments/slurm/accelerate_launcher.sh \ 24 | --config_file $WORK/Grounding_LLMs/experiments/configs/accelerate/default_config.yaml \ 25 | --multi_gpu \ 26 | --num_processes 8 \ 27 | --num_machines 1 \ 28 | experiments/clm_behavioral-cloning.py \ 29 | --output_dir=$WORK/Grounding_LLMs/storage/logs/bc_finetuning_Flan-T5_large_seed_${SLURM_ARRAY_TASK_ID} \ 30 | --model_dir=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 31 | --data_dir=$WORK/Grounding_LLMs/storage/logs/GFlan-T5_large_GoToLocal_seed_${SLURM_ARRAY_TASK_ID}/test/BabyAI-GoToLocal-v0/return_per_episode \ 32 | --per_device_batch_size=8 \ 33 | --gradient_accumulation_steps=1 \ 34 | --seed=${SLURM_ARRAY_TASK_ID} 35 | 36 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_BC_finetuning/bc_finetuning_from-bot_Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=bc_finetuning_from-bot_Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=04:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/bc_finetuning_from-bot_Flan-T5_large_seed_%a-%j.out # output 5 | #SBATCH --error=slurm_logs/bc_finetuning_from-bot_Flan-T5_large_seed_%a-%j.err # err 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=64 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/accelerate_launcher.sh 22 | 23 | srun experiments/slurm/accelerate_launcher.sh \ 24 | --config_file $WORK/Grounding_LLMs/experiments/configs/accelerate/default_config.yaml \ 25 | --multi_gpu \ 26 | --num_processes 8 \ 27 | --num_machines 1 \ 28 | experiments/clm_behavioral-cloning.py \ 29 | --output_dir=$WORK/Grounding_LLMs/storage/logs/bc_finetuning_Flan-T5_large_seed_${SLURM_ARRAY_TASK_ID} \ 30 | --model_dir=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 31 | --data_dir=$WORK/Grounding_LLMs/storage/logs/bot_GoToLocal_seed_${SLURM_ARRAY_TASK_ID}/test/BabyAI-GoToLocal-v0/return_per_episode \ 32 | --per_device_batch_size=8 \ 33 | --gradient_accumulation_steps=1 \ 34 | --seed=${SLURM_ARRAY_TASK_ID} \ 35 | --file_name=bot_trajectories \ 36 | 
--file_id=1 37 | 38 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_tests_no-change/GFlan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=test_GFlan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/test_GFlan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/test_GFlan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/post-training_tests.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.number_episodes=1000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.zero_shot=False \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | lamorel_args.llm_args.model_type=seq2seq \ 36 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 37 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 38 | lamorel_args.llm_args.minibatch_size=3 \ 39 | lamorel_args.accelerate_args.num_machines=1 \ 40 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 41 | --config-name=multi-node_slurm_cluster_config 42 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/DRRN.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=10:00:00 3 | #SBATCH --account= # SLURM ACCOUNT 4 | #SBATCH --job-name=DRRN_seed_%a 5 | #SBATCH -o slurm_logs/DRRN_seed_%a.out 6 | #SBATCH -e slurm_logs/DRRN_seed_%a.err 7 | #SBATCH --ntasks-per-node=1 8 | #SBATCH --nodes=1 9 | #SBATCH --cpus-per-task=20 10 | #SBATCH --gres=gpu:1 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --array=1-2 13 | #SBATCH --qos=qos_gpu-t3 14 | #SBATCH -C v100-32g 15 | 16 | module purge 17 | module load python/3.8.2 18 | conda activate dlp 19 | 20 | srun python experiments/train_language_agent.py \ 21 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 22 | rl_script_args.number_envs=32 \ 23 | rl_script_args.num_steps=1500000 \ 24 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 25 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 26 | rl_script_args.name_experiment='drrn_mtrl' \ 27 | rl_script_args.name_model='DRRN' \ 28 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 29 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 30 | rl_script_args.spm_path=$SCRATCH/Grounding_LLMs/experiments/agents/drrn/spm_models/unigram_8k.model \ 31 | lamorel_args.distributed_setup_args.n_llm_processes=0 \ 32 | 
--config-path=$WORK/Grounding_LLMs/experiments/configs \ 33 | --config-name=multi-node_slurm_cluster_config 34 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/GFlan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=GFlan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/GFlan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/GFlan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/train_language_agent.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.num_steps=1500000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.template_test=1 \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | lamorel_args.llm_args.model_type=seq2seq \ 36 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 37 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 38 | lamorel_args.llm_args.minibatch_size=3 \ 39 | lamorel_args.accelerate_args.num_machines=1 \ 40 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 41 | --config-name=multi-node_slurm_cluster_config 42 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/NPAE-Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=NPAE-Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/NPAE-Flan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/NPAE-Flan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/train_language_agent.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.num_steps=1500000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | 
rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.template_test=1 \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | rl_script_args.load_embedding=true \ 36 | rl_script_args.use_action_heads=true \ 37 | lamorel_args.llm_args.model_type=seq2seq \ 38 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 39 | lamorel_args.llm_args.pretrained=false \ 40 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 41 | lamorel_args.llm_args.minibatch_size=3 \ 42 | lamorel_args.accelerate_args.num_machines=1 \ 43 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 44 | --config-name=multi-node_slurm_cluster_config 45 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/Symbolic-PPO.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=10:00:00 3 | #SBATCH --account= # SLURM ACCOUNT 4 | #SBATCH --job-name=Symbolic-PPO_seed_%a 5 | #SBATCH --ntasks-per-node=1 6 | #SBATCH --nodes=1 7 | #SBATCH --cpus-per-task=20 8 | #SBATCH --gres=gpu:1 9 | #SBATCH --hint=nomultithread 10 | #SBATCH -o slurm_logs/Symbolic-PPO_seed_%a.out 11 | #SBATCH -e slurm_logs/Symbolic-PPO_seed_%a.err 12 | #SBATCH --array=1-2 13 | #SBATCH --qos=qos_gpu-t3 14 | #SBATCH -C v100-32g 15 | 16 | module purge 17 | module load python/3.8.2 18 | conda activate dlp 19 | 20 | srun experiments/slurm/train_symbolic_ppo.sh BabyAI-MixedTrainLocal-v0 MTRL 6 ${SLURM_ARRAY_TASK_ID} 21 | -------------------------------------------------------------------------------- /experiments/configs/accelerate/default_config.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: { } 3 | distributed_type: MULTI_GPU 4 | fsdp_config: { } 5 | machine_rank: 0 6 | main_process_ip: 127.0.0.1 7 | main_process_port: 12345 8 | main_training_function: main 9 | mixed_precision: 'no' 10 | num_machines: 1 11 | num_processes: 2 12 | use_cpu: false -------------------------------------------------------------------------------- /experiments/configs/local_gpu_config.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: true 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 1 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: 2 11 | llm_args: 12 | model_type: seq2seq 13 | model_path: t5-small 14 | pretrained: true 15 | minibatch_size: 4 16 | pre_encode_inputs: true 17 | parallelism: 18 | use_gpu: false 19 | model_parallelism_size: 1 20 | synchronize_gpus_after_scoring: false 21 | empty_cuda_cache_after_scoring: false 22 | rl_script_args: 23 | path: ??? 24 | seed: 1 25 | number_envs: 2 26 | num_steps: 1000 27 | max_episode_steps: 3 28 | frames_per_proc: 40 29 | reward_shaping_beta: 0 30 | discount: 0.99 31 | lr: 1e-6 32 | beta1: 0.9 33 | beta2: 0.999 34 | gae_lambda: 0.99 35 | entropy_coef: 0.01 36 | value_loss_coef: 0.5 37 | max_grad_norm: 0.5 38 | adam_eps: 1e-5 39 | clip_eps: 0.2 40 | epochs: 4 41 | batch_size: 16 42 | action_space: ["turn_left","turn_right","go_forward","pick_up","drop","toggle"] 43 | saving_path_logs: ??? 44 | name_experiment: 'llm_mtrl' 45 | name_model: 'T5small' 46 | saving_path_model: ??? 
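# note: '???' marks a mandatory value (OmegaConf/Hydra convention) that presumably must be supplied at launch time, e.g. via an override such as rl_script_args.saving_path_model=...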
47 | name_environment: 'BabyAI-MixedTestLocal-v0' 48 | number_episodes: 10 49 | language: 'english' 50 | load_embedding: true 51 | use_action_heads: false 52 | template_test: 1 53 | zero_shot: true 54 | modified_action_space: false 55 | new_action_space: #["rotate_left","rotate_right","move_ahead","take","release","switch"] 56 | spm_path: "YOUR_PATH_TO_PROJECT/experiments/agents/drrn/spm_models/unigram_8k.model" 57 | random_agent: true 58 | get_example_trajectories: false 59 | nbr_obs: 3 60 | im_learning: false 61 | im_path: "" 62 | bot: false 63 | -------------------------------------------------------------------------------- /experiments/configs/multi-node_slurm_cluster_config.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: true 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 4 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: ??? 11 | num_processes: ??? 12 | main_process_ip: ??? 13 | main_process_port: 12345 14 | llm_args: 15 | model_type: ??? 16 | model_path: ??? 17 | pretrained: true 18 | minibatch_size: ??? 19 | pre_encode_inputs: true 20 | parallelism: 21 | use_gpu: true 22 | model_parallelism_size: ??? 23 | synchronize_gpus_after_scoring: false 24 | empty_cuda_cache_after_scoring: false 25 | updater_args: 26 | rl_script_args: 27 | path: ??? 28 | seed: ??? 29 | number_envs: ??? 30 | num_steps: ??? 31 | max_episode_steps: 3 32 | frames_per_proc: 40 33 | reward_shaping_beta: 0 34 | discount: 0.99 35 | lr: 1e-6 36 | beta1: 0.9 37 | beta2: 0.999 38 | gae_lambda: 0.99 39 | entropy_coef: 0.01 40 | value_loss_coef: 0.5 41 | max_grad_norm: 0.5 42 | adam_eps: 1e-5 43 | clip_eps: 0.2 44 | epochs: 4 45 | batch_size: 64 46 | action_space: ??? 47 | saving_path_logs: ??? 48 | name_experiment: ??? 49 | name_model: ??? 50 | saving_path_model: ??? 51 | name_environment: ??? 52 | nbr_obs: 3 53 | language: 'english' 54 | load_embedding: false 55 | use_action_heads: false 56 | template_test: 1 57 | spm_path: '' 58 | -------------------------------------------------------------------------------- /experiments/configs/multi-node_slurm_cluster_config_test.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: false 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 4 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: ??? 11 | num_processes: ??? 12 | main_process_ip: ??? 13 | llm_args: 14 | model_type: ??? 15 | model_path: ??? 16 | pretrained: true 17 | minibatch_size: ??? 18 | pre_encode_inputs: true 19 | parallelism: 20 | use_gpu: true 21 | model_parallelism_size: ??? 22 | synchronize_gpus_after_scoring: false 23 | empty_cuda_cache_after_scoring: false 24 | updater_args: 25 | rl_script_args: 26 | path: ??? 27 | seed: ??? 28 | number_envs: ??? 29 | num_steps: 100 30 | max_episode_steps: 3 31 | frames_per_proc: 40 32 | reward_shaping_beta: 0 33 | discount: 0.99 34 | lr: 1e-6 35 | beta1: 0.9 36 | beta2: 0.999 37 | gae_lambda: 0.99 38 | entropy_coef: 0.01 39 | value_loss_coef: 0.5 40 | max_grad_norm: 0.5 41 | adam_eps: 1e-5 42 | clip_eps: 0.2 43 | epochs: 4 44 | batch_size: 64 45 | action_space: ??? 46 | saving_path_logs: ??? 47 | name_experiment: ??? 48 | name_model: ??? 49 | saving_path_model: ??? 50 | name_environment: ??? 
51 | nbr_obs: 3 52 | number_episodes: ??? 53 | zero_shot: ??? 54 | language: 'english' 55 | modified_action_space: false 56 | new_action_space: [] 57 | spm_path: ??? 58 | random_agent: false 59 | im_learning: false 60 | im_path: ??? 61 | get_example_trajectories: false 62 | bot: false 63 | -------------------------------------------------------------------------------- /experiments/plot_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/plot_utils/__init__.py -------------------------------------------------------------------------------- /experiments/slurm/accelerate_launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | sed -n "1p") 4 | echo "running on node $(hostname)" 5 | accelerate launch --machine_rank $SLURM_PROCID --main_process_ip $MASTER_ADDR --main_process_port 13370 $* -------------------------------------------------------------------------------- /experiments/slurm/lamorel_launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | sed -n "1p") 4 | echo "running process ${SLURM_PROCID} on node $(hostname) with master ${MASTER_ADDR}" 5 | export "DLP_STORAGE"='storage' 6 | python -m lamorel_launcher.launch lamorel_args.accelerate_args.machine_rank=$SLURM_PROCID lamorel_args.accelerate_args.main_process_ip=$MASTER_ADDR $* -------------------------------------------------------------------------------- /experiments/slurm/train_symbolic_ppo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export BABYAI_STORAGE='storage' 3 | export DLP_STORAGE='storage' 4 | python -m experiments.train_symbolic_ppo \ 5 | --arch expert_filmcnn \ 6 | --env $1 \ 7 | --hrl vanilla \ 8 | --log-interval 1 --save-interval 15 --val-interval 15 --val-episodes 128 \ 9 | --procs 64 --frames-per-proc 40 --recurrence 20 \ 10 | --seed $4 \ 11 | --number-actions $3 \ 12 | --frames 400000 \ 13 | --model $2-nbr_actions-$3-PPO-NoPre-$4 \ 14 | #--wb 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece>=0.1.91 2 | tensorboard==2.7.0 3 | tensorboard-data-server==0.6.1 4 | tensorboard-plugin-wit==1.8.0 5 | tensorboardX==1.8 6 | torch>1.8.1 7 | protobuf==3.20.* 8 | pyyaml 9 | transformers 10 | accelerate 11 | scipy 12 | openai 13 | matplotlib 14 | colorama 15 | termcolor 16 | imageio 17 | wandb 18 | ipython 19 | tqdm==4.64.0 20 | datasets 21 | --------------------------------------------------------------------------------