├── .gitignore ├── LICENSE.md ├── README.md ├── babyai-text ├── .gitignore ├── README.md ├── babyai │ ├── .gitignore │ ├── .travis.yml │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── babyai │ │ ├── QA.py │ │ ├── QA_simple.py │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── base.py │ │ ├── batchsampler.py │ │ ├── bot.py │ │ ├── evaluate.py │ │ ├── l_class.py │ │ ├── levels │ │ │ ├── __init__.py │ │ │ ├── bonus_levels.py │ │ │ ├── iclr19_levels.py │ │ │ ├── levelgen.py │ │ │ ├── test_levels.py │ │ │ └── verifier.py │ │ ├── model.py │ │ ├── paral_env_simple.py │ │ ├── plotting.py │ │ ├── plotting_paper.py │ │ ├── rl │ │ │ ├── LICENSE │ │ │ ├── __init__.py │ │ │ ├── algos │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── ppo.py │ │ │ ├── format.py │ │ │ ├── model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── dictlist.py │ │ │ │ ├── penv.py │ │ │ │ └── supervised_losses.py │ │ ├── shaped_env.py │ │ ├── shaped_env_paral.py │ │ ├── test_paral.py │ │ ├── trainer_l_class.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── agent.py │ │ │ ├── demos.py │ │ │ ├── format.py │ │ │ ├── log.py │ │ │ ├── model.py │ │ │ └── viz.py │ ├── docs │ │ ├── bonus_levels.md │ │ ├── codebase.md │ │ ├── iclr19_levels.md │ │ ├── train-eval.md │ │ └── troubleshooting.md │ ├── environment.yaml │ ├── nn │ │ ├── GPTJ_with_value_head.py │ │ ├── __init__.py │ │ ├── dec_QA.py │ │ ├── enc_lang.py │ │ ├── enc_lang_QA.py │ │ ├── enc_visual.py │ │ ├── enc_vl.py │ │ ├── encodings.py │ │ ├── model_util.py │ │ └── transforms.py │ ├── run_tests.py │ ├── scripts │ │ ├── .gitignore │ │ ├── GPR.py │ │ ├── __init__.py │ │ ├── compute_possible_instructions.py │ │ ├── enjoy.py │ │ ├── eval_bot.py │ │ ├── evaluate.py │ │ ├── evaluate_all_demos.py │ │ ├── evaluate_all_models.py │ │ ├── instruction_handler.py │ │ ├── learn_baseline.py │ │ ├── learn_baseline_model.py │ │ ├── make_agent_demos.py │ │ ├── make_subtask_recipe_demos.py │ │ ├── manual_control.py │ │ ├── result_l_class_study.py │ │ ├── show_level_instructions.py │ │ ├── subtask_prediction.py │ │ ├── subtask_prediction_model.py │ │ ├── test_PPO.py │ │ ├── test_rl.py │ │ ├── trace_agent_traj.py │ │ ├── train_il.py │ │ ├── train_intelligent_expert.py │ │ ├── train_l_class.py │ │ ├── train_learn_baseline_model.py │ │ ├── train_rl.py │ │ ├── train_rl_paral.py │ │ └── train_subtask_prediction_model.py │ └── setup.py ├── babyai_text │ ├── __init__.py │ └── levels │ │ ├── __init__.py │ │ └── mixed_seq_levels.py ├── gym-minigrid │ ├── .gitignore │ ├── .travis.yml │ ├── LICENSE │ ├── README.md │ ├── benchmark.py │ ├── figures │ │ ├── BlockedUnlockPickup.png │ │ ├── DistShift1.png │ │ ├── DistShift2.png │ │ ├── KeyCorridorS3R1.png │ │ ├── KeyCorridorS3R2.png │ │ ├── KeyCorridorS3R3.png │ │ ├── KeyCorridorS4R3.png │ │ ├── KeyCorridorS5R3.png │ │ ├── KeyCorridorS6R3.png │ │ ├── LavaCrossingS11N5.png │ │ ├── LavaCrossingS9N1.png │ │ ├── LavaCrossingS9N2.png │ │ ├── LavaCrossingS9N3.png │ │ ├── LavaGapS6.png │ │ ├── ObstructedMaze-1Dl.png │ │ ├── ObstructedMaze-1Dlh.png │ │ ├── ObstructedMaze-1Dlhb.png │ │ ├── ObstructedMaze-1Q.png │ │ ├── ObstructedMaze-2Dl.png │ │ ├── ObstructedMaze-2Dlh.png │ │ ├── ObstructedMaze-2Dlhb.png │ │ ├── ObstructedMaze-2Q.png │ │ ├── ObstructedMaze-4Q.png │ │ ├── SimpleCrossingS11N5.png │ │ ├── SimpleCrossingS9N1.png │ │ ├── SimpleCrossingS9N2.png │ │ ├── SimpleCrossingS9N3.png │ │ ├── Unlock.png │ │ ├── UnlockPickup.png │ │ ├── door-key-curriculum.gif │ │ ├── door-key-env.png │ │ ├── dynamic_obstacles.gif │ │ ├── empty-env.png │ │ ├── 
fetch-env.png │ │ ├── four-rooms-env.png │ │ ├── gotodoor-6x6.mp4 │ │ ├── gotodoor-6x6.png │ │ └── multi-room.gif │ ├── gym_minigrid │ │ ├── __init__.py │ │ ├── envs │ │ │ ├── __init__.py │ │ │ ├── blockedunlockpickup.py │ │ │ ├── crossing.py │ │ │ ├── distshift.py │ │ │ ├── doorkey.py │ │ │ ├── dynamicobstacles.py │ │ │ ├── empty.py │ │ │ ├── fetch.py │ │ │ ├── fourrooms.py │ │ │ ├── gotodoor.py │ │ │ ├── gotoobject.py │ │ │ ├── keycorridor.py │ │ │ ├── lavagap.py │ │ │ ├── lockedroom.py │ │ │ ├── memory.py │ │ │ ├── multiroom.py │ │ │ ├── obstructedmaze.py │ │ │ ├── playground_v0.py │ │ │ ├── putnear.py │ │ │ ├── redbluedoors.py │ │ │ ├── unlock.py │ │ │ └── unlockpickup.py │ │ ├── minigrid.py │ │ ├── register.py │ │ ├── rendering.py │ │ ├── roomgrid.py │ │ ├── window.py │ │ └── wrappers.py │ ├── manual_control.py │ ├── run_tests.py │ └── setup.py ├── images │ └── babyai-text_schema.png └── setup.py ├── docs └── images │ ├── generalization_tests.png │ └── main_schema.png ├── experiments ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── main.cpython-310.pyc │ └── test_llm.cpython-310.pyc ├── agents │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── base_agent.py │ ├── bot │ │ ├── __pycache__ │ │ │ └── bot.cpython-310.pyc │ │ └── bot.py │ ├── drrn │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── drrn.cpython-310.pyc │ │ │ └── model.cpython-310.pyc │ │ ├── drrn.py │ │ ├── model.py │ │ ├── spm_models │ │ │ ├── unigram_8k.model │ │ │ └── unigram_8k.vocab │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── memory.cpython-310.pyc │ │ │ └── pad_sequences.cpython-310.pyc │ │ │ ├── memory.py │ │ │ └── pad_sequences.py │ ├── ppo │ │ ├── __init__.py │ │ ├── base_ppo_agent.py │ │ ├── llm_ppo_agent.py │ │ └── symbolic_ppo_agent.py │ └── random_agent │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── random_agent.cpython-310.pyc │ │ └── random_agent.py ├── campaign │ ├── Mixed_BC_finetuning │ │ ├── bc_finetuning_Flan-T5_large.slurm │ │ └── bc_finetuning_from-bot_Flan-T5_large.slurm │ ├── Mixed_tests_no-change │ │ └── GFlan-T5_large.slurm │ └── Mixed_training │ │ ├── DRRN.slurm │ │ ├── GFlan-T5_large.slurm │ │ ├── NPAE-Flan-T5_large.slurm │ │ └── Symbolic-PPO.slurm ├── clm_behavioral-cloning.py ├── configs │ ├── accelerate │ │ └── default_config.yaml │ ├── local_gpu_config.yaml │ ├── multi-node_slurm_cluster_config.yaml │ └── multi-node_slurm_cluster_config_test.yaml ├── plot_utils │ ├── __init__.py │ ├── plotting_paper.py │ └── plotting_results.py ├── post-training_tests.py ├── slurm │ ├── accelerate_launcher.sh │ ├── lamorel_launcher.sh │ └── train_symbolic_ppo.sh ├── test_results.py ├── train_language_agent.py └── train_symbolic_ppo.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | useless/ 2 | storage/ 3 | slurm_logs/ 4 | plots/ 5 | outputs/ 6 | notebooks/* 7 | !notebooks/*.ipynb 8 | /old_slurms/ 9 | .idea 10 | .DS_Store 11 | *.pyc 12 | .hydra/* -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Flowers Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /babyai-text/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # pycharm 104 | .idea/ 105 | 106 | # storage 107 | storage/ 108 | 109 | # pytorch weights, training history 110 | *.csv 111 | *.pt 112 | *.json 113 | -------------------------------------------------------------------------------- /babyai-text/README.md: -------------------------------------------------------------------------------- 1 | # BabyAI-Text 2 | BabyAI-Text is a wrapper on top of BabyAI to make it a text-only environment returning a textual description of the agent's observation. 3 | ![Main schema](images/babyai-text_schema.png) 4 | 5 | ## Installation 6 | 1. Install BabyAI 7 | ``` 8 | pip install blosc; cd babyai-text/babyai; pip install -e .; cd .. 9 | ``` 10 | 2. Install gym-minigrid 11 | ``` 12 | cd gym-minigrid; pip install -e.; cd .. 13 | ``` 14 | 3. 
Install BabyAI-Text 15 | ``` 16 | pip install -e . 17 | ``` 18 | 19 | ## New environment 20 | We introduce two new environments containing a mix of BabyAI's tasks (*Go to*, *Pick up*, *Put next to*, *Unlock*, *Pick up go to*, *Pick up pick up*): 21 | - **BabyAI-MixedTrainLocal**: Training tasks in a single room setup (without some objects and the *Pick up pick up* task) 22 | - **BabyAI-MixedTestLocal**: Test tasks in a single room setup (including never seen objects and the *Pick up pick up* task) 23 | 24 | To use them, import our package and create the GYM environment: 25 | ```python 26 | import gym 27 | import babyai_text 28 | 29 | env = gym.make("BabyAI-MixedTrainLocal") 30 | ``` -------------------------------------------------------------------------------- /babyai-text/babyai/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # pycharm 104 | .idea/ 105 | 106 | # storage 107 | storage/ 108 | 109 | # pytorch weights, training history 110 | *.csv 111 | *.pt 112 | *.json 113 | -------------------------------------------------------------------------------- /babyai-text/babyai/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | python: 4 | - "3.5" 5 | 6 | before_install: 7 | - pip3 install --upgrade pip 8 | 9 | # command to install dependencies 10 | install: 11 | - pip3 install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp35-cp35m-linux_x86_64.whl 12 | - pip3 install flake8 13 | - pip3 install scikit-build 14 | - pip3 install --editable . 15 | 16 | # command to run tests 17 | script: 18 | # Check the source code for obvious errors 19 | - python3 -m flake8 . 
--count --show-source --statistics --select=E901,E999,F821,F822,F823 20 | 21 | # Test the BabyAI levels 22 | - ./run_tests.py 23 | 24 | # Quickly exercise the RL training code 25 | - time python3 -m scripts.train_rl --env BabyAI-GoToObj-v0 --algo ppo --procs 4 --batch-size 80 --log-interval 1 --save-interval 2 --val-episodes 10 --frames 300 --arch cnn1 --instr-dim 16 --image-dim 16 --memory-dim 16 26 | 27 | # Check that the bot works on a few episodes of Boss Level 28 | - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 29 | - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3 30 | # Check that the bot works on a single episode from each level 31 | - python3 -m scripts.eval_bot --num_runs 1 32 | 33 | # Quickly test the generation of bot demos 34 | - python3 -m scripts.make_agent_demos --env BabyAI-GoToRedBallGrey-v0 --episodes 100 --valid-episodes 32 35 | 36 | # Quickly test the evaluation of bot demos 37 | - python3 -m scripts.evaluate --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent 38 | 39 | # Quick test for imitation learning 40 | - python3 -m scripts.train_il --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent --model GoToRedBallGrey-il --val-interval 1 --patience 0 --episodes 100 --val-episodes 50 41 | 42 | # Quickly test the evaluation of models 43 | - python3 -m scripts.evaluate --env BabyAI-GoToRedBallGrey-v0 --model GoToRedBallGrey-il 44 | 45 | # Quick test for imitation learning with multi env 46 | - python3 -m scripts.train_il --multi-env BabyAI-GoToRedBall-v0 BabyAI-GoToRedBallGrey-v0 --multi-demos BabyAI-GoToRedBallGrey-v0_agent BabyAI-GoToRedBallGrey-v0_agent --val-interval 1 --patience 0 --multi-episodes 100 100 --val-episodes 50 47 | 48 | # Quick test for train_intelligent_expert 49 | - python3 -m scripts.train_intelligent_expert --env BabyAI-GoToRedBallGrey-v0 --demos BabyAI-GoToRedBallGrey-v0_agent --val-interval 1 --patience 0 --val-episodes 50 --start-demos 10 --num-eval-demos 5 --phases 2 50 | -------------------------------------------------------------------------------- /babyai-text/babyai/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Instructions for Contributors 2 | 3 | To contribute to this project, you should first create your own fork, and remember to periodically [sync changes from this repository](https://stackoverflow.com/questions/7244321/how-do-i-update-a-github-forked-repository). You can then create [pull requests](https://yangsu.github.io/pull-request-tutorial/) for modifications you have made. Your changes will be tested and reviewed before they are merged into this repository. If you are not familiar with forks and pull requests, we recommend doing a Google or YouTube search to find many useful tutorials on the topic. 4 | 5 | Also, you can have a look at the [codebase structure](docs/codebase.md) before getting started. 6 | 7 | A suggested flow for contributing would be: 8 | First, open up a new feature branch to solve an existing bug/issue 9 | ```bash 10 | $ git checkout -b upstream/master 11 | ``` 12 | This ensures that the branch is up-to-date with the `master` branch of the main repository, irrespective of the status of your forked repository. 
13 | 14 | Once you are done making commits of your changes / adding the feature, you can: 15 | (In case this is the first set of commits from this _new_ local branch) 16 | ```bash 17 | git push --set-upstream origin 18 | ``` 19 | (Assuming the name of your forked repository remote is `origin`), which will create a new branch `` 20 | tracking your local ``, in case it hasn't been created already. 21 | 22 | Then, create a [pull request](https://help.github.com/en/articles/about-pull-requests) in this repository. -------------------------------------------------------------------------------- /babyai-text/babyai/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Maxime Chevalier-Boisvert 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/__init__.py: -------------------------------------------------------------------------------- 1 | # Import levels so that the OpenAI Gym environments get registered 2 | # when the babyai package is imported 3 | from . import levels 4 | from . 
import utils 5 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/base.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | # from alfred.utils import data_util 4 | 5 | 6 | class Model(nn.Module): 7 | def __init__(self, args, emb_ann_size, numb_action, pad): 8 | ''' 9 | Abstract model 10 | ''' 11 | nn.Module.__init__(self) 12 | self.args = args 13 | self.numb_action = numb_action 14 | self.pad = pad 15 | # shape manually given TO IMPROVE as in ET 16 | # self.visual_tensor_shape = data_util.read_dataset_info( 17 | # args.data['train'][0])['feat_shape'][1:] 18 | self.visual_tensor_shape = [128, 2, 2] 19 | # self.visual_tensor_shape = [512, 7, 7] 20 | # create language and action embeddings 21 | 22 | self.emb_ann = nn.Embedding(emb_ann_size, args.demb) 23 | 24 | # dropouts 25 | self.dropout_vis = nn.Dropout(args.dropout['vis'], inplace=True) 26 | self.dropout_lang = nn.Dropout2d(args.dropout['lang']) 27 | 28 | def init_weights(self, init_range=0.1): 29 | ''' 30 | init linear layers in embeddings 31 | ''' 32 | self.emb_ann.weight.data.uniform_(-init_range, init_range) 33 | 34 | def compute_metrics(self, model_out, gt_dict, metrics_dict, verbose): 35 | ''' 36 | compute model-specific metrics and put it to metrics dict 37 | ''' 38 | raise NotImplementedError 39 | 40 | def forward(self, vocab, **inputs): 41 | ''' 42 | forward the model for multiple time-steps (used for training) 43 | ''' 44 | raise NotImplementedError() 45 | 46 | def compute_batch_loss(self, model_out, gt_dict): 47 | ''' 48 | compute the loss function for a single batch 49 | ''' 50 | raise NotImplementedError() 51 | 52 | def compute_loss(self, model_outs, gt_dicts): 53 | ''' 54 | compute the loss function for several batches 55 | ''' 56 | # compute losses for each batch 57 | losses = {} 58 | for dataset_key in model_outs.keys(): 59 | losses[dataset_key] = self.compute_batch_loss( 60 | model_outs[dataset_key], gt_dicts[dataset_key]) 61 | return losses 62 | 63 | def compute_batch_DOE(self, model_out, gt_dict): 64 | ''' 65 | compute the DOE for a single batch 66 | ''' 67 | raise NotImplementedError() 68 | 69 | def compute_DOE(self, model_outs): 70 | ''' 71 | compute the DOE for several batches 72 | ''' 73 | # compute losses for each batch 74 | DOE= {} 75 | for dataset_key in model_outs.keys(): 76 | DOE[dataset_key] = self.compute_batch_DOE(model_outs[dataset_key]) 77 | return DOE 78 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/batchsampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | class BatchSampler(object): 5 | """ 6 | Class used to sample a batch of demonstrations from demonstrations of multiple 7 | environments based on a distribution. 8 | Used for Teacher Student Curriculum setting in imitation learning. 
9 | """ 10 | 11 | def __init__(self, demos, batch_size, seed, no_mem=False): 12 | self.num_task = len(demos) 13 | self.dist_task = np.ones(self.num_task) / self.num_task * 1.0 14 | self.demos = demos 15 | self.batch_size = batch_size 16 | self.no_mem = no_mem 17 | self.rng = np.random.RandomState(seed) 18 | 19 | self.total_demos = 0 20 | self.num_used_demos = 0 21 | self.current_demos = [None] * self.num_task 22 | self.current_ids = [None] * self.num_task 23 | for tid in range(self.num_task): 24 | self.total_demos += self.reset(tid) 25 | 26 | self.tracking_total_demos = self.total_demos 27 | 28 | def setDist(self, dist_task): 29 | self.dist_task = dist_task 30 | 31 | def reset(self, tid): 32 | np.random.shuffle(self.demos[tid]) 33 | self.current_demos[tid] = self.demos[tid] 34 | self.current_ids[tid] = 0 35 | 36 | return len(self.demos[tid]) 37 | 38 | def sample(self): 39 | 40 | batch = [] 41 | for i in range(self.batch_size): 42 | tid = self.rng.choice(range(len(self.dist_task)), p=self.dist_task) 43 | cid = self.current_ids[tid] 44 | if cid >= len(self.current_demos[tid]): 45 | self.reset(tid) 46 | cid = self.current_ids[tid] 47 | 48 | batch += [self.current_demos[tid][cid]] 49 | self.current_ids[tid] += 1 50 | 51 | if self.no_mem: 52 | batch = np.array(batch) 53 | 54 | self.num_used_demos += self.batch_size 55 | should_evaluate = self.num_used_demos >= self.tracking_total_demos 56 | if should_evaluate: 57 | self.tracking_total_demos += self.total_demos 58 | return batch, should_evaluate -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/l_class.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | from babyai import base 7 | from nn.enc_lang_QA import EncoderLang_QA 8 | from nn.enc_visual import FeatureFlat, SimpleEncoder 9 | from nn.enc_vl import EncoderVL 10 | # from alfred.nn.encodings import DatasetLearnedEncoding 11 | from nn.dec_QA import QAClassifier 12 | 13 | class Model(base.Model): 14 | def __init__(self, args, emb_ann_size, numb_action, pad): 15 | ''' 16 | transformer agent 17 | ''' 18 | super().__init__(args, emb_ann_size, numb_action, pad) 19 | 20 | # pre-encoder for language tokens 21 | self.encoder_lang = EncoderLang_QA(args.encoder_lang['layers'], args) 22 | 23 | # dataset id learned encoding (applied after the encoder_lang) 24 | self.dataset_enc = None 25 | 26 | # decoder parts 27 | encoder_output_size = args.demb 28 | self.dec_QA = QAClassifier(encoder_output_size, args['vocab_path']) 29 | 30 | # final touch 31 | self.init_weights() 32 | self.reset() 33 | 34 | def forward(self, vocab, **inputs): 35 | ''' 36 | forward the model for multiple time-steps (used for training) 37 | ''' 38 | # embed language 39 | indexes = torch.squeeze((inputs['questions'] == 1).nonzero(as_tuple=False)[:, 1:], dim=1) 40 | indexes_3d = torch.unsqueeze(torch.unsqueeze(indexes, dim=1), dim=1) 41 | output = {} 42 | emb_lang, lengths_lang = self.embed_lang(inputs['questions'], vocab) 43 | emb_lang = self.dataset_enc(emb_lang, vocab) if self.dataset_enc else emb_lang 44 | 45 | decoder_input = emb_lang.reshape(-1, self.args.demb) 46 | answer_flat = self.dec_QA(decoder_input) # B*language_seq x voc_size 47 | answers = answer_flat.view( 48 | *emb_lang.shape[:2], *answer_flat.shape[1:]) # B x language_seq x voc_size 49 | 50 | indices = torch.mul(indexes_3d, torch.ones((answers.shape[0], 1, answers.shape[2]), 
device=torch.device("cuda"))).type(torch.LongTensor).cuda() # B x 1 x voc_size 51 | answers = torch.gather(answers, 1, indices) # B x 1 x voc_size 52 | answers = answers.reshape(-1, answers.shape[2]) # B x voc_size 53 | 54 | output.update({'answers': answers}) 55 | return output 56 | 57 | def embed_lang(self, lang_pad, vocab): 58 | ''' 59 | take a list of annotation tokens and extract embeddings with EncoderLang 60 | ''' 61 | assert lang_pad.max().item() < len(vocab) 62 | embedder_lang = self.emb_ann 63 | emb_lang, lengths_lang = self.encoder_lang( 64 | lang_pad, embedder_lang, vocab, self.pad) 65 | if self.args.detach_lang_emb: 66 | emb_lang = emb_lang.clone().detach() 67 | return emb_lang, lengths_lang 68 | 69 | 70 | def reset(self): 71 | ''' 72 | reset internal states (used for real-time execution during eval) 73 | ''' 74 | self.frames_traj = torch.zeros(1, 0, *self.visual_tensor_shape) 75 | self.action_traj = torch.zeros(1, 0).long() 76 | 77 | 78 | 79 | def compute_batch_loss(self, model_out, gt_dict): 80 | ''' 81 | loss function for Seq2Seq agent 82 | ''' 83 | losses = dict() 84 | 85 | # answer classes loss 86 | answer_pred = model_out['answers'].view(-1, model_out['answers'].shape[-1]) 87 | answer_gt = gt_dict['answers'].view(-1) 88 | answer_loss = F.cross_entropy(answer_pred, answer_gt, reduction='mean') 89 | losses['answers'] = answer_loss 90 | 91 | # prediction of <> loss 92 | no_answer_pred = model_out['no_answers'].view(-1, model_out['no_answers'].shape[-1]) 93 | no_answer_gt = gt_dict['no_answers'].view(-1) 94 | no_answer_loss = F.cross_entropy(no_answer_pred, no_answer_gt, reduction='mean') 95 | losses['no_answers'] = no_answer_loss 96 | 97 | return losses 98 | 99 | 100 | def init_weights(self, init_range=0.1): 101 | ''' 102 | init embeddings uniformly 103 | ''' 104 | super().init_weights(init_range) 105 | 106 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/levels/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from . import iclr19_levels 4 | from . import bonus_levels 5 | from . import test_levels 6 | 7 | from .levelgen import test, level_dict 8 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Lucas Willems 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.algos.ppo import PPOAlgo 2 | from babyai.rl.utils import DictList 3 | from babyai.rl.model import ACModel, RecurrentACModel, ETModel 4 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/algos/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.algos.ppo import PPOAlgo 2 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/format.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def default_preprocess_obss(obss, device=None): 4 | return torch.tensor(obss, device=device) -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/model.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod, abstractproperty 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class ACModel: 6 | recurrent = False 7 | 8 | @abstractmethod 9 | def __init__(self, obs_space, action_space): 10 | pass 11 | 12 | @abstractmethod 13 | def forward(self, obs): 14 | pass 15 | 16 | class RecurrentACModel(ACModel): 17 | recurrent = True 18 | 19 | @abstractmethod 20 | def forward(self, obs, memory): 21 | pass 22 | 23 | @property 24 | @abstractmethod 25 | def memory_size(self): 26 | pass 27 | 28 | class ETModel(nn.Module): 29 | def __init__(self, args, embs_ann, vocab_out, pad, seg): 30 | ''' 31 | Abstract model 32 | ''' 33 | nn.Module.__init__(self) 34 | self.args = args 35 | self.vocab_out = vocab_out 36 | self.pad, self.seg = pad, seg 37 | self.visual_tensor_shape = data_util.read_dataset_info( 38 | args.data['train'][0])['feat_shape'][1:] 39 | 40 | # create language and action embeddings 41 | self.embs_ann = nn.ModuleDict({}) 42 | for emb_name, emb_size in embs_ann.items(): 43 | self.embs_ann[emb_name] = nn.Embedding(emb_size, args.demb) 44 | 45 | # dropouts 46 | self.dropout_vis = nn.Dropout(args.dropout['vis'], inplace=True) 47 | self.dropout_lang = nn.Dropout2d(args.dropout['lang']) 48 | 49 | def init_weights(self, init_range=0.1): 50 | ''' 51 | init linear layers in embeddings 52 | ''' 53 | for emb_ann in self.embs_ann.values(): 54 | emb_ann.weight.data.uniform_(-init_range, init_range) 55 | 56 | 57 | def forward(self, vocab, **inputs): 58 | ''' 59 | forward the model for multiple time-steps (used for training) 60 | ''' 61 | raise NotImplementedError() -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from babyai.rl.utils.dictlist import DictList 2 | from babyai.rl.utils.penv import ParallelEnv -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/dictlist.py: 
-------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class DictList(dict): 5 | """A dictionnary of lists of same size. Dictionnary items can be 6 | accessed using `.` notation and list items using `[]` notation. 7 | 8 | Example: 9 | >>> d = DictList({"a": [[1, 2], [3, 4]], "b": [[5], [6]]}) 10 | >>> d.a 11 | [[1, 2], [3, 4]] 12 | >>> d[0] 13 | DictList({"a": [1, 2], "b": [5]}) 14 | """ 15 | 16 | __getattr__ = dict.__getitem__ 17 | __setattr__ = dict.__setitem__ 18 | 19 | def __len__(self): 20 | return len(next(iter(dict.values(self)))) 21 | 22 | def __getitem__(self, index): 23 | return DictList({key: value[index] for key, value in dict.items(self)}) 24 | 25 | def __setitem__(self, index, d): 26 | for key, value in d.items(): 27 | dict.__getitem__(self, key)[index] = value 28 | 29 | def shuffle_lists_same_order(self): 30 | """ 31 | return the dictionnary with each list of the dictionnary shuffled such that: 32 | list_1[i]=list_2[i]=list_1[i_shuffle]=list_2[i_shuffle] 33 | 34 | Example: 35 | >>> d = DictList({"a":[1, 2, 3], "b":[4, 5, 6]}) 36 | >>> d.shuffle_lists_same_order() 37 | DictList({"a":[3, 1, 2], "b":[6, 4, 5]}) 38 | """ 39 | keys = list(dict.keys(self)) 40 | len_keys = len(keys) 41 | map_list = list(zip(*[v for v in dict.values(self)])) 42 | random.shuffle(map_list) 43 | l = list(zip(*map_list)) 44 | return DictList({keys[i]: list(l[i]) for i in range(len_keys)}) 45 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/rl/utils/penv.py: -------------------------------------------------------------------------------- 1 | from torch.multiprocessing import Process, Pipe 2 | import gym 3 | from tqdm import tqdm 4 | import logging 5 | import torch 6 | from tqdm import tqdm 7 | logger = logging.getLogger(__name__) 8 | import concurrent.futures 9 | 10 | # For multiprocessing 11 | def worker(conn, env): 12 | while True: 13 | cmd, data = conn.recv() 14 | if cmd == "step": 15 | obs, reward, done, info = env.step(data) 16 | if done: 17 | obs = env.reset() 18 | conn.send((obs, reward, done, info)) 19 | elif cmd == "reset": 20 | obs = env.reset() 21 | conn.send(obs) 22 | else: 23 | raise NotImplementedError 24 | 25 | # For multithreading 26 | def thread(env, cmd, *args): 27 | if cmd == "step": 28 | obs, reward, done, info = env.step(args[0]) 29 | if done: 30 | obs = env.reset() 31 | return obs, reward, done, info 32 | elif cmd == "reset": 33 | obs = env.reset() 34 | return obs 35 | else: 36 | raise NotImplementedError 37 | 38 | class ParallelEnv(gym.Env): 39 | """A concurrent execution of environments in multiple processes.""" 40 | 41 | def __init__(self, envs, use_procs=False): 42 | assert len(envs) >= 1, "No environment given." 
43 | 44 | self.envs = envs 45 | self.observation_space = self.envs[0].observation_space 46 | self.action_space = self.envs[0].action_space 47 | self.use_procs = use_procs 48 | 49 | if self.use_procs: 50 | self.locals = [] 51 | self.processes = [] 52 | for env in tqdm(self.envs[1:]): 53 | local, remote = Pipe() 54 | self.locals.append(local) 55 | p = Process(target=worker, args=(remote, env)) 56 | p.daemon = True 57 | p.start() 58 | remote.close() 59 | self.processes.append(p) 60 | 61 | def reset(self): 62 | if self.use_procs: 63 | for local in self.locals: 64 | local.send(("reset", None)) 65 | proc_results = [] 66 | for local in self.locals: 67 | proc_results.append(local.recv()) 68 | results = [self.envs[0].reset()] + proc_results 69 | # results = [self.envs[0].reset()] + [local.recv() for local in self.locals] 70 | else: 71 | with concurrent.futures.ThreadPoolExecutor() as executor: 72 | futures = [executor.submit(thread, self.envs[i], "reset") for i in range(len(self.envs))] 73 | results = [f.result() for f in futures] 74 | return results 75 | 76 | def step(self, actions): 77 | if self.use_procs: 78 | for local, action in zip(self.locals, actions[1:]): 79 | local.send(("step", action)) 80 | obs, reward, done, info = self.envs[0].step(actions[0]) 81 | if done: 82 | obs = self.envs[0].reset() 83 | results = zip(*[(obs, reward, done, info)] + [local.recv() for local in self.locals]) 84 | else: 85 | with concurrent.futures.ThreadPoolExecutor(max_workers=64) as executor: 86 | futures = [executor.submit(thread, self.envs[i], "step", actions[i]) for i in range(len(self.envs))] 87 | results = [f.result() for f in futures] 88 | results = zip(*results) 89 | return results 90 | 91 | def render(self): 92 | raise NotImplementedError 93 | 94 | def __del__(self): 95 | if self.use_procs: 96 | for p in self.processes: 97 | p.terminate() -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy 4 | import torch 5 | from babyai.utils.agent import load_agent, ModelAgent, DemoAgent, BotAgent 6 | from babyai.utils.demos import ( 7 | load_demos, load_voc, save_demos, synthesize_demos, get_demos_path, get_demos_QG_path, get_demos_QG_voc_path) 8 | from babyai.utils.format import ObssPreprocessor, ObssContPreprocessor, ObssDirPreprocessor, IntObssPreprocessor, InstructionOnlyPreprocessor, get_vocab_path 9 | from babyai.utils.log import ( 10 | get_log_path, get_log_dir, synthesize, configure_logging) 11 | from babyai.utils.model import get_model_dir, load_model, save_model, load_stactpredictor_model, save_stactpredictor_model 12 | from babyai.utils.viz import watch, viz, info, clear 13 | 14 | def storage_dir(): 15 | # defines the storage directory to be in the root (Same level as babyai folder) 16 | print(os.environ) 17 | return os.environ.get("DLP_STORAGE", '') 18 | 19 | 20 | def create_folders_if_necessary(path): 21 | dirname = os.path.dirname(path) 22 | if not(os.path.isdir(dirname)): 23 | os.makedirs(dirname) 24 | 25 | 26 | def seed(seed): 27 | random.seed(seed) 28 | numpy.random.seed(seed) 29 | torch.manual_seed(seed) 30 | if torch.cuda.is_available(): 31 | torch.cuda.manual_seed_all(seed) 32 | 33 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/log.py: -------------------------------------------------------------------------------- 1 | import os 
2 | import sys 3 | import numpy 4 | import logging 5 | 6 | from .. import utils 7 | 8 | 9 | def get_log_dir(log_name): 10 | return os.path.join(utils.storage_dir(), "logs", log_name) 11 | 12 | 13 | def get_log_path(log_name): 14 | return os.path.join(get_log_dir(log_name), "log.log") 15 | 16 | 17 | def synthesize(array): 18 | import collections 19 | d = collections.OrderedDict() 20 | d["mean"] = numpy.mean(array) 21 | d["std"] = numpy.std(array) 22 | if len(array) > 0: 23 | d["min"] = numpy.amin(array) 24 | d["max"] = numpy.amax(array) 25 | else: 26 | d["min"] = numpy.nan 27 | d["max"] = numpy.nan 28 | return d 29 | 30 | 31 | def configure_logging(log_name): 32 | path = get_log_path(log_name) 33 | utils.create_folders_if_necessary(path) 34 | 35 | logging.basicConfig( 36 | level=logging.INFO, 37 | format="%(name)s: %(asctime)s: %(message)s", 38 | handlers=[ 39 | logging.FileHandler(filename=path), 40 | logging.StreamHandler(sys.stdout) 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /babyai-text/babyai/babyai/utils/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from .. import utils 5 | 6 | 7 | def get_model_dir(model_name): 8 | return os.path.join(utils.storage_dir(), "models", model_name) 9 | 10 | 11 | def get_model_path(model_name): 12 | return os.path.join(get_model_dir(model_name), "model.pt") 13 | 14 | def get_stactpredictor_model_path(model_name): 15 | return os.path.join(get_model_dir(model_name), "stactpredictor_model.pt") 16 | 17 | 18 | def load_model(model_name, raise_not_found=True): 19 | path = get_model_path(model_name) 20 | try: 21 | model = torch.load(path) 22 | model.eval() 23 | return model 24 | except FileNotFoundError: 25 | if raise_not_found: 26 | raise FileNotFoundError("No model found at {}".format(path)) 27 | 28 | def load_stactpredictor_model(model_name, raise_not_found=True): 29 | path = get_stactpredictor_model_path(model_name) 30 | try: 31 | model = torch.load(path) 32 | model.eval() 33 | return model 34 | except FileNotFoundError: 35 | if raise_not_found: 36 | raise FileNotFoundError("No model found at {}".format(path)) 37 | 38 | 39 | def save_model(model, model_name, writer): 40 | path = get_model_path(model_name) 41 | utils.create_folders_if_necessary(path) 42 | torch.save(model, path) 43 | if writer: 44 | writer.save(path) 45 | 46 | def save_stactpredictor_model(model, model_name, writer): 47 | path = get_stactpredictor_model_path(model_name) 48 | utils.create_folders_if_necessary(path) 49 | torch.save(model, path) 50 | if writer: 51 | writer.save(path) 52 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/codebase.md: -------------------------------------------------------------------------------- 1 | # Structure of the Codebase 2 | In `babyai`: 3 | - `levels` contains the code for all levels 4 | - `bot.py` is a heuristic stack-based bot that can solve all levels 5 | - `imitation.py` is an imitation learning implementation 6 | - `rl` contains an implementation of the Proximal Policy Optimization (PPO) RL algorithm 7 | - `model.py` contains the neural network code 8 | 9 | In `scripts`: 10 | - use `train_il.py` to train an agent with imitation learning, using demonstrations from the bot, from another agent or even provided by a human 11 | - use `train_rl.py` to train an agent with reinforcement learning 12 | - use `make_agent_demos.py` to generate demonstrations with the 
bot or with another agent 13 | - use `make_human_demos.py` to make and save human demonstrations 14 | - use `train_intelligent_expert.py` to train an agent with an interactive imitation learning algorithm that incrementally grows the training set by adding demonstrations for the missions that the agent currently fails 15 | - use `evaluate.py` to evaluate a trained agent 16 | - use `enjoy.py` to visualize an agent's behavior 17 | - use `manual_control.py` to visualize example missions from BabyAI levels 18 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/iclr19_levels.md: -------------------------------------------------------------------------------- 1 | # ICLR19 Levels 2 | 3 | The levels described in this file were created for the ICLR19 submission. 4 | These form a curriculum that is subdivided according to specific competencies. 5 | 6 | ## GoToObj 7 | 8 | Go to an object, inside a single room with no doors, no distractors. 9 | 10 | 
11 | 12 | ## GoToRedBall 13 | 14 | Go to the red ball, single room, with obstacles. 15 | The obstacles/distractors are all the same, to eliminate 16 | perceptual complexity. 17 | 18 |
19 | 20 | ## GoToRedBallGrey 21 | 22 | Go to the red ball, single room, with obstacles. 23 | The obstacles/distractors are all grey boxes, to eliminate 24 | perceptual complexity. No unblocking required. 25 | 26 |
27 | 28 | ## GoToLocal 29 | 30 | Go to an object, inside a single room with no doors, no distractors. 31 | 32 |
33 | 34 | ## PutNextLocal 35 | 36 | Put an object next to another object, inside a single room 37 | with no doors, no distractors. 38 | 39 |
40 | 41 | ## PickUpLoc 42 | 43 | Pick up an object which may be described using its location. This is a 44 | single room environment. 45 | 46 | Competencies: PickUp, Loc. No unblocking. 47 | 48 |
49 | 50 | ## GoToObjMaze 51 | 52 | Go to an object, the object may be in another room. No distractors. 53 | 54 |
55 | 56 | ## GoTo 57 | 58 | Go to an object, the object may be in another room. Many distractors. 59 | 60 |
61 | 62 | ## Pickup 63 | 64 | Pick up an object, the object may be in another room. 65 | 66 |
67 | 68 | ## UnblockPickup 69 | 70 | Pick up an object, the object may be in another room. The path may 71 | be blocked by one or more obstructors. 72 | 73 |
74 | 75 | ## Open 76 | 77 | Open a door, which may be in another room. 78 | 79 |
80 | 81 | ## Unlock 82 | 83 | Maze environment where the agent has to retrieve a key to open a locked door. 84 | 85 | Competencies: Maze, Open, Unlock. No unblocking. 86 | 87 |
88 | 89 | ## PutNext 90 | 91 | Put an object next to another object. Either of these may be in another room. 92 | 93 |
94 | 95 | ## Synth 96 | 97 | Union of all instructions from PutNext, Open, Goto and PickUp. The agent 98 | may need to move objects around. The agent may have to unlock the door, 99 | but only if it is explicitly referred to by the instruction. 100 | 101 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open 102 | 103 | 
104 | 105 | ## SynthLoc 106 | 107 | Like Synth, but a significant share of object descriptions involves 108 | location language like in PickUpLoc. No implicit unlocking. 109 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc 110 | 111 |
112 | 113 | ## GoToSeq 114 | 115 | Sequencing of go-to-object commands. 116 | 117 | Competencies: Maze, GoTo, Seq. No locked room. No locations. No unblocking. 118 | 119 |
120 | 121 | ## SynthSeq 122 | 123 | Like SynthLoc, but now with multiple commands, combined just like in GoToSeq. 124 | 125 | Competencies: Maze, Unblock, Unlock, GoTo, PickUp, PutNext, Open, Loc, Seq. No implicit unlocking. 126 | 127 |
128 | 129 | ## GoToImpUnlock 130 | 131 | Go to an object, which may be in a locked room. No unblocking. 132 | 133 | Competencies: Maze, GoTo, ImpUnlock 134 | 135 |
136 | 137 | ## BossLevel 138 | 139 | Command can be any sentence drawn from the Baby Language grammar. Union of 140 | all competencies. This level is a superset of all other levels. 141 | 142 |
143 | -------------------------------------------------------------------------------- /babyai-text/babyai/docs/train-eval.md: -------------------------------------------------------------------------------- 1 | # Training 2 | 3 | To train an RL agent run e.g. 4 | 5 | ``` 6 | scripts/train_rl.py --env BabyAI-GoToLocal-v0 7 | ``` 8 | 9 | Folders `logs/` and `models/` will be created in the current directory. The default name 10 | for the model is chosen based on the level name, the current time and the other settings (e.g. 11 | `BabyAI-GoToLocal-v0_ppo_expert_filmcnn_gru_mem_seed1_18-10-12-12-45-02`). You can also choose the model 12 | name by setting `--model`. After 5 hours of training you should be getting a success rate of 97-99\%. 13 | A machine readable log can be found in `logs/<model>/log.csv`, a human readable in `logs/<model>/log.log`. 14 | 15 | To train an agent with IL (imitation learning) first make sure that you have your demonstrations in 16 | `demos/` (Instructions to load the demos are present [here](demo-dataset.md)). Then run e.g. 17 | 18 | ``` 19 | scripts/train_il.py --env BabyAI-GoToLocal-v0 --demos <demos> 20 | ``` 21 | 22 | In the example above we run scripts from the root of the repository, but if you have installed BabyAI as 23 | described above, you can also run all scripts with commands like `<PATH-TO-BABYAI-REPO>/scripts/train_il.py`. 24 | 25 | # Evaluation 26 | 27 | In the same directory where you trained your model run e.g. 28 | 29 | ``` 30 | scripts/evaluate.py --env BabyAI-GoToLocal-v0 --model <model> 31 | ``` 32 | 33 | to evaluate the performance of your model named `<model>` on 1000 episodes. If you want to see 34 | your agent performing, run 35 | 36 | ``` 37 | scripts/enjoy.py --env BabyAI-GoToLocal-v0 --model <model> 38 | ``` -------------------------------------------------------------------------------- /babyai-text/babyai/docs/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | If you run into error messages relating to OpenAI gym or PyQT, it may be that the version of those libraries that you have installed is incompatible. You can try upgrading specific libraries with pip3, eg: `pip3 install --upgrade gym`. If the problem persists, please [open an issue](https://github.com/mila-iqia/babyai/issues/new) on this repository and paste a *complete* error message, along with some information about your platform (are you running Windows, Mac, Linux? Are you running this on a Mila machine?). 4 | 5 | ## If you cannot install PyQT 6 | 7 | If you cannot install PyQT using pip, another option is to install it using conda instead: 8 | 9 | ``` 10 | conda install -c anaconda pyqt 11 | ``` 12 | 13 | Alternatively, it is also possible to install PyQT5 manually: 14 | 15 | ``` 16 | wget https://files.pythonhosted.org/packages/98/61/fcd53201a23dd94a1264c29095821fdd55c58b4cd388dc7115e5288866db/PyQt5-5.12.1-5.12.2-cp35.cp36.cp37.cp38-abi3-manylinux1_x86_64.whl 17 | PYTHONPATH="" 18 | pip3 install --user PyQt5-5.12.1-5.12.2-cp35.cp36.cp37.cp38-abi3-manylinux1_x86_64.whl 19 | ``` 20 | 21 | Finally, if none of the above options work, note that PyQT is only needed to produce graphics for human viewing, and isn't needed during training. As such, it's possible to install BabyAI without PyQT and train a policy. To do so, you can comment out the `gym_minigrid` dependency in `setup.py`, clone the [gym-minigrid repository](https://github.com/maximecb/gym-minigrid) manually, and comment out the `pyqt5` dependency in the `setup.py` of the minigrid repository. 
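For reference, a minimal sketch of that manual route (the clone location and the exact way each `setup.py` lists its dependencies are assumptions; adapt them to your checkout):

```
# get gym-minigrid (URL from the link above) and install it without its PyQT dependency
git clone https://github.com/maximecb/gym-minigrid
cd gym-minigrid
# comment out the pyqt5 entry in this repository's setup.py, then:
pip3 install --user -e .
# likewise, comment out the gym_minigrid dependency in BabyAI's own setup.py before installing BabyAI
```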
22 | -------------------------------------------------------------------------------- /babyai-text/babyai/environment.yaml: -------------------------------------------------------------------------------- 1 | name: babyai 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.6 7 | - pytorch=1.4 8 | - numpy 9 | - blosc 10 | - pip 11 | - pip: 12 | - gym 13 | - scikit-build 14 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai/nn/__init__.py -------------------------------------------------------------------------------- /babyai-text/babyai/nn/dec_QA.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle as pkl 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | class QAClassifier(nn.Module): 8 | ''' 9 | object classifier module (a single FF layer) 10 | ''' 11 | def __init__(self, input_size, vocab_path): 12 | super().__init__() 13 | with open(vocab_path, 'rb') as filehandle: 14 | # read the data as binary data stream 15 | vocab_list = pkl.load(filehandle)['answer'] 16 | num_classes = len(vocab_list) 17 | self.linear = nn.Linear(input_size, num_classes) 18 | 19 | def forward(self, x): 20 | out = self.linear(x) 21 | return out -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_lang.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils.rnn import pad_sequence 7 | 8 | from nn.encodings import PosLangEncoding, InstrLangEncoding 9 | 10 | 11 | class EncoderLang(nn.Module): 12 | def __init__(self, num_layers, args, 13 | subgoal_token='<>', goal_token='<>'): 14 | ''' 15 | transformer encoder for language inputs 16 | ''' 17 | super(EncoderLang, self).__init__() 18 | self.subgoal_token = subgoal_token 19 | self.goal_token = goal_token 20 | 21 | # transformer layers 22 | encoder_layer = nn.TransformerEncoderLayer( 23 | args.demb, args.encoder_heads, args.demb, 24 | args.dropout['transformer']['encoder']) 25 | if args.encoder_lang['shared']: 26 | enc_transformer = nn.TransformerEncoder( 27 | encoder_layer, num_layers) 28 | self.enc_transformers = enc_transformer 29 | else: 30 | self.enc_transformers = nn.TransformerEncoder( 31 | encoder_layer, num_layers) 32 | 33 | # encodings 34 | self.enc_pos = PosLangEncoding(args.demb) if args.encoder_lang['pos_enc'] else None 35 | self.enc_instr = InstrLangEncoding(args.demb) if args.encoder_lang['instr_enc'] else None 36 | self.enc_layernorm = nn.LayerNorm(args.demb) 37 | self.enc_dropout = nn.Dropout(args.dropout['lang'], inplace=True) 38 | 39 | def forward(self, lang_pad, embedder, vocab, pad): 40 | ''' 41 | pass embedded inputs through embeddings and encode them using a transformer 42 | ''' 43 | # pad the input language sequences and embed them with a linear layer 44 | mask_pad = (lang_pad == pad) 45 | emb_lang = embedder(lang_pad) 46 | # add positional encodings 47 | mask_token = EncoderLang.mask_token( 48 | lang_pad, vocab, {self.subgoal_token, self.goal_token}) 49 | emb_lang = self.encode_inputs(emb_lang, mask_token, mask_pad) 50 | 
# pass the inputs through the encoder 51 | hiddens = EncoderLang.encoder( 52 | self.enc_transformers, emb_lang, mask_pad, vocab) 53 | lengths = (lang_pad != pad).sum(dim=1) 54 | return hiddens, lengths 55 | 56 | @staticmethod 57 | def mask_token(lang_pad, vocab, tokens): 58 | ''' 59 | returns mask of the tokens 60 | ''' 61 | tokens_mask = torch.zeros_like(lang_pad).long() 62 | for token in tokens: 63 | tokens_mask += lang_pad == vocab.word2index(token) 64 | return tokens_mask.bool() 65 | 66 | @staticmethod 67 | def encoder(encoders, emb_lang, mask_pad, mask_attn=None): 68 | ''' 69 | compute encodings for all tokens using a normal flat encoder 70 | ''' 71 | # skip mask: mask padded words 72 | if mask_attn is None: 73 | # attention mask: all tokens can attend to all others 74 | mask_attn = torch.zeros( 75 | (mask_pad.shape[1], mask_pad.shape[1]), device=mask_pad.device).float() 76 | # encode the inputs 77 | output = encoders( 78 | emb_lang.transpose(0, 1), 79 | mask_attn, 80 | mask_pad).transpose(0, 1) 81 | return output 82 | 83 | def encode_inputs(self, emb_lang, mask_token, mask_pad): 84 | ''' 85 | add positional encodings, apply layernorm and dropout 86 | ''' 87 | emb_lang = self.enc_pos(emb_lang) if self.enc_pos else emb_lang 88 | emb_lang = self.enc_instr(emb_lang, mask_token) if self.enc_instr else emb_lang 89 | emb_lang = self.enc_dropout(emb_lang) 90 | emb_lang = self.enc_layernorm(emb_lang) 91 | return emb_lang 92 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_lang_QA.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.nn.utils.rnn import pad_sequence 7 | 8 | from nn.enc_lang import EncoderLang 9 | from nn.encodings import PosLangEncoding, InstrLangEncoding 10 | 11 | class EncoderLang_QA(EncoderLang): 12 | def __init__(self, num_layers, args, 13 | subgoal_token='<>', goal_token='<>', question_token='<>'): 14 | ''' 15 | transformer encoder for language inputs 16 | ''' 17 | super(EncoderLang_QA, self).__init__(num_layers, args) 18 | self.subgoal_token = subgoal_token 19 | self.goal_token = goal_token 20 | self.question_token = question_token 21 | 22 | # transofmer layers 23 | encoder_layer = nn.TransformerEncoderLayer( 24 | args.demb, args.encoder_heads, args.demb, 25 | args.dropout['transformer']['encoder']) 26 | if args.encoder_lang['shared']: 27 | enc_transformer = nn.TransformerEncoder( 28 | encoder_layer, num_layers) 29 | self.enc_transformers = enc_transformer 30 | else: 31 | self.enc_transformers = nn.TransformerEncoder( 32 | encoder_layer, num_layers) 33 | 34 | # encodings 35 | self.enc_pos = PosLangEncoding(args.demb) if args.encoder_lang['pos_enc'] else None 36 | self.enc_instr = InstrLangEncoding(args.demb) if args.encoder_lang['instr_enc'] else None 37 | self.enc_layernorm = nn.LayerNorm(args.demb) 38 | self.enc_dropout = nn.Dropout(args.dropout['lang'], inplace=True) 39 | 40 | def forward(self, lang_pad, embedder, vocab, pad): 41 | ''' 42 | pass embedded inputs through embeddings and encode them using a transformer 43 | ''' 44 | # pad the input language sequences and embed them with a linear layer 45 | 46 | mask_pad = (lang_pad == pad) 47 | emb_lang = embedder(lang_pad) 48 | # add positional encodings 49 | mask_token = EncoderLang.mask_token( 50 | lang_pad, vocab, {self.question_token}) 51 | 52 | emb_lang = self.encode_inputs(emb_lang, 
mask_token, mask_pad) 53 | # pass the inputs through the encoder 54 | hiddens = EncoderLang.encoder( 55 | self.enc_transformers, emb_lang, mask_pad) 56 | 57 | lengths = (lang_pad != pad).sum(dim=1) 58 | 59 | return hiddens, lengths 60 | 61 | @staticmethod 62 | def mask_token(lang_pad, vocab, tokens): 63 | ''' 64 | returns mask of the tokens 65 | ''' 66 | tokens_mask = torch.zeros_like(lang_pad).long() 67 | for token in tokens: 68 | tokens_mask += lang_pad == vocab.word2index(token) 69 | return tokens_mask.bool() 70 | 71 | @staticmethod 72 | def encoder(encoders, emb_lang, mask_pad, mask_attn=None): 73 | ''' 74 | compute encodings for all tokens using a normal flat encoder 75 | ''' 76 | # skip mask: mask padded words 77 | if mask_attn is None: 78 | # attention mask: all tokens can attend to all others 79 | mask_attn = torch.zeros( 80 | (mask_pad.shape[1], mask_pad.shape[1]), device=mask_pad.device).float() 81 | # encode the inputs 82 | output = encoders( 83 | emb_lang.transpose(0, 1), 84 | mask_attn, 85 | mask_pad).transpose(0, 1) 86 | return output 87 | 88 | def encode_inputs(self, emb_lang, mask_token, mask_pad): 89 | ''' 90 | add positional encodings, apply layernorm and dropout 91 | ''' 92 | emb_lang = self.enc_pos(emb_lang) if self.enc_pos else emb_lang 93 | emb_lang = self.enc_instr(emb_lang, mask_token) if self.enc_instr else emb_lang 94 | emb_lang = self.enc_dropout(emb_lang) 95 | emb_lang = self.enc_layernorm(emb_lang) 96 | return emb_lang 97 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_visual.py: -------------------------------------------------------------------------------- 1 | import os 2 | import types 3 | import torch 4 | import contextlib 5 | import numpy as np 6 | import torch.nn as nn 7 | import PIL 8 | 9 | from PIL import Image 10 | from torchvision import models 11 | from torchvision.transforms import functional as F 12 | 13 | from nn.transforms import Transforms 14 | 15 | class Resnet18(nn.Module): 16 | ''' 17 | pretrained Resnet18 from torchvision 18 | ''' 19 | def __init__(self, 20 | device, 21 | checkpoint_path=None, 22 | share_memory=False): 23 | super().__init__() 24 | self.device = device 25 | self.model = models.resnet18(pretrained=True) 26 | self.model = nn.Sequential(*list(self.model.children())[:-3]) 27 | '''if checkpoint_path is not None: 28 | print('Loading ResNet checkpoint from {}'.format(checkpoint_path)) 29 | model_state_dict = torch.load(checkpoint_path, map_location=device) 30 | model_state_dict = { 31 | key: value for key, value in model_state_dict.items() 32 | if 'GU_' not in key and 'text_pooling' not in key} 33 | model_state_dict = { 34 | key: value for key, value in model_state_dict.items() 35 | if 'fc.' 
not in key} 36 | model_state_dict = { 37 | key.replace('resnet.', ''): value 38 | for key, value in model_state_dict.items()} 39 | self.model.load_state_dict(model_state_dict) 40 | self.model = self.model.to(torch.device(device))''' 41 | 42 | if self.device == 'cuda': 43 | self.model.cuda() 44 | self.model = self.model.eval() 45 | if share_memory: 46 | self.model.share_memory() 47 | self._transform = Transforms.get_transform('default') 48 | 49 | def extract(self, x): 50 | # small image returned by RGBImgPartialObsWrapper transform with resize not necessary 51 | x = torch.stack([self._transform(Image.fromarray(i.astype('uint8'), 'RGB')).to(torch.device(self.device)) for i in x]) 52 | # x_tensor = torch.tensor(x, dtype=torch.float32) 53 | return self.model(x) 54 | 55 | class FeatureFlat(nn.Module): 56 | ''' 57 | a few conv layers to flatten features that come out of ResNet 58 | ''' 59 | def __init__(self, input_shape, output_size): 60 | super().__init__() 61 | if input_shape[0] == -1: 62 | input_shape = input_shape[1:] 63 | layers, activation_shape = self.init_cnn( 64 | input_shape, channels=[256, 64], kernels=[1, 1], paddings=[0, 0]) 65 | layers += [ 66 | Flatten(), nn.Linear(np.prod(activation_shape), output_size)] 67 | self.layers = nn.Sequential(*layers) 68 | 69 | def init_cnn(self, input_shape, channels, kernels, paddings): 70 | layers = [] 71 | planes_in, spatial = input_shape[0], input_shape[-1] 72 | for planes_out, kernel, padding in zip(channels, kernels, paddings): 73 | # do not use striding 74 | stride = 1 75 | layers += [ 76 | nn.Conv2d(planes_in, planes_out, kernel_size=kernel, 77 | stride=stride, padding=padding), 78 | nn.BatchNorm2d(planes_out), nn.ReLU(inplace=True)] 79 | planes_in = planes_out 80 | 81 | spatial = ((spatial - kernel + 2 * padding) // stride) + 1 82 | activation_shape = (planes_in, spatial, spatial) 83 | 84 | return layers, activation_shape 85 | 86 | def forward(self, frames): 87 | activation = self.layers(frames) 88 | return activation 89 | 90 | 91 | class Flatten(nn.Module): 92 | def forward(self, x): 93 | return x.view(x.size(0), -1) 94 | 95 | class SimpleEncoder(nn.Module): 96 | ''' 97 | a simple image encoder that is not pretrained to replace the use of resnet18 98 | ''' 99 | def __init__(self): 100 | super().__init__() 101 | self.image_conv = nn.Sequential( 102 | nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2), padding=1), 103 | nn.BatchNorm2d(128), 104 | nn.ReLU(), 105 | nn.MaxPool2d(kernel_size=(2, 2), stride=2), 106 | nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1), 107 | nn.BatchNorm2d(128), 108 | nn.ReLU(), 109 | nn.MaxPool2d(kernel_size=(2, 2), stride=2) 110 | ) 111 | def forward(self, frame): 112 | frame_extracted = self.image_conv(frame) 113 | return frame_extracted 114 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/enc_vl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import nn.model_util as model_util 4 | from nn.encodings import PosEncoding, PosLearnedEncoding, TokenLearnedEncoding 5 | 6 | 7 | class EncoderVL(nn.Module): 8 | def __init__(self, args): 9 | ''' 10 | transformer encoder for language, frames and action inputs 11 | ''' 12 | super(EncoderVL, self).__init__() 13 | 14 | # transofmer layers 15 | encoder_layer = nn.TransformerEncoderLayer( 16 | args.demb, args.encoder_heads, args.demb, 17 | args.dropout['transformer']['encoder']) 18 | self.enc_transformer 
= nn.TransformerEncoder( 19 | encoder_layer, args.encoder_layers) 20 | 21 | # how many last actions to attend to 22 | self.num_input_actions = args.num_input_actions 23 | 24 | # encodings 25 | self.enc_pos = PosEncoding(args.demb) if args.enc['pos'] else None 26 | self.enc_pos_learn = PosLearnedEncoding(args.demb) if args.enc['pos_learn'] else None 27 | self.enc_token = TokenLearnedEncoding(args.demb) if args.enc['token'] else None 28 | self.enc_layernorm = nn.LayerNorm(args.demb) 29 | self.enc_dropout = nn.Dropout(args.dropout['emb'], inplace=True) 30 | 31 | def forward(self, 32 | emb_lang, 33 | emb_frames, 34 | emb_actions, 35 | lengths_lang, 36 | lengths_frames, 37 | lengths_actions, 38 | length_frames_max, 39 | attn_masks=True): 40 | ''' 41 | pass embedded inputs through embeddings and encode them using a transformer 42 | ''' 43 | # emb_lang is processed on each GPU separately so they size can vary 44 | length_lang_max = lengths_lang.max().item() 45 | emb_lang = emb_lang[:, :length_lang_max] 46 | # create a mask for padded elements 47 | length_mask_pad = length_lang_max + length_frames_max * ( 48 | 2 if lengths_actions.max() > 0 else 1) 49 | mask_pad = torch.zeros( 50 | (len(emb_lang), length_mask_pad), device=emb_lang.device).bool() 51 | for i, (len_l, len_f, len_a) in enumerate( 52 | zip(lengths_lang, lengths_frames, lengths_actions)): 53 | # mask padded words 54 | mask_pad[i, len_l: length_lang_max] = True 55 | # mask padded frames 56 | mask_pad[i, length_lang_max + len_f: 57 | length_lang_max + length_frames_max] = True 58 | # mask padded actions 59 | mask_pad[i, length_lang_max + length_frames_max + len_a:] = True 60 | 61 | # encode the inputs 62 | emb_all = self.encode_inputs( 63 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames, mask_pad) 64 | 65 | # create a mask for attention (prediction at t should not see frames at >= t+1) 66 | if attn_masks: 67 | # assert length_frames_max == max(lengths_actions) 68 | mask_attn = model_util.generate_attention_mask( 69 | length_lang_max, length_frames_max, 70 | emb_all.device, self.num_input_actions) 71 | else: 72 | # allow every token to attend to all others 73 | mask_attn = torch.zeros( 74 | (mask_pad.shape[1], mask_pad.shape[1]), 75 | device=mask_pad.device).float() 76 | 77 | # encode the inputs 78 | output = self.enc_transformer( 79 | emb_all.transpose(0, 1), mask_attn, mask_pad).transpose(0, 1) 80 | return output, mask_pad 81 | 82 | def encode_inputs(self, emb_lang, emb_frames, emb_actions, 83 | lengths_lang, lengths_frames, mask_pad): 84 | ''' 85 | add encodings (positional, token and so on) 86 | ''' 87 | if self.enc_pos is not None: 88 | emb_lang, emb_frames, emb_actions = self.enc_pos( 89 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames) 90 | if self.enc_pos_learn is not None: 91 | emb_lang, emb_frames, emb_actions = self.enc_pos_learn( 92 | emb_lang, emb_frames, emb_actions, lengths_lang, lengths_frames) 93 | if self.enc_token is not None: 94 | emb_lang, emb_frames, emb_actions = self.enc_token( 95 | emb_lang, emb_frames, emb_actions) 96 | emb_cat = torch.cat((emb_lang, emb_frames, emb_actions), dim=1) 97 | emb_cat = self.enc_layernorm(emb_cat) 98 | emb_cat = self.enc_dropout(emb_cat) 99 | return emb_cat 100 | -------------------------------------------------------------------------------- /babyai-text/babyai/nn/transforms.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import random 3 | import math 4 | import torch 5 | 6 | from torchvision 
import transforms 7 | 8 | 9 | class Transforms(object): 10 | @staticmethod 11 | def resize(img_size=224): 12 | # expects a PIL Image 13 | return transforms.Resize((img_size, img_size)) 14 | 15 | @staticmethod 16 | def affine(degree=5, translate=0.04, scale=0.02): 17 | # expects a PIL Image 18 | return transforms.RandomAffine( 19 | degrees=(-degree, degree), 20 | translate=(translate, translate), 21 | scale=(1-scale, 1+scale), 22 | shear=None) 23 | 24 | @staticmethod 25 | def random_crop(img_size=224): 26 | # expects a PIL Image 27 | return transforms.RandomCrop((img_size, img_size)) 28 | 29 | @staticmethod 30 | def normalize(): 31 | # expects a PIL Image 32 | return transforms.Compose([ 33 | transforms.ToTensor(), 34 | transforms.Normalize( 35 | mean=[0.485, 0.456, 0.406], 36 | std=[0.229, 0.224, 0.225], 37 | ) 38 | ]) 39 | 40 | @staticmethod 41 | def cutout(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0.): 42 | # expects a tensor 43 | return transforms.RandomErasing( 44 | p=p, scale=scale, ratio=ratio, value=value) 45 | 46 | @staticmethod 47 | def get_transform(transform='default'): 48 | if transform == 'default': 49 | return transforms.Compose([ 50 | Transforms.resize(224), 51 | Transforms.normalize()]) 52 | 53 | elif transform == 'none': 54 | return transforms.ToTensor() 55 | elif transform == 'crops': 56 | return transforms.Compose([ 57 | Transforms.resize(240), 58 | Transforms.random_crop(224), 59 | Transforms.normalize()]) 60 | elif transform == 'cutout': 61 | return transforms.Compose([ 62 | Transforms.resize(224), 63 | Transforms.normalize(), 64 | Transforms.cutout()]) 65 | elif transform == 'affine': 66 | return transforms.Compose([ 67 | Transforms.resize(224), 68 | Transforms.affine(), 69 | Transforms.normalize()]) 70 | elif transform == 'affine_crops': 71 | return transforms.Compose([ 72 | Transforms.resize(240), 73 | Transforms.random_crop(224), 74 | Transforms.affine(), 75 | Transforms.normalize()]) 76 | elif transform == 'affine_crops_cutout': 77 | return transforms.Compose([ 78 | Transforms.resize(240), 79 | Transforms.random_crop(224), 80 | Transforms.affine(), 81 | Transforms.normalize(), 82 | Transforms.cutout()]) 83 | elif transform == 'affine_cutout': 84 | return transforms.Compose([ 85 | Transforms.resize(224), 86 | Transforms.affine(), 87 | Transforms.normalize(), 88 | Transforms.cutout()]) 89 | else: 90 | raise ValueError('Image augmentation {} is not implemented'.format(transform)) 91 | -------------------------------------------------------------------------------- /babyai-text/babyai/run_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Run basic BabyAI level tests 5 | Note: there are other automated tests in .circleci/config.yml 6 | """ 7 | 8 | from babyai import levels 9 | 10 | # NOTE: please make sure that tests are always deterministic 11 | 12 | print('Testing levels, mission generation') 13 | levels.test() 14 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | *egg-info 3 | *.sh 4 | !run_slurm.sh -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/GPR.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from sklearn.gaussian_process import 
GaussianProcessRegressor 4 | from sklearn.gaussian_process.kernels import RBF 5 | 6 | SR = np.array([0.73, 0.73, 0.73, 0.73, 7 | 0.66, 0.66, 0.66, 0.66, 8 | 0.56, 0.56, 0.56, 0.56, 9 | 0.41, 0.41, 0.41, 0.41, 10 | 0.250, 0.250, 0.250, 0.250]).reshape(-1, 1) 11 | SE = np.array([0.560, 0.548, 0.573, 0.556, 12 | 0.555, 0.556, 0.568, 0.577, 13 | 0.557, 0.563, 0.529, 0.538, 14 | 0.501, 0.488, 0.452, 0.481, 15 | 0.192, 0.214, 0.206, 0.132]).reshape(-1, 1) 16 | print(SR) 17 | print(SE) 18 | kernel = RBF(length_scale_bounds=(1e-05, 100000.0)) 19 | 20 | """alpha_step = np.arange(1e-4, 5e-3, 2e-4) 21 | score = np.zeros_like(alpha_step) 22 | for a in range(len(alpha_step)): 23 | gpr = GaussianProcessRegressor(alpha=alpha_step[a], kernel=kernel, random_state=0).fit(SR, SE) 24 | score[a] = gpr.score(SR, SE) 25 | 26 | plt.plot(alpha_step, score) 27 | plt.show()""" 28 | 29 | gpr = GaussianProcessRegressor(alpha=0.0005, kernel=kernel, random_state=0).fit(SR, SE) 30 | print(gpr.score(SR, SE)) 31 | SR_pred = np.arange(0.250, 0.73, 0.001) 32 | SE_mean, SE_std = gpr.predict(SR_pred.reshape(-1, 1), return_std=True) 33 | SE_mean = SE_mean.reshape(480, ) 34 | plt.scatter(SR, SE, label="Observations") 35 | plt.plot(SR_pred, SE_mean) 36 | plt.fill_between(SR_pred, 37 | SE_mean + 1.96 * SE_std, 38 | SE_mean - 1.96 * SE_std, 39 | alpha=0.5, 40 | label=r"95% confidence interval") 41 | plt.ylabel("Sample Efficiency") 42 | plt.xlabel("Success rate of the QA") 43 | plt.legend() 44 | plt.show() 45 | 46 | high_curve = SE_mean + 1.96 * SE_std 47 | valid_idx = np.where(high_curve >= 0.5)[0][0] 48 | print(SR_pred[valid_idx]) 49 | 50 | low_curve = SE_mean - 1.96 * SE_std 51 | valid_idx = np.where(low_curve >= 0.5)[0][0] 52 | print(SR_pred[valid_idx]) 53 | """print(gpr.get_params(deep=True)) 54 | SR_min = np.arange(0.250, 0.73, 0.001) 55 | proba_SR_min = [] 56 | len_SR_min = len(SR_min) 57 | 58 | print("proba inferior: {}".format(len(SE_pred[SE_pred < 0.5])/len(SE_pred))) 59 | 60 | for i in range(1, 481): 61 | proba_inferior = (len(SE_pred[SE_pred < 0.5])/len(SE_pred))**(i-1) 62 | proba_superior = len(SE_pred[SE_pred > 0.5])/len(SE_pred) 63 | proba_SR_min.append(proba_inferior*proba_superior) 64 | len(SR_min) 65 | print(len(proba_SR_min)) 66 | plt.plot(SR_min, np.array(proba_SR_min)) 67 | plt.show()""" 68 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai/scripts/__init__.py -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/compute_possible_instructions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute the number of possible instructions in the BabyAI grammar. 
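The count_* functions below mirror the grammar productions (Sent, Sent1, Clause, object descriptors, colors, locations) and sum or multiply the number of choices at each production.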
5 | """ 6 | 7 | from gym_minigrid.minigrid import COLOR_NAMES 8 | 9 | def count_Sent(): 10 | return ( 11 | count_Sent1() + 12 | # Sent1, then Sent1 13 | count_Sent1() * count_Sent1() + 14 | # Sent1 after you Sent1 15 | count_Sent1() * count_Sent1() 16 | ) 17 | 18 | def count_Sent1(): 19 | return ( 20 | count_Clause() + 21 | # Clause and Clause 22 | count_Clause() * count_Clause() 23 | ) 24 | 25 | def count_Clause(): 26 | return ( 27 | # go to 28 | count_Descr() + 29 | # pick up 30 | count_DescrNotDoor() + 31 | # open 32 | count_DescrDoor() + 33 | # put next 34 | count_DescrNotDoor() * count_Descr() 35 | ) 36 | 37 | def count_DescrDoor(): 38 | # (the|a) Color door Location 39 | return 2 * count_Color() * count_LocSpec() 40 | def count_DescrBall(): 41 | return count_DescrDoor() 42 | def count_DescrBox(): 43 | return count_DescrDoor() 44 | def count_DescrKey(): 45 | return count_DescrDoor() 46 | def count_Descr(): 47 | return count_DescrDoor() + count_DescrBall() + count_DescrBox() + count_DescrKey() 48 | def count_DescrNotDoor(): 49 | return count_DescrBall() + count_DescrBox() + count_DescrKey() 50 | 51 | def count_Color(): 52 | # Empty string or color 53 | return len([None] + COLOR_NAMES) 54 | 55 | def count_LocSpec(): 56 | # Empty string or location 57 | return len([None, 'left', 'right', 'front', 'behind']) 58 | 59 | print('DescrKey: ', count_DescrKey()) 60 | print('Descr: ', count_Descr()) 61 | print('DescrNotDoor: ', count_DescrNotDoor()) 62 | print('Clause: ', count_Clause()) 63 | print('Sent1: ', count_Sent1()) 64 | print('Sent: ', count_Sent()) 65 | print('Sent: {:.3g}'.format(count_Sent())) 66 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/enjoy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Visualize the performance of a model on a given environment. 5 | """ 6 | 7 | import argparse 8 | import gym 9 | import time 10 | 11 | import babyai.utils as utils 12 | 13 | # Parse arguments 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--env", required=True, 17 | help="name of the environment to be run (REQUIRED)") 18 | parser.add_argument("--model", default=None, 19 | help="name of the trained model (REQUIRED or --demos-origin or --demos REQUIRED)") 20 | parser.add_argument("--demos", default=None, 21 | help="demos filename (REQUIRED or --model demos-origin required)") 22 | parser.add_argument("--demos-origin", default=None, 23 | help="origin of the demonstrations: human | agent (REQUIRED or --model or --demos REQUIRED)") 24 | parser.add_argument("--seed", type=int, default=None, 25 | help="random seed (default: 0 if model agent, 1 if demo agent)") 26 | parser.add_argument("--argmax", action="store_true", default=False, 27 | help="action with highest probability is selected for model agent") 28 | parser.add_argument("--pause", type=float, default=0.1, 29 | help="the pause between two consequent actions of an agent") 30 | parser.add_argument("--manual-mode", action="store_true", default=False, 31 | help="Allows you to take control of the agent at any point of time") 32 | 33 | args = parser.parse_args() 34 | 35 | action_map = { 36 | "LEFT" : "left", 37 | "RIGHT" : "right", 38 | "UP" : "forward", 39 | "PAGE_UP": "pickup", 40 | "PAGE_DOWN": "drop", 41 | "SPACE": "toggle" 42 | } 43 | 44 | assert args.model is not None or args.demos is not None, "--model or --demos must be specified." 
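# Example invocation (hypothetical; flags as defined by the parser above):
#   python enjoy.py --env BabyAI-BossLevel-v0 --model <trained_model_name> --argmax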
45 | if args.seed is None: 46 | args.seed = 0 if args.model is not None else 1 47 | 48 | # Set seed for all randomness sources 49 | 50 | utils.seed(args.seed) 51 | 52 | # Generate environment 53 | 54 | env = gym.make(args.env) 55 | env.seed(args.seed) 56 | 57 | global obs 58 | obs = env.reset() 59 | print("Mission: {}".format(obs["mission"])) 60 | 61 | # Define agent 62 | agent = utils.load_agent(env, args.model, args.demos, args.demos_origin, args.argmax, args.env) 63 | 64 | # Run the agent 65 | 66 | done = True 67 | 68 | action = None 69 | 70 | def keyDownCb(keyName): 71 | global obs 72 | # Avoiding processing of observation by agent for wrong key clicks 73 | if keyName not in action_map and keyName != "RETURN": 74 | return 75 | 76 | agent_action = agent.act(obs)['action'] 77 | 78 | if keyName in action_map: 79 | action = env.actions[action_map[keyName]] 80 | 81 | elif keyName == "RETURN": 82 | action = agent_action 83 | 84 | obs, reward, done, _ = env.step(action) 85 | agent.analyze_feedback(reward, done) 86 | if done: 87 | print("Reward:", reward) 88 | obs = env.reset() 89 | print("Mission: {}".format(obs["mission"])) 90 | 91 | step = 0 92 | episode_num = 0 93 | while True: 94 | time.sleep(args.pause) 95 | renderer = env.render("human") 96 | if args.manual_mode and renderer.window is not None: 97 | renderer.window.setKeyDownCb(keyDownCb) 98 | else: 99 | result = agent.act(obs) 100 | obs, reward, done, _ = env.step(result['action']) 101 | agent.analyze_feedback(reward, done) 102 | if 'dist' in result and 'value' in result: 103 | dist, value = result['dist'], result['value'] 104 | dist_str = ", ".join("{:.4f}".format(float(p)) for p in dist.probs[0]) 105 | print("step: {}, mission: {}, dist: {}, entropy: {:.2f}, value: {:.2f}".format( 106 | step, obs["mission"], dist_str, float(dist.entropy()), float(value))) 107 | else: 108 | print("step: {}, mission: {}".format(step, obs['mission'])) 109 | if done: 110 | print("Reward:", reward) 111 | episode_num += 1 112 | env.seed(args.seed + episode_num) 113 | obs = env.reset() 114 | agent.on_reset() 115 | step = 0 116 | else: 117 | step += 1 118 | 119 | if renderer.window is None: 120 | break 121 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/evaluate_all_demos.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to evaluate all available demos. 3 | 4 | Assumes all demos (human and agent, except the "valid" ones) 5 | are generated with seed 1 6 | """ 7 | 8 | import os 9 | from subprocess import call 10 | import sys 11 | 12 | import babyai.utils as utils 13 | 14 | folder = os.path.join(utils.storage_dir(), "demos") 15 | for filename in sorted(os.listdir(folder)): 16 | if filename.endswith(".pkl") and 'valid' in filename: 17 | env = 'BabyAI-BossLevel-v0' # It doesn't really matter. The evaluation only considers the lengths of demos. 18 | demo = filename[:-4] # Remove the .pkl part of the name 19 | 20 | print("> Demos: {}".format(demo)) 21 | 22 | command = ["python evaluate.py --env {} --demos {} --worst-episodes-to-show 0".format(env, demo)] + sys.argv[1:] 23 | call(" ".join(command), shell=True) 24 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/evaluate_all_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate all models in a storage directory. 
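The levels each model is evaluated on are inferred from the level names embedded in the model's name (see get_levels_from_model_name below).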
3 | 4 | In order to use this script make sure to add baby-ai-game/scripts to the $PATH 5 | environment variable. 6 | 7 | Sample usage: 8 | evaluate_all_models.py --episodes 200 --argmax 9 | """ 10 | 11 | import os 12 | from subprocess import call 13 | import sys 14 | 15 | import babyai.utils as utils 16 | from babyai.levels import level_dict 17 | import re 18 | 19 | # List of all levels ordered by length of the level name from longest to shortest 20 | LEVELS = sorted(list(level_dict.keys()), key=len)[::-1] 21 | 22 | 23 | def get_levels_from_model_name(model): 24 | levels = [] 25 | # Assume that our model names are separated with _ or - 26 | model_name_parts = re.split('_|-', model) 27 | for part in model_name_parts: 28 | # Assume that each part contains at most one level name. 29 | # Sorting LEVELS using length of level name is to avoid scenarios like 30 | # extracting 'GoTo' from the model name 'GoToLocal-model' 31 | for level in LEVELS: 32 | if level in part: 33 | levels.append('BabyAI-{}-v0'.format(level)) 34 | break 35 | return list(set(levels)) 36 | 37 | 38 | folder = os.path.join(utils.storage_dir(), "models") 39 | 40 | for model in sorted(os.listdir(folder)): 41 | if model.startswith('.'): 42 | continue 43 | envs = get_levels_from_model_name(model) 44 | print("> Envs: {} > Model: {}".format(envs, model)) 45 | for env in envs: 46 | command = ["evaluate.py --env {} --model {}".format(env, model)] + sys.argv[1:] 47 | print("Command: {}".format(" ".join(command))) 48 | call(" ".join(command), shell=True) 49 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/learn_baseline_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | A reimplmentation of the LEARN model (Goyal et al., 2019) 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | 10 | 11 | def initialize_parameters(m): 12 | classname = m.__class__.__name__ 13 | if classname.find('Linear') != -1: 14 | torch.nn.init.xavier_uniform_(m.weight) 15 | if m.bias is not None: 16 | m.bias.data.fill_(0.1) 17 | 18 | 19 | class LEARNBaselineModel(nn.Module): 20 | 21 | def __init__(self, obs_space, arch="learn", lang_model="gru", instr_dim=128, action_dim=128, hidden_dim=128, dropout=0): 22 | super().__init__() 23 | 24 | self.arch = arch 25 | self.lang_model = lang_model 26 | self.instr_dim = instr_dim 27 | self.action_dim = action_dim 28 | self.hidden_dim = hidden_dim 29 | 30 | if self.lang_model in ['gru']: 31 | self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim) 32 | gru_dim = self.instr_dim 33 | self.instr_rnn = nn.GRU( 34 | self.instr_dim, gru_dim, num_layers=2, 35 | batch_first=True, 36 | bidirectional=False 37 | ) 38 | 39 | action_input_sizes = [obs_space['num_actions'], self.hidden_dim, self.hidden_dim] 40 | action_output_sizes = [self.hidden_dim, self.hidden_dim, self.action_dim] 41 | self.action_mlp = self.mlp(action_input_sizes, action_output_sizes, dropout=dropout) 42 | 43 | cls_input_sizes = [self.action_dim + self.instr_dim, self.hidden_dim, self.hidden_dim] 44 | cls_output_sizes = [self.hidden_dim, self.hidden_dim, 2] 45 | self.classification_mlp = self.mlp(cls_input_sizes, cls_output_sizes, dropout=dropout) 46 | 47 | self.apply(initialize_parameters) 48 | 49 | def mlp(self, in_dim, out_dim, dropout=0, n_layers=3): 50 | layers = [] 51 | for l in range(n_layers - 1): 52 | layers.extend([nn.Linear(in_dim[l], out_dim[l]), 53 | 
nn.ReLU(), 54 | nn.BatchNorm1d(out_dim[l]), 55 | nn.Dropout(dropout)]) 56 | layers.extend([nn.Linear(in_dim[-1], out_dim[-1])]) 57 | return nn.Sequential(*layers) 58 | 59 | def forward(self, missions, action_frequencies): 60 | action_enc = self.action_mlp(action_frequencies) 61 | text_enc = self._get_instr_embedding(missions) 62 | action_text = torch.cat((action_enc, text_enc,), dim=-1) 63 | 64 | logits = self.classification_mlp(action_text) 65 | 66 | preds = torch.argmax(logits, axis=-1) 67 | return preds, logits 68 | 69 | def _get_instr_embedding(self, instr): 70 | lengths = (instr != 0).sum(1).long() 71 | if self.lang_model == 'gru': 72 | out, _ = self.instr_rnn(self.word_embedding(instr)) 73 | hidden = out[range(len(lengths)), lengths-1, :] 74 | return hidden -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/manual_control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import argparse 5 | import numpy as np 6 | import gym 7 | import gym_minigrid 8 | from gym_minigrid.wrappers import * 9 | from gym_minigrid.window import Window 10 | import babyai 11 | 12 | def redraw(img): 13 | if not args.agent_view: 14 | img = env.render('rgb_array', tile_size=args.tile_size) 15 | 16 | window.show_img(img) 17 | 18 | def reset(): 19 | if args.seed != -1: 20 | env.seed(args.seed) 21 | 22 | obs = env.reset() 23 | 24 | if hasattr(env, 'mission'): 25 | print('Mission: %s' % env.mission) 26 | window.set_caption(env.mission) 27 | 28 | redraw(obs) 29 | 30 | def step(action): 31 | obs, reward, done, info = env.step(action) 32 | print('step=%s, reward=%.2f' % (env.step_count, reward)) 33 | 34 | if done: 35 | print('done!') 36 | reset() 37 | else: 38 | redraw(obs) 39 | 40 | def key_handler(event): 41 | print('pressed', event.key) 42 | 43 | if event.key == 'escape': 44 | window.close() 45 | return 46 | 47 | if event.key == 'backspace': 48 | reset() 49 | return 50 | 51 | if event.key == 'left': 52 | step(env.actions.left) 53 | return 54 | if event.key == 'right': 55 | step(env.actions.right) 56 | return 57 | if event.key == 'up': 58 | step(env.actions.forward) 59 | return 60 | 61 | # Spacebar 62 | if event.key == ' ': 63 | step(env.actions.toggle) 64 | return 65 | if event.key == 'pageup': 66 | step(env.actions.pickup) 67 | return 68 | if event.key == 'pagedown': 69 | step(env.actions.drop) 70 | return 71 | 72 | if event.key == 'enter': 73 | step(env.actions.done) 74 | return 75 | 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument( 78 | "--env", 79 | help="gym environment to load", 80 | default='BabyAI-BossLevel-v0' 81 | ) 82 | parser.add_argument( 83 | "--seed", 84 | type=int, 85 | help="random seed to generate the environment with", 86 | default=-1 87 | ) 88 | parser.add_argument( 89 | "--tile_size", 90 | type=int, 91 | help="size at which to render tiles", 92 | default=32 93 | ) 94 | parser.add_argument( 95 | '--agent_view', 96 | default=False, 97 | help="draw the agent sees (partially observable view)", 98 | action='store_true' 99 | ) 100 | 101 | args = parser.parse_args() 102 | 103 | env = gym.make(args.env) 104 | 105 | if args.agent_view: 106 | env = RGBImgPartialObsWrapper(env) 107 | env = ImgObsWrapper(env) 108 | 109 | window = Window('gym_minigrid - ' + args.env) 110 | window.reg_key_handler(key_handler) 111 | 112 | reset() 113 | 114 | # Blocking event loop 115 | window.show(block=True) 116 | 
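# Example invocation (hypothetical; flags per the argparse definitions above):
#   python manual_control.py --env BabyAI-BossLevel-v0 --agent_view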
-------------------------------------------------------------------------------- /babyai-text/babyai/scripts/result_l_class_study.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle as pkl 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | 7 | def learning_curves(name_env, model_number): 8 | print("======== env:{} model:{}=======".format(name_env, model_number)) 9 | log = pkl.load(open('storage/models/' + name_env + '/' + 'model_{}'.format(model_number) + '/log.pkl', "rb")) 10 | 11 | train_error = np.array(log["loss_cross_entropy_train"]) 12 | success_rate_train = np.array(log["success_pred_train"]) 13 | valid_error = np.array(log["loss_cross_entropy_valid"]) 14 | success_rate_valid = np.array(log["success_pred_valid"]) 15 | 16 | print('At epoch {} the CE error for train reach the minimum value of {}'.format(np.argmin(train_error), 17 | min(train_error))) 18 | print(train_error) 19 | print(" ") 20 | print('At epoch {} the CE error for valid reach the minimum value of {}'.format(np.argmin(valid_error), 21 | min(valid_error))) 22 | print(valid_error) 23 | print(" ") 24 | print('At epoch {} the success rate for train reach the maximum value of {}'.format(np.argmax(success_rate_train), 25 | max(success_rate_train))) 26 | print(success_rate_train) 27 | print(" ") 28 | print('At epoch {} the success rate for valid reach the maximum value of {}'.format(np.argmax(success_rate_valid), 29 | max(success_rate_valid))) 30 | print(success_rate_valid) 31 | 32 | """plt.plot(np.arange(len(train_error)), train_error) 33 | plt.title("Train error") 34 | plt.grid(axis='both') 35 | plt.show() 36 | plt.plot(np.arange(len(valid_error)), valid_error) 37 | plt.title("Valid error") 38 | plt.grid(axis='both') 39 | plt.show() 40 | plt.plot(np.arange(len(success_rate_train)), success_rate_train) 41 | plt.title("Success rate train set") 42 | plt.grid(axis='both') 43 | plt.show() 44 | plt.plot(np.arange(len(success_rate_valid)), success_rate_valid) 45 | plt.title("Success rate valid set") 46 | plt.grid(axis='both') 47 | plt.show() 48 | """ 49 | 50 | 51 | 52 | 53 | learning_curves('BabyAI-PutNextLocal-v0_no_answer_l_class', 0) 54 | 55 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/show_level_instructions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Randomly sample and print out instructions from a level. 
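Example (hypothetical invocation): python show_level_instructions.py BabyAI-BossLevel-v0 --n-episodes 100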
3 | """ 4 | 5 | import argparse 6 | 7 | import babyai 8 | import gym 9 | 10 | 11 | parser = argparse.ArgumentParser("Show level instructions") 12 | parser.add_argument("--n-episodes", type=int, default=10000, 13 | help="Collect instructions from this many episodes") 14 | parser.add_argument("level", 15 | help="The level of interest") 16 | args = parser.parse_args() 17 | 18 | env = gym.make(args.level) 19 | instructions = set(env.reset()['mission'] for i in range(args.n_episodes)) 20 | for instr in sorted(instructions): 21 | print(instr) 22 | -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/subtask_prediction_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | 10 | def initialize_parameters(m): 11 | classname = m.__class__.__name__ 12 | if classname.find('Linear') != -1: 13 | m.weight.data.normal_(0, 1) 14 | m.weight.data *= 1 / torch.sqrt(m.weight.data.pow(2).sum(1, keepdim=True)) 15 | if m.bias is not None: 16 | m.bias.data.fill_(0) 17 | 18 | 19 | class SubtaskPredictionModel(nn.Module): 20 | 21 | def __init__(self, obs_space, arch="siamese", lang_model="gru", instr_dim=128): 22 | super().__init__() 23 | 24 | self.arch = arch 25 | self.lang_model = lang_model 26 | self.instr_dim = instr_dim 27 | 28 | if self.lang_model in ['gru']: 29 | self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim) 30 | gru_dim = self.instr_dim 31 | self.instr_rnn = nn.GRU( 32 | self.instr_dim, gru_dim, batch_first=True, 33 | bidirectional=False 34 | ) 35 | 36 | self.fc1 = nn.Linear(self.instr_dim, self.instr_dim // 2) 37 | self.fc2 = nn.Linear(self.instr_dim, self.instr_dim // 2) 38 | self.dropout1 = nn.Dropout(0.1) 39 | self.dropout2 = nn.Dropout(0.1) 40 | self.fc3 = nn.Linear(self.instr_dim, self.instr_dim // 2) 41 | self.fc4 = nn.Linear(self.instr_dim // 2, 1) 42 | 43 | self.sigmoid = nn.Sigmoid() 44 | 45 | self.apply(initialize_parameters) 46 | 47 | def forward(self, missions, subtasks): 48 | if self.arch == "siamese": 49 | mission_embedding = self._get_instr_embedding(missions) 50 | subtask_embedding = self._get_instr_embedding(subtasks) 51 | 52 | mission_embedding = self.dropout1(self.fc1(mission_embedding)) 53 | subtask_embedding = self.dropout2(self.fc2(subtask_embedding)) 54 | 55 | both_embeddings = torch.cat((mission_embedding, subtask_embedding), dim=-1) 56 | both_embeddings = self.fc3(both_embeddings) 57 | 58 | logits = self.fc4(both_embeddings) 59 | preds = self.sigmoid(logits).squeeze(-1) 60 | elif self.arch == "siamese-l1": 61 | mission_embedding = self._get_instr_embedding(missions) 62 | subtask_embedding = self._get_instr_embedding(subtasks) 63 | 64 | mission_embedding = self.fc1(mission_embedding) 65 | subtask_embedding = self.fc2(subtask_embedding) 66 | 67 | dist = torch.norm(mission_embedding - subtask_embedding, p=1, dim=1) 68 | 69 | preds = torch.exp(-dist) 70 | 71 | return preds 72 | 73 | def _get_instr_embedding(self, instr): 74 | lengths = (instr != 0).sum(1).long() 75 | if self.lang_model == 'gru': 76 | out, _ = self.instr_rnn(self.word_embedding(instr)) 77 | hidden = out[range(len(lengths)), lengths-1, :] 78 | return hidden -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/train_learn_baseline_model.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Training code for the LEARN model (Goyal et al., 2019) 5 | """ 6 | 7 | import os 8 | import csv 9 | import copy 10 | import gym 11 | import time 12 | import datetime 13 | import numpy as np 14 | import sys 15 | import logging 16 | import torch 17 | import wandb 18 | from babyai.arguments import ArgumentParser 19 | import babyai.utils as utils 20 | 21 | from learn_baseline import LEARNBaseline 22 | from babyai.arguments import ArgumentParser 23 | import babyai.utils as utils 24 | 25 | 26 | parser = ArgumentParser() 27 | 28 | parser.add_argument("--demos", default=None, 29 | help="demos filename (REQUIRED or demos-origin or multi-demos required)") 30 | parser.add_argument("--demos-origin", required=False, 31 | help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)") 32 | parser.add_argument("--episodes", type=int, default=0, 33 | help="number of high-level episodes of demonstrations to use" 34 | "(default: 0, meaning all demos)") 35 | parser.add_argument("--save-interval", type=int, default=1, 36 | help="number of epochs between two saves (default: 1, 0 means no saving)") 37 | 38 | 39 | def main(args): 40 | 41 | args.model = args.model or LEARNBaseline.default_model_name(args) 42 | utils.configure_logging(args.model) 43 | logger = logging.getLogger(__name__) 44 | 45 | learn_baseline = LEARNBaseline(args) 46 | 47 | header = (["update", "frames", "fps", "duration", "train_loss", "train_accuracy", "train_precision", "train_recall"] 48 | + ["validation_loss", "validation_accuracy", "validation_precision", "validation_recall"]) 49 | 50 | writer = None 51 | if args.wb: 52 | wandb.init(project="ella", name=args.model) 53 | wandb.config.update(args) 54 | writer = wandb 55 | 56 | # Define csv writer 57 | csv_writer = None 58 | csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv') 59 | first_created = not os.path.exists(csv_path) 60 | # we don't buffer data going in the csv log, cause we assume 61 | # that one update will take much longer that one write to the log 62 | csv_writer = csv.writer(open(csv_path, 'a', 1)) 63 | if first_created: 64 | csv_writer.writerow(header) 65 | 66 | # Get the status path 67 | status_path = os.path.join(utils.get_log_dir(args.model), 'status.json') 68 | 69 | # Log command, availability of CUDA, and model 70 | logger.info(args) 71 | logger.info("CUDA available: {}".format(torch.cuda.is_available())) 72 | logger.info(learn_baseline.model) 73 | 74 | learn_baseline.train(learn_baseline.train_demos, writer, csv_writer, status_path, header) 75 | 76 | 77 | if __name__ == "__main__": 78 | args = parser.parse_args() 79 | main(args) -------------------------------------------------------------------------------- /babyai-text/babyai/scripts/train_subtask_prediction_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Pre-training code for the subtask prediction model (relevance classifier). 
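Example (hypothetical invocation; flags match the parser defined below):
train_subtask_prediction_model.py --demos <demos_filename> --episodes 1000 --denoise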
5 | """ 6 | 7 | import os 8 | import csv 9 | import copy 10 | import gym 11 | import time 12 | import datetime 13 | import numpy as np 14 | import sys 15 | import logging 16 | import torch 17 | from babyai.arguments import ArgumentParser 18 | import babyai.utils as utils 19 | 20 | from subtask_prediction import SubtaskPrediction 21 | from babyai.arguments import ArgumentParser 22 | import babyai.utils as utils 23 | 24 | 25 | parser = ArgumentParser() 26 | 27 | parser.add_argument("--demos", default=None, 28 | help="demos filename (REQUIRED or demos-origin or multi-demos required)") 29 | parser.add_argument("--demos-origin", required=False, 30 | help="origin of the demonstrations: human | agent (REQUIRED or demos or multi-demos required)") 31 | parser.add_argument("--episodes", type=int, default=0, 32 | help="number of high-level episodes of demonstrations to use" 33 | "(default: 0, meaning all demos)") 34 | parser.add_argument("--low-level-demos", default=None, 35 | help="low-level demos filename") 36 | parser.add_argument("--ll-episodes", type=int, default=0, 37 | help="number of low-level episodes of demonstrations to use" 38 | "(default: 0, meaning all demos)") 39 | parser.add_argument("--save-interval", type=int, default=1, 40 | help="number of epochs between two saves (default: 1, 0 means no saving)") 41 | parser.add_argument("--denoise", action="store_true", 42 | help="whether or not to denoise the data") 43 | parser.add_argument("--denoise-k", type=int, default=1, 44 | help="how many examples of each instruction to use") 45 | parser.add_argument("--denoise-total", type=int, default=100, 46 | help="total number of instructions in the denoised dataset") 47 | parser.add_argument("--augment", action="store_true", 48 | help="whether or not to augment the data") 49 | parser.add_argument("--augment-total", type=int, default=100, 50 | help="total number of instructions in the augmented dataset") 51 | parser.add_argument("--wait-finetune", type=int, default=50, 52 | help="how long to wait to fine-tune") 53 | parser.add_argument("--ones", action="store_true", default=False, 54 | help="whether to ignore labels") 55 | 56 | def main(args): 57 | 58 | args.model = args.model or SubtaskPrediction.default_model_name(args) 59 | utils.configure_logging(args.model) 60 | logger = logging.getLogger(__name__) 61 | 62 | subtask_prediction = SubtaskPrediction(args) 63 | 64 | header = (["update", "frames", "fps", "duration", "train_loss", "train_accuracy", "train_precision", "train_recall"] 65 | + ["validation_loss", "validation_accuracy"] 66 | + ["ground_truth_validation_accuracy", "ground_truth_validation_precision", "ground_truth_validation_recall"]) 67 | 68 | writer = None 69 | if args.wb: 70 | import wandb 71 | wandb.init(project="ella") 72 | wandb.config.update(args) 73 | writer = wandb 74 | 75 | # Define csv writer 76 | csv_writer = None 77 | csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv') 78 | first_created = not os.path.exists(csv_path) 79 | # we don't buffer data going in the csv log, cause we assume 80 | # that one update will take much longer that one write to the log 81 | csv_writer = csv.writer(open(csv_path, 'a', 1)) 82 | if first_created: 83 | csv_writer.writerow(header) 84 | 85 | # Get the status path 86 | status_path = os.path.join(utils.get_log_dir(args.model), 'status.json') 87 | 88 | # Log command, availability of CUDA, and model 89 | logger.info(args) 90 | logger.info("CUDA available: {}".format(torch.cuda.is_available())) 91 | logger.info(subtask_prediction.model) 92 
| 93 | subtask_prediction.train(subtask_prediction.train_demos, writer, csv_writer, status_path, header) 94 | 95 | 96 | if __name__ == "__main__": 97 | args = parser.parse_args() 98 | main(args) -------------------------------------------------------------------------------- /babyai-text/babyai/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='babyai', 5 | version='0.1.0', 6 | license='BSD 3-clause', 7 | keywords='memory, environment, agent, rl, openaigym, openai-gym, gym', 8 | packages=['babyai', 'babyai.levels', 'babyai.utils', 'babyai.rl'], 9 | install_requires=[ 10 | 'gym>=0.9.6,<0.26.2', 11 | 'numpy>=1.17.0', 12 | "torch>=0.4.1", 13 | 'blosc>=1.5.1', 14 | # 'gym_minigrid @ https://github.com/maximecb/gym-minigrid/archive/master.zip' 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /babyai-text/babyai_text/__init__.py: -------------------------------------------------------------------------------- 1 | from .levels.mixed_seq_levels import * -------------------------------------------------------------------------------- /babyai-text/babyai_text/levels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/babyai_text/levels/__init__.py -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *__pycache__ 3 | gym_minigrid.egg-info 4 | trained_models 5 | 6 | # PyPI 7 | build/* 8 | dist/* 9 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | 5 | # command to install dependencies 6 | install: 7 | - pip3 install -e . 
8 | 9 | # command to run tests 10 | script: ./run_tests.py 11 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import time 4 | import argparse 5 | from gym_minigrid.wrappers import * 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument( 9 | "--env-name", 10 | dest="env_name", 11 | help="gym environment to load", 12 | default='MiniGrid-LavaGapS7-v0' 13 | ) 14 | parser.add_argument("--num_resets", default=200) 15 | parser.add_argument("--num_frames", default=5000) 16 | args = parser.parse_args() 17 | 18 | env = gym.make(args.env_name) 19 | 20 | # Benchmark env.reset 21 | t0 = time.time() 22 | for i in range(args.num_resets): 23 | env.reset() 24 | t1 = time.time() 25 | dt = t1 - t0 26 | reset_time = (1000 * dt) / args.num_resets 27 | 28 | # Benchmark rendering 29 | t0 = time.time() 30 | for i in range(args.num_frames): 31 | env.render('rgb_array') 32 | t1 = time.time() 33 | dt = t1 - t0 34 | frames_per_sec = args.num_frames / dt 35 | 36 | # Create an environment with an RGB agent observation 37 | env = gym.make(args.env_name) 38 | env = RGBImgPartialObsWrapper(env) 39 | env = ImgObsWrapper(env) 40 | 41 | # Benchmark rendering 42 | t0 = time.time() 43 | for i in range(args.num_frames): 44 | obs, reward, done, info = env.step(0) 45 | t1 = time.time() 46 | dt = t1 - t0 47 | agent_view_fps = args.num_frames / dt 48 | 49 | print('Env reset time: {:.1f} ms'.format(reset_time)) 50 | print('Rendering FPS : {:.0f}'.format(frames_per_sec)) 51 | print('Agent view FPS: {:.0f}'.format(agent_view_fps)) 52 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/BlockedUnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/BlockedUnlockPickup.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/DistShift1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/DistShift1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/DistShift2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/DistShift2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS3R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS3R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS4R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS4R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS5R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS5R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/KeyCorridorS6R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/KeyCorridorS6R3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS11N5.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaCrossingS9N3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/LavaGapS6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/LavaGapS6.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dl.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlh.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlhb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Dlhb.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-1Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-1Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dl.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlh.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlhb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Dlhb.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-2Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-2Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/ObstructedMaze-4Q.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/ObstructedMaze-4Q.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS11N5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS11N5.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N1.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N2.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/SimpleCrossingS9N3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/SimpleCrossingS9N3.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/Unlock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/Unlock.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/UnlockPickup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/UnlockPickup.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/door-key-curriculum.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/door-key-curriculum.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/door-key-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/door-key-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/dynamic_obstacles.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/dynamic_obstacles.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/empty-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/empty-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/fetch-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/fetch-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/four-rooms-env.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/four-rooms-env.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/gotodoor-6x6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/gotodoor-6x6.mp4 -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/gotodoor-6x6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/gotodoor-6x6.png -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/figures/multi-room.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/gym-minigrid/figures/multi-room.gif -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/__init__.py: -------------------------------------------------------------------------------- 1 | # Import the envs module so that envs register themselves 2 | import gym_minigrid.envs 3 | 4 | # Import wrappers so it's accessible when installing with pip 5 | import gym_minigrid.wrappers 6 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.envs.empty import * 2 | from gym_minigrid.envs.doorkey import * 3 | from gym_minigrid.envs.multiroom import * 4 | from gym_minigrid.envs.fetch import * 5 | from gym_minigrid.envs.gotoobject import * 6 | from gym_minigrid.envs.gotodoor import * 7 | from gym_minigrid.envs.putnear import * 8 | from gym_minigrid.envs.lockedroom import * 9 | from gym_minigrid.envs.keycorridor import * 10 | from gym_minigrid.envs.unlock import * 11 | from 
gym_minigrid.envs.unlockpickup import * 12 | from gym_minigrid.envs.blockedunlockpickup import * 13 | from gym_minigrid.envs.playground_v0 import * 14 | from gym_minigrid.envs.redbluedoors import * 15 | from gym_minigrid.envs.obstructedmaze import * 16 | from gym_minigrid.envs.memory import * 17 | from gym_minigrid.envs.fourrooms import * 18 | from gym_minigrid.envs.crossing import * 19 | from gym_minigrid.envs.lavagap import * 20 | from gym_minigrid.envs.dynamicobstacles import * 21 | from gym_minigrid.envs.distshift import * 22 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/blockedunlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class BlockedUnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door blocked by a ball, then pick up a box 8 | in another room 9 | """ 10 | 11 | def __init__(self, seed=None): 12 | room_size = 6 13 | super().__init__( 14 | num_rows=1, 15 | num_cols=2, 16 | room_size=room_size, 17 | max_steps=16*room_size**2, 18 | seed=seed 19 | ) 20 | 21 | def _gen_grid(self, width, height): 22 | super()._gen_grid(width, height) 23 | 24 | # Add a box to the room on the right 25 | obj, _ = self.add_object(1, 0, kind="box") 26 | # Make sure the two rooms are directly connected by a locked door 27 | door, pos = self.add_door(0, 0, 0, locked=True) 28 | # Block the door with a ball 29 | color = self._rand_color() 30 | self.grid.set(pos[0]-1, pos[1], Ball(color)) 31 | # Add a key to unlock the door 32 | self.add_object(0, 0, 'key', door.color) 33 | 34 | self.place_agent(0, 0) 35 | 36 | self.obj = obj 37 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 38 | 39 | def step(self, action): 40 | obs, reward, done, info = super().step(action) 41 | 42 | if action == self.actions.pickup: 43 | if self.carrying and self.carrying == self.obj: 44 | reward = self._reward() 45 | done = True 46 | 47 | return obs, reward, done, info 48 | 49 | register( 50 | id='MiniGrid-BlockedUnlockPickup-v0', 51 | entry_point='gym_minigrid.envs:BlockedUnlockPickup' 52 | ) 53 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/distshift.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DistShiftEnv(MiniGridEnv): 5 | """ 6 | Distributional shift environment. 
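    The two registered variants, DistShift1 and DistShift2, differ only in strip2_row,
    i.e. in which row the second strip of lava is placed (row 2 vs. row 5).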
7 | """ 8 | 9 | def __init__( 10 | self, 11 | width=9, 12 | height=7, 13 | agent_start_pos=(1,1), 14 | agent_start_dir=0, 15 | strip2_row=2 16 | ): 17 | self.agent_start_pos = agent_start_pos 18 | self.agent_start_dir = agent_start_dir 19 | self.goal_pos = (width-2, 1) 20 | self.strip2_row = strip2_row 21 | 22 | super().__init__( 23 | width=width, 24 | height=height, 25 | max_steps=4*width*height, 26 | # Set this to True for maximum speed 27 | see_through_walls=True 28 | ) 29 | 30 | def _gen_grid(self, width, height): 31 | # Create an empty grid 32 | self.grid = Grid(width, height) 33 | 34 | # Generate the surrounding walls 35 | self.grid.wall_rect(0, 0, width, height) 36 | 37 | # Place a goal square in the bottom-right corner 38 | self.put_obj(Goal(), *self.goal_pos) 39 | 40 | # Place the lava rows 41 | for i in range(self.width - 6): 42 | self.grid.set(3+i, 1, Lava()) 43 | self.grid.set(3+i, self.strip2_row, Lava()) 44 | 45 | # Place the agent 46 | if self.agent_start_pos is not None: 47 | self.agent_pos = self.agent_start_pos 48 | self.agent_dir = self.agent_start_dir 49 | else: 50 | self.place_agent() 51 | 52 | self.mission = "get to the green goal square" 53 | 54 | class DistShift1(DistShiftEnv): 55 | def __init__(self): 56 | super().__init__(strip2_row=2) 57 | 58 | class DistShift2(DistShiftEnv): 59 | def __init__(self): 60 | super().__init__(strip2_row=5) 61 | 62 | register( 63 | id='MiniGrid-DistShift1-v0', 64 | entry_point='gym_minigrid.envs:DistShift1' 65 | ) 66 | 67 | register( 68 | id='MiniGrid-DistShift2-v0', 69 | entry_point='gym_minigrid.envs:DistShift2' 70 | ) 71 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/doorkey.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class DoorKeyEnv(MiniGridEnv): 5 | """ 6 | Environment with a door and key, sparse reward 7 | """ 8 | 9 | def __init__(self, size=8): 10 | super().__init__( 11 | grid_size=size, 12 | max_steps=10*size*size 13 | ) 14 | 15 | def _gen_grid(self, width, height): 16 | # Create an empty grid 17 | self.grid = Grid(width, height) 18 | 19 | # Generate the surrounding walls 20 | self.grid.wall_rect(0, 0, width, height) 21 | 22 | # Place a goal in the bottom-right corner 23 | self.put_obj(Goal(), width - 2, height - 2) 24 | 25 | # Create a vertical splitting wall 26 | splitIdx = self._rand_int(2, width-2) 27 | self.grid.vert_wall(splitIdx, 0) 28 | 29 | # Place the agent at a random position and orientation 30 | # on the left side of the splitting wall 31 | self.place_agent(size=(splitIdx, height)) 32 | 33 | # Place a door in the wall 34 | doorIdx = self._rand_int(1, width-2) 35 | self.put_obj(Door('yellow', is_locked=True), splitIdx, doorIdx) 36 | 37 | # Place a yellow key on the left side 38 | self.place_obj( 39 | obj=Key('yellow'), 40 | top=(0, 0), 41 | size=(splitIdx, height) 42 | ) 43 | 44 | self.mission = "use the key to open the door and then get to the goal" 45 | 46 | class DoorKeyEnv5x5(DoorKeyEnv): 47 | def __init__(self): 48 | super().__init__(size=5) 49 | 50 | class DoorKeyEnv6x6(DoorKeyEnv): 51 | def __init__(self): 52 | super().__init__(size=6) 53 | 54 | class DoorKeyEnv16x16(DoorKeyEnv): 55 | def __init__(self): 56 | super().__init__(size=16) 57 | 58 | register( 59 | id='MiniGrid-DoorKey-5x5-v0', 60 | entry_point='gym_minigrid.envs:DoorKeyEnv5x5' 61 | ) 62 | 63 | register( 64 | 
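    # Each registered id can be created directly through Gym once gym_minigrid has
    # been imported (the import triggers registration), e.g.:
    #   import gym, gym_minigrid
    #   env = gym.make('MiniGrid-DoorKey-6x6-v0')
    #   obs = env.reset()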
id='MiniGrid-DoorKey-6x6-v0', 65 | entry_point='gym_minigrid.envs:DoorKeyEnv6x6' 66 | ) 67 | 68 | register( 69 | id='MiniGrid-DoorKey-8x8-v0', 70 | entry_point='gym_minigrid.envs:DoorKeyEnv' 71 | ) 72 | 73 | register( 74 | id='MiniGrid-DoorKey-16x16-v0', 75 | entry_point='gym_minigrid.envs:DoorKeyEnv16x16' 76 | ) 77 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/empty.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class EmptyEnv(MiniGridEnv): 5 | """ 6 | Empty grid environment, no obstacles, sparse reward 7 | """ 8 | 9 | def __init__( 10 | self, 11 | size=8, 12 | agent_start_pos=(1,1), 13 | agent_start_dir=0, 14 | ): 15 | self.agent_start_pos = agent_start_pos 16 | self.agent_start_dir = agent_start_dir 17 | 18 | super().__init__( 19 | grid_size=size, 20 | max_steps=4*size*size, 21 | # Set this to True for maximum speed 22 | see_through_walls=True 23 | ) 24 | 25 | def _gen_grid(self, width, height): 26 | # Create an empty grid 27 | self.grid = Grid(width, height) 28 | 29 | # Generate the surrounding walls 30 | self.grid.wall_rect(0, 0, width, height) 31 | 32 | # Place a goal square in the bottom-right corner 33 | self.put_obj(Goal(), width - 2, height - 2) 34 | 35 | # Place the agent 36 | if self.agent_start_pos is not None: 37 | self.agent_pos = self.agent_start_pos 38 | self.agent_dir = self.agent_start_dir 39 | else: 40 | self.place_agent() 41 | 42 | self.mission = "get to the green goal square" 43 | 44 | class EmptyEnv5x5(EmptyEnv): 45 | def __init__(self): 46 | super().__init__(size=5) 47 | 48 | class EmptyRandomEnv5x5(EmptyEnv): 49 | def __init__(self): 50 | super().__init__(size=5, agent_start_pos=None) 51 | 52 | class EmptyEnv6x6(EmptyEnv): 53 | def __init__(self): 54 | super().__init__(size=6) 55 | 56 | class EmptyRandomEnv6x6(EmptyEnv): 57 | def __init__(self): 58 | super().__init__(size=6, agent_start_pos=None) 59 | 60 | class EmptyEnv16x16(EmptyEnv): 61 | def __init__(self): 62 | super().__init__(size=16) 63 | 64 | register( 65 | id='MiniGrid-Empty-5x5-v0', 66 | entry_point='gym_minigrid.envs:EmptyEnv5x5' 67 | ) 68 | 69 | register( 70 | id='MiniGrid-Empty-Random-5x5-v0', 71 | entry_point='gym_minigrid.envs:EmptyRandomEnv5x5' 72 | ) 73 | 74 | register( 75 | id='MiniGrid-Empty-6x6-v0', 76 | entry_point='gym_minigrid.envs:EmptyEnv6x6' 77 | ) 78 | 79 | register( 80 | id='MiniGrid-Empty-Random-6x6-v0', 81 | entry_point='gym_minigrid.envs:EmptyRandomEnv6x6' 82 | ) 83 | 84 | register( 85 | id='MiniGrid-Empty-8x8-v0', 86 | entry_point='gym_minigrid.envs:EmptyEnv' 87 | ) 88 | 89 | register( 90 | id='MiniGrid-Empty-16x16-v0', 91 | entry_point='gym_minigrid.envs:EmptyEnv16x16' 92 | ) 93 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/fetch.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class FetchEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent has to fetch a random object 7 | named using English text strings 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=8, 13 | numObjs=3 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size**2, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | 
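            # (with see_through_walls=True the occlusion mask for the agent's partial
            # view is skipped, which is where the speed gain comes from)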
) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.horz_wall(0, 0) 29 | self.grid.horz_wall(0, height-1) 30 | self.grid.vert_wall(0, 0) 31 | self.grid.vert_wall(width-1, 0) 32 | 33 | types = ['key', 'ball'] 34 | 35 | objs = [] 36 | 37 | # For each object to be generated 38 | while len(objs) < self.numObjs: 39 | objType = self._rand_elem(types) 40 | objColor = self._rand_elem(COLOR_NAMES) 41 | 42 | if objType == 'key': 43 | obj = Key(objColor) 44 | elif objType == 'ball': 45 | obj = Ball(objColor) 46 | 47 | self.place_obj(obj) 48 | objs.append(obj) 49 | 50 | # Randomize the player start position and orientation 51 | self.place_agent() 52 | 53 | # Choose a random object to be picked up 54 | target = objs[self._rand_int(0, len(objs))] 55 | self.targetType = target.type 56 | self.targetColor = target.color 57 | 58 | descStr = '%s %s' % (self.targetColor, self.targetType) 59 | 60 | # Generate the mission string 61 | idx = self._rand_int(0, 5) 62 | if idx == 0: 63 | self.mission = 'get a %s' % descStr 64 | elif idx == 1: 65 | self.mission = 'go get a %s' % descStr 66 | elif idx == 2: 67 | self.mission = 'fetch a %s' % descStr 68 | elif idx == 3: 69 | self.mission = 'go fetch a %s' % descStr 70 | elif idx == 4: 71 | self.mission = 'you must fetch a %s' % descStr 72 | assert hasattr(self, 'mission') 73 | 74 | def step(self, action): 75 | obs, reward, done, info = MiniGridEnv.step(self, action) 76 | 77 | if self.carrying: 78 | if self.carrying.color == self.targetColor and \ 79 | self.carrying.type == self.targetType: 80 | reward = self._reward() 81 | done = True 82 | else: 83 | reward = 0 84 | done = True 85 | 86 | return obs, reward, done, info 87 | 88 | class FetchEnv5x5N2(FetchEnv): 89 | def __init__(self): 90 | super().__init__(size=5, numObjs=2) 91 | 92 | class FetchEnv6x6N2(FetchEnv): 93 | def __init__(self): 94 | super().__init__(size=6, numObjs=2) 95 | 96 | register( 97 | id='MiniGrid-Fetch-5x5-N2-v0', 98 | entry_point='gym_minigrid.envs:FetchEnv5x5N2' 99 | ) 100 | 101 | register( 102 | id='MiniGrid-Fetch-6x6-N2-v0', 103 | entry_point='gym_minigrid.envs:FetchEnv6x6N2' 104 | ) 105 | 106 | register( 107 | id='MiniGrid-Fetch-8x8-N3-v0', 108 | entry_point='gym_minigrid.envs:FetchEnv' 109 | ) 110 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/fourrooms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from gym_minigrid.minigrid import * 5 | from gym_minigrid.register import register 6 | 7 | 8 | class FourRoomsEnv(MiniGridEnv): 9 | """ 10 | Classic 4 rooms gridworld environment. 11 | Can specify agent and goal position, if not it set at random. 
12 | """ 13 | 14 | def __init__(self, agent_pos=None, goal_pos=None): 15 | self._agent_default_pos = agent_pos 16 | self._goal_default_pos = goal_pos 17 | super().__init__(grid_size=19, max_steps=100) 18 | 19 | def _gen_grid(self, width, height): 20 | # Create the grid 21 | self.grid = Grid(width, height) 22 | 23 | # Generate the surrounding walls 24 | self.grid.horz_wall(0, 0) 25 | self.grid.horz_wall(0, height - 1) 26 | self.grid.vert_wall(0, 0) 27 | self.grid.vert_wall(width - 1, 0) 28 | 29 | room_w = width // 2 30 | room_h = height // 2 31 | 32 | # For each row of rooms 33 | for j in range(0, 2): 34 | 35 | # For each column 36 | for i in range(0, 2): 37 | xL = i * room_w 38 | yT = j * room_h 39 | xR = xL + room_w 40 | yB = yT + room_h 41 | 42 | # Bottom wall and door 43 | if i + 1 < 2: 44 | self.grid.vert_wall(xR, yT, room_h) 45 | pos = (xR, self._rand_int(yT + 1, yB)) 46 | self.grid.set(*pos, None) 47 | 48 | # Bottom wall and door 49 | if j + 1 < 2: 50 | self.grid.horz_wall(xL, yB, room_w) 51 | pos = (self._rand_int(xL + 1, xR), yB) 52 | self.grid.set(*pos, None) 53 | 54 | # Randomize the player start position and orientation 55 | if self._agent_default_pos is not None: 56 | self.agent_pos = self._agent_default_pos 57 | self.grid.set(*self._agent_default_pos, None) 58 | self.agent_dir = self._rand_int(0, 4) # assuming random start direction 59 | else: 60 | self.place_agent() 61 | 62 | if self._goal_default_pos is not None: 63 | goal = Goal() 64 | self.put_obj(goal, *self._goal_default_pos) 65 | goal.init_pos, goal.cur_pos = self._goal_default_pos 66 | else: 67 | self.place_obj(Goal()) 68 | 69 | self.mission = 'Reach the goal' 70 | 71 | def step(self, action): 72 | obs, reward, done, info = MiniGridEnv.step(self, action) 73 | return obs, reward, done, info 74 | 75 | register( 76 | id='MiniGrid-FourRooms-v0', 77 | entry_point='gym_minigrid.envs:FourRoomsEnv' 78 | ) 79 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/gotodoor.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class GoToDoorEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to go to a given object 7 | named using an English text string 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=5 13 | ): 14 | assert size >= 5 15 | 16 | super().__init__( 17 | grid_size=size, 18 | max_steps=5*size**2, 19 | # Set this to True for maximum speed 20 | see_through_walls=True 21 | ) 22 | 23 | def _gen_grid(self, width, height): 24 | # Create the grid 25 | self.grid = Grid(width, height) 26 | 27 | # Randomly vary the room width and height 28 | width = self._rand_int(5, width+1) 29 | height = self._rand_int(5, height+1) 30 | 31 | # Generate the surrounding walls 32 | self.grid.wall_rect(0, 0, width, height) 33 | 34 | # Generate the 4 doors at random positions 35 | doorPos = [] 36 | doorPos.append((self._rand_int(2, width-2), 0)) 37 | doorPos.append((self._rand_int(2, width-2), height-1)) 38 | doorPos.append((0, self._rand_int(2, height-2))) 39 | doorPos.append((width-1, self._rand_int(2, height-2))) 40 | 41 | # Generate the door colors 42 | doorColors = [] 43 | while len(doorColors) < len(doorPos): 44 | color = self._rand_elem(COLOR_NAMES) 45 | if color in doorColors: 46 | continue 47 | doorColors.append(color) 48 | 49 | # Place the doors in the grid 50 | for idx, pos in enumerate(doorPos): 51 | color = 
doorColors[idx] 52 | self.grid.set(*pos, Door(color)) 53 | 54 | # Randomize the agent start position and orientation 55 | self.place_agent(size=(width, height)) 56 | 57 | # Select a random target door 58 | doorIdx = self._rand_int(0, len(doorPos)) 59 | self.target_pos = doorPos[doorIdx] 60 | self.target_color = doorColors[doorIdx] 61 | 62 | # Generate the mission string 63 | self.mission = 'go to the %s door' % self.target_color 64 | 65 | def step(self, action): 66 | obs, reward, done, info = super().step(action) 67 | 68 | ax, ay = self.agent_pos 69 | tx, ty = self.target_pos 70 | 71 | # Don't let the agent open any of the doors 72 | if action == self.actions.toggle: 73 | done = True 74 | 75 | # Reward performing done action in front of the target door 76 | if action == self.actions.done: 77 | if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1): 78 | reward = self._reward() 79 | done = True 80 | 81 | return obs, reward, done, info 82 | 83 | class GoToDoor8x8Env(GoToDoorEnv): 84 | def __init__(self): 85 | super().__init__(size=8) 86 | 87 | class GoToDoor6x6Env(GoToDoorEnv): 88 | def __init__(self): 89 | super().__init__(size=6) 90 | 91 | register( 92 | id='MiniGrid-GoToDoor-5x5-v0', 93 | entry_point='gym_minigrid.envs:GoToDoorEnv' 94 | ) 95 | 96 | register( 97 | id='MiniGrid-GoToDoor-6x6-v0', 98 | entry_point='gym_minigrid.envs:GoToDoor6x6Env' 99 | ) 100 | 101 | register( 102 | id='MiniGrid-GoToDoor-8x8-v0', 103 | entry_point='gym_minigrid.envs:GoToDoor8x8Env' 104 | ) 105 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/gotoobject.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class GoToObjectEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to go to a given object 7 | named using an English text string 8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=6, 13 | numObjs=2 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size**2, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | ) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.wall_rect(0, 0, width, height) 29 | 30 | # Types and colors of objects we can generate 31 | types = ['key', 'ball', 'box'] 32 | 33 | objs = [] 34 | objPos = [] 35 | 36 | # Until we have generated all the objects 37 | while len(objs) < self.numObjs: 38 | objType = self._rand_elem(types) 39 | objColor = self._rand_elem(COLOR_NAMES) 40 | 41 | # If this object already exists, try again 42 | if (objType, objColor) in objs: 43 | continue 44 | 45 | if objType == 'key': 46 | obj = Key(objColor) 47 | elif objType == 'ball': 48 | obj = Ball(objColor) 49 | elif objType == 'box': 50 | obj = Box(objColor) 51 | 52 | pos = self.place_obj(obj) 53 | objs.append((objType, objColor)) 54 | objPos.append(pos) 55 | 56 | # Randomize the agent start position and orientation 57 | self.place_agent() 58 | 59 | # Choose a random object to be picked up 60 | objIdx = self._rand_int(0, len(objs)) 61 | self.targetType, self.target_color = objs[objIdx] 62 | self.target_pos = objPos[objIdx] 63 | 64 | descStr = '%s %s' % (self.target_color, self.targetType) 65 | self.mission = 'go to the %s' % descStr 66 | #print(self.mission) 67 | 68 | def step(self, action): 69 | obs, reward, done, 
info = MiniGridEnv.step(self, action) 70 | 71 | ax, ay = self.agent_pos 72 | tx, ty = self.target_pos 73 | 74 | # Toggle/pickup action terminates the episode 75 | if action == self.actions.toggle: 76 | done = True 77 | 78 | # Reward performing the done action next to the target object 79 | if action == self.actions.done: 80 | if abs(ax - tx) <= 1 and abs(ay - ty) <= 1: 81 | reward = self._reward() 82 | done = True 83 | 84 | return obs, reward, done, info 85 | 86 | class GotoEnv8x8N2(GoToObjectEnv): 87 | def __init__(self): 88 | super().__init__(size=8, numObjs=2) 89 | 90 | register( 91 | id='MiniGrid-GoToObject-6x6-N2-v0', 92 | entry_point='gym_minigrid.envs:GoToObjectEnv' 93 | ) 94 | 95 | register( 96 | id='MiniGrid-GoToObject-8x8-N2-v0', 97 | entry_point='gym_minigrid.envs:GotoEnv8x8N2' 98 | ) 99 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/keycorridor.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.roomgrid import RoomGrid 2 | from gym_minigrid.register import register 3 | 4 | class KeyCorridor(RoomGrid): 5 | """ 6 | A ball is behind a locked door, the key is placed in a 7 | random room. 8 | """ 9 | 10 | def __init__( 11 | self, 12 | num_rows=3, 13 | obj_type="ball", 14 | room_size=6, 15 | seed=None 16 | ): 17 | self.obj_type = obj_type 18 | 19 | super().__init__( 20 | room_size=room_size, 21 | num_rows=num_rows, 22 | max_steps=30*room_size**2, 23 | seed=seed, 24 | ) 25 | 26 | def _gen_grid(self, width, height): 27 | super()._gen_grid(width, height) 28 | 29 | # Connect the middle column rooms into a hallway 30 | for j in range(1, self.num_rows): 31 | self.remove_wall(1, j, 3) 32 | 33 | # Add a locked door on the bottom right 34 | # Add an object behind the locked door 35 | room_idx = self._rand_int(0, self.num_rows) 36 | door, _ = self.add_door(2, room_idx, 2, locked=True) 37 | obj, _ = self.add_object(2, room_idx, kind=self.obj_type) 38 | 39 | # Add a key in a random room on the left side 40 | self.add_object(0, self._rand_int(0, self.num_rows), 'key', door.color) 41 | 42 | # Place the agent in the middle 43 | self.place_agent(1, self.num_rows // 2) 44 | 45 | # Make sure all rooms are accessible 46 | self.connect_all() 47 | 48 | self.obj = obj 49 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 50 | 51 | def step(self, action): 52 | obs, reward, done, info = super().step(action) 53 | 54 | if action == self.actions.pickup: 55 | if self.carrying and self.carrying == self.obj: 56 | reward = self._reward() 57 | done = True 58 | 59 | return obs, reward, done, info 60 | 61 | class KeyCorridorS3R1(KeyCorridor): 62 | def __init__(self, seed=None): 63 | super().__init__( 64 | room_size=3, 65 | num_rows=1, 66 | seed=seed 67 | ) 68 | 69 | class KeyCorridorS3R2(KeyCorridor): 70 | def __init__(self, seed=None): 71 | super().__init__( 72 | room_size=3, 73 | num_rows=2, 74 | seed=seed 75 | ) 76 | 77 | class KeyCorridorS3R3(KeyCorridor): 78 | def __init__(self, seed=None): 79 | super().__init__( 80 | room_size=3, 81 | num_rows=3, 82 | seed=seed 83 | ) 84 | 85 | class KeyCorridorS4R3(KeyCorridor): 86 | def __init__(self, seed=None): 87 | super().__init__( 88 | room_size=4, 89 | num_rows=3, 90 | seed=seed 91 | ) 92 | 93 | class KeyCorridorS5R3(KeyCorridor): 94 | def __init__(self, seed=None): 95 | super().__init__( 96 | room_size=5, 97 | num_rows=3, 98 | seed=seed 99 | ) 100 | 101 | class KeyCorridorS6R3(KeyCorridor): 102 | def __init__(self, 
seed=None): 103 | super().__init__( 104 | room_size=6, 105 | num_rows=3, 106 | seed=seed 107 | ) 108 | 109 | register( 110 | id='MiniGrid-KeyCorridorS3R1-v0', 111 | entry_point='gym_minigrid.envs:KeyCorridorS3R1' 112 | ) 113 | 114 | register( 115 | id='MiniGrid-KeyCorridorS3R2-v0', 116 | entry_point='gym_minigrid.envs:KeyCorridorS3R2' 117 | ) 118 | 119 | register( 120 | id='MiniGrid-KeyCorridorS3R3-v0', 121 | entry_point='gym_minigrid.envs:KeyCorridorS3R3' 122 | ) 123 | 124 | register( 125 | id='MiniGrid-KeyCorridorS4R3-v0', 126 | entry_point='gym_minigrid.envs:KeyCorridorS4R3' 127 | ) 128 | 129 | register( 130 | id='MiniGrid-KeyCorridorS5R3-v0', 131 | entry_point='gym_minigrid.envs:KeyCorridorS5R3' 132 | ) 133 | 134 | register( 135 | id='MiniGrid-KeyCorridorS6R3-v0', 136 | entry_point='gym_minigrid.envs:KeyCorridorS6R3' 137 | ) 138 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/lavagap.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class LavaGapEnv(MiniGridEnv): 5 | """ 6 | Environment with one wall of lava with a small gap to cross through 7 | This environment is similar to LavaCrossing but simpler in structure. 8 | """ 9 | 10 | def __init__(self, size, obstacle_type=Lava, seed=None): 11 | self.obstacle_type = obstacle_type 12 | super().__init__( 13 | grid_size=size, 14 | max_steps=4*size*size, 15 | # Set this to True for maximum speed 16 | see_through_walls=False, 17 | seed=None 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | assert width >= 5 and height >= 5 22 | 23 | # Create an empty grid 24 | self.grid = Grid(width, height) 25 | 26 | # Generate the surrounding walls 27 | self.grid.wall_rect(0, 0, width, height) 28 | 29 | # Place the agent in the top-left corner 30 | self.agent_pos = (1, 1) 31 | self.agent_dir = 0 32 | 33 | # Place a goal square in the bottom-right corner 34 | self.goal_pos = np.array((width - 2, height - 2)) 35 | self.put_obj(Goal(), *self.goal_pos) 36 | 37 | # Generate and store random gap position 38 | self.gap_pos = np.array(( 39 | self._rand_int(2, width - 2), 40 | self._rand_int(1, height - 1), 41 | )) 42 | 43 | # Place the obstacle wall 44 | self.grid.vert_wall(self.gap_pos[0], 1, height - 2, self.obstacle_type) 45 | 46 | # Put a hole in the wall 47 | self.grid.set(*self.gap_pos, None) 48 | 49 | self.mission = ( 50 | "avoid the lava and get to the green goal square" 51 | if self.obstacle_type == Lava 52 | else "find the opening and get to the green goal square" 53 | ) 54 | 55 | class LavaGapS5Env(LavaGapEnv): 56 | def __init__(self): 57 | super().__init__(size=5) 58 | 59 | class LavaGapS6Env(LavaGapEnv): 60 | def __init__(self): 61 | super().__init__(size=6) 62 | 63 | class LavaGapS7Env(LavaGapEnv): 64 | def __init__(self): 65 | super().__init__(size=7) 66 | 67 | register( 68 | id='MiniGrid-LavaGapS5-v0', 69 | entry_point='gym_minigrid.envs:LavaGapS5Env' 70 | ) 71 | 72 | register( 73 | id='MiniGrid-LavaGapS6-v0', 74 | entry_point='gym_minigrid.envs:LavaGapS6Env' 75 | ) 76 | 77 | register( 78 | id='MiniGrid-LavaGapS7-v0', 79 | entry_point='gym_minigrid.envs:LavaGapS7Env' 80 | ) 81 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/lockedroom.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | from 
gym_minigrid.minigrid import * 3 | from gym_minigrid.register import register 4 | 5 | class Room: 6 | def __init__(self, 7 | top, 8 | size, 9 | doorPos 10 | ): 11 | self.top = top 12 | self.size = size 13 | self.doorPos = doorPos 14 | self.color = None 15 | self.locked = False 16 | 17 | def rand_pos(self, env): 18 | topX, topY = self.top 19 | sizeX, sizeY = self.size 20 | return env._rand_pos( 21 | topX + 1, topX + sizeX - 1, 22 | topY + 1, topY + sizeY - 1 23 | ) 24 | 25 | class LockedRoom(MiniGridEnv): 26 | """ 27 | Environment in which the agent is instructed to go to a given object 28 | named using an English text string 29 | """ 30 | 31 | def __init__( 32 | self, 33 | size=19 34 | ): 35 | super().__init__(grid_size=size, max_steps=10*size) 36 | 37 | def _gen_grid(self, width, height): 38 | # Create the grid 39 | self.grid = Grid(width, height) 40 | 41 | # Generate the surrounding walls 42 | for i in range(0, width): 43 | self.grid.set(i, 0, Wall()) 44 | self.grid.set(i, height-1, Wall()) 45 | for j in range(0, height): 46 | self.grid.set(0, j, Wall()) 47 | self.grid.set(width-1, j, Wall()) 48 | 49 | # Hallway walls 50 | lWallIdx = width // 2 - 2 51 | rWallIdx = width // 2 + 2 52 | for j in range(0, height): 53 | self.grid.set(lWallIdx, j, Wall()) 54 | self.grid.set(rWallIdx, j, Wall()) 55 | 56 | self.rooms = [] 57 | 58 | # Room splitting walls 59 | for n in range(0, 3): 60 | j = n * (height // 3) 61 | for i in range(0, lWallIdx): 62 | self.grid.set(i, j, Wall()) 63 | for i in range(rWallIdx, width): 64 | self.grid.set(i, j, Wall()) 65 | 66 | roomW = lWallIdx + 1 67 | roomH = height // 3 + 1 68 | self.rooms.append(Room( 69 | (0, j), 70 | (roomW, roomH), 71 | (lWallIdx, j + 3) 72 | )) 73 | self.rooms.append(Room( 74 | (rWallIdx, j), 75 | (roomW, roomH), 76 | (rWallIdx, j + 3) 77 | )) 78 | 79 | # Choose one random room to be locked 80 | lockedRoom = self._rand_elem(self.rooms) 81 | lockedRoom.locked = True 82 | goalPos = lockedRoom.rand_pos(self) 83 | self.grid.set(*goalPos, Goal()) 84 | 85 | # Assign the door colors 86 | colors = set(COLOR_NAMES) 87 | for room in self.rooms: 88 | color = self._rand_elem(sorted(colors)) 89 | colors.remove(color) 90 | room.color = color 91 | if room.locked: 92 | self.grid.set(*room.doorPos, Door(color, is_locked=True)) 93 | else: 94 | self.grid.set(*room.doorPos, Door(color)) 95 | 96 | # Select a random room to contain the key 97 | while True: 98 | keyRoom = self._rand_elem(self.rooms) 99 | if keyRoom != lockedRoom: 100 | break 101 | keyPos = keyRoom.rand_pos(self) 102 | self.grid.set(*keyPos, Key(lockedRoom.color)) 103 | 104 | # Randomize the player start position and orientation 105 | self.agent_pos = self.place_agent( 106 | top=(lWallIdx, 0), 107 | size=(rWallIdx-lWallIdx, height) 108 | ) 109 | 110 | # Generate the mission string 111 | self.mission = ( 112 | 'get the %s key from the %s room, ' 113 | 'unlock the %s door and ' 114 | 'go to the goal' 115 | ) % (lockedRoom.color, keyRoom.color, lockedRoom.color) 116 | 117 | def step(self, action): 118 | obs, reward, done, info = MiniGridEnv.step(self, action) 119 | return obs, reward, done, info 120 | 121 | register( 122 | id='MiniGrid-LockedRoom-v0', 123 | entry_point='gym_minigrid.envs:LockedRoom' 124 | ) 125 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/playground_v0.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register 
import register 3 | 4 | class PlaygroundV0(MiniGridEnv): 5 | """ 6 | Environment with multiple rooms and random objects. 7 | This environment has no specific goals or rewards. 8 | """ 9 | 10 | def __init__(self): 11 | super().__init__(grid_size=19, max_steps=100) 12 | 13 | def _gen_grid(self, width, height): 14 | # Create the grid 15 | self.grid = Grid(width, height) 16 | 17 | # Generate the surrounding walls 18 | self.grid.horz_wall(0, 0) 19 | self.grid.horz_wall(0, height-1) 20 | self.grid.vert_wall(0, 0) 21 | self.grid.vert_wall(width-1, 0) 22 | 23 | roomW = width // 3 24 | roomH = height // 3 25 | 26 | # For each row of rooms 27 | for j in range(0, 3): 28 | 29 | # For each column 30 | for i in range(0, 3): 31 | xL = i * roomW 32 | yT = j * roomH 33 | xR = xL + roomW 34 | yB = yT + roomH 35 | 36 | # Bottom wall and door 37 | if i+1 < 3: 38 | self.grid.vert_wall(xR, yT, roomH) 39 | pos = (xR, self._rand_int(yT+1, yB-1)) 40 | color = self._rand_elem(COLOR_NAMES) 41 | self.grid.set(*pos, Door(color)) 42 | 43 | # Bottom wall and door 44 | if j+1 < 3: 45 | self.grid.horz_wall(xL, yB, roomW) 46 | pos = (self._rand_int(xL+1, xR-1), yB) 47 | color = self._rand_elem(COLOR_NAMES) 48 | self.grid.set(*pos, Door(color)) 49 | 50 | # Randomize the player start position and orientation 51 | self.place_agent() 52 | 53 | # Place random objects in the world 54 | types = ['key', 'ball', 'box'] 55 | for i in range(0, 12): 56 | objType = self._rand_elem(types) 57 | objColor = self._rand_elem(COLOR_NAMES) 58 | if objType == 'key': 59 | obj = Key(objColor) 60 | elif objType == 'ball': 61 | obj = Ball(objColor) 62 | elif objType == 'box': 63 | obj = Box(objColor) 64 | self.place_obj(obj) 65 | 66 | # No explicit mission in this environment 67 | self.mission = '' 68 | 69 | def step(self, action): 70 | obs, reward, done, info = MiniGridEnv.step(self, action) 71 | return obs, reward, done, info 72 | 73 | register( 74 | id='MiniGrid-Playground-v0', 75 | entry_point='gym_minigrid.envs:PlaygroundV0' 76 | ) 77 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/putnear.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class PutNearEnv(MiniGridEnv): 5 | """ 6 | Environment in which the agent is instructed to place an object near 7 | another object through a natural language string. 
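    A generated mission reads, e.g., "put the red ball near the green box".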
8 | """ 9 | 10 | def __init__( 11 | self, 12 | size=6, 13 | numObjs=2 14 | ): 15 | self.numObjs = numObjs 16 | 17 | super().__init__( 18 | grid_size=size, 19 | max_steps=5*size, 20 | # Set this to True for maximum speed 21 | see_through_walls=True 22 | ) 23 | 24 | def _gen_grid(self, width, height): 25 | self.grid = Grid(width, height) 26 | 27 | # Generate the surrounding walls 28 | self.grid.horz_wall(0, 0) 29 | self.grid.horz_wall(0, height-1) 30 | self.grid.vert_wall(0, 0) 31 | self.grid.vert_wall(width-1, 0) 32 | 33 | # Types and colors of objects we can generate 34 | types = ['key', 'ball', 'box'] 35 | 36 | objs = [] 37 | objPos = [] 38 | 39 | def near_obj(env, p1): 40 | for p2 in objPos: 41 | dx = p1[0] - p2[0] 42 | dy = p1[1] - p2[1] 43 | if abs(dx) <= 1 and abs(dy) <= 1: 44 | return True 45 | return False 46 | 47 | # Until we have generated all the objects 48 | while len(objs) < self.numObjs: 49 | objType = self._rand_elem(types) 50 | objColor = self._rand_elem(COLOR_NAMES) 51 | 52 | # If this object already exists, try again 53 | if (objType, objColor) in objs: 54 | continue 55 | 56 | if objType == 'key': 57 | obj = Key(objColor) 58 | elif objType == 'ball': 59 | obj = Ball(objColor) 60 | elif objType == 'box': 61 | obj = Box(objColor) 62 | 63 | pos = self.place_obj(obj, reject_fn=near_obj) 64 | 65 | objs.append((objType, objColor)) 66 | objPos.append(pos) 67 | 68 | # Randomize the agent start position and orientation 69 | self.place_agent() 70 | 71 | # Choose a random object to be moved 72 | objIdx = self._rand_int(0, len(objs)) 73 | self.move_type, self.moveColor = objs[objIdx] 74 | self.move_pos = objPos[objIdx] 75 | 76 | # Choose a target object (to put the first object next to) 77 | while True: 78 | targetIdx = self._rand_int(0, len(objs)) 79 | if targetIdx != objIdx: 80 | break 81 | self.target_type, self.target_color = objs[targetIdx] 82 | self.target_pos = objPos[targetIdx] 83 | 84 | self.mission = 'put the %s %s near the %s %s' % ( 85 | self.moveColor, 86 | self.move_type, 87 | self.target_color, 88 | self.target_type 89 | ) 90 | 91 | def step(self, action): 92 | preCarrying = self.carrying 93 | 94 | obs, reward, done, info = super().step(action) 95 | 96 | u, v = self.dir_vec 97 | ox, oy = (self.agent_pos[0] + u, self.agent_pos[1] + v) 98 | tx, ty = self.target_pos 99 | 100 | # If we picked up the wrong object, terminate the episode 101 | if action == self.actions.pickup and self.carrying: 102 | if self.carrying.type != self.move_type or self.carrying.color != self.moveColor: 103 | done = True 104 | 105 | # If successfully dropping an object near the target 106 | if action == self.actions.drop and preCarrying: 107 | if self.grid.get(ox, oy) is preCarrying: 108 | if abs(ox - tx) <= 1 and abs(oy - ty) <= 1: 109 | reward = self._reward() 110 | done = True 111 | 112 | return obs, reward, done, info 113 | 114 | class PutNear8x8N3(PutNearEnv): 115 | def __init__(self): 116 | super().__init__(size=8, numObjs=3) 117 | 118 | register( 119 | id='MiniGrid-PutNear-6x6-N2-v0', 120 | entry_point='gym_minigrid.envs:PutNearEnv' 121 | ) 122 | 123 | register( 124 | id='MiniGrid-PutNear-8x8-N3-v0', 125 | entry_point='gym_minigrid.envs:PutNear8x8N3' 126 | ) 127 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/redbluedoors.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import * 2 | from gym_minigrid.register import register 3 | 4 | class 
RedBlueDoorEnv(MiniGridEnv): 5 | """ 6 | Single room with red and blue doors on opposite sides. 7 | The red door must be opened before the blue door to 8 | obtain a reward. 9 | """ 10 | 11 | def __init__(self, size=8): 12 | self.size = size 13 | 14 | super().__init__( 15 | width=2*size, 16 | height=size, 17 | max_steps=20*size*size 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | # Create an empty grid 22 | self.grid = Grid(width, height) 23 | 24 | # Generate the grid walls 25 | self.grid.wall_rect(0, 0, 2*self.size, self.size) 26 | self.grid.wall_rect(self.size//2, 0, self.size, self.size) 27 | 28 | # Place the agent in the top-left corner 29 | self.place_agent(top=(self.size//2, 0), size=(self.size, self.size)) 30 | 31 | # Add a red door at a random position in the left wall 32 | pos = self._rand_int(1, self.size - 1) 33 | self.red_door = Door("red") 34 | self.grid.set(self.size//2, pos, self.red_door) 35 | 36 | # Add a blue door at a random position in the right wall 37 | pos = self._rand_int(1, self.size - 1) 38 | self.blue_door = Door("blue") 39 | self.grid.set(self.size//2 + self.size - 1, pos, self.blue_door) 40 | 41 | # Generate the mission string 42 | self.mission = "open the red door then the blue door" 43 | 44 | def step(self, action): 45 | red_door_opened_before = self.red_door.is_open 46 | blue_door_opened_before = self.blue_door.is_open 47 | 48 | obs, reward, done, info = MiniGridEnv.step(self, action) 49 | 50 | red_door_opened_after = self.red_door.is_open 51 | blue_door_opened_after = self.blue_door.is_open 52 | 53 | if blue_door_opened_after: 54 | if red_door_opened_before: 55 | reward = self._reward() 56 | done = True 57 | else: 58 | reward = 0 59 | done = True 60 | 61 | elif red_door_opened_after: 62 | if blue_door_opened_before: 63 | reward = 0 64 | done = True 65 | 66 | return obs, reward, done, info 67 | 68 | class RedBlueDoorEnv6x6(RedBlueDoorEnv): 69 | def __init__(self): 70 | super().__init__(size=6) 71 | 72 | register( 73 | id='MiniGrid-RedBlueDoors-6x6-v0', 74 | entry_point='gym_minigrid.envs:RedBlueDoorEnv6x6' 75 | ) 76 | 77 | register( 78 | id='MiniGrid-RedBlueDoors-8x8-v0', 79 | entry_point='gym_minigrid.envs:RedBlueDoorEnv' 80 | ) 81 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/unlock.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class Unlock(RoomGrid): 6 | """ 7 | Unlock a door 8 | """ 9 | 10 | def __init__(self, seed=None): 11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Make sure the two rooms are directly connected by a locked door 24 | door, _ = self.add_door(0, 0, 0, locked=True) 25 | # Add a key to unlock the door 26 | self.add_object(0, 0, 'key', door.color) 27 | 28 | self.place_agent(0, 0) 29 | 30 | self.door = door 31 | self.mission = "open the door" 32 | 33 | def step(self, action): 34 | obs, reward, done, info = super().step(action) 35 | 36 | if action == self.actions.toggle: 37 | if self.door.is_open: 38 | reward = self._reward() 39 | done = True 40 | 41 | return obs, reward, done, info 42 | 43 | register( 44 | id='MiniGrid-Unlock-v0', 45 | 
entry_point='gym_minigrid.envs:Unlock' 46 | ) 47 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/envs/unlockpickup.py: -------------------------------------------------------------------------------- 1 | from gym_minigrid.minigrid import Ball 2 | from gym_minigrid.roomgrid import RoomGrid 3 | from gym_minigrid.register import register 4 | 5 | class UnlockPickup(RoomGrid): 6 | """ 7 | Unlock a door, then pick up a box in another room 8 | """ 9 | 10 | def __init__(self, seed=None): 11 | room_size = 6 12 | super().__init__( 13 | num_rows=1, 14 | num_cols=2, 15 | room_size=room_size, 16 | max_steps=8*room_size**2, 17 | seed=seed 18 | ) 19 | 20 | def _gen_grid(self, width, height): 21 | super()._gen_grid(width, height) 22 | 23 | # Add a box to the room on the right 24 | obj, _ = self.add_object(1, 0, kind="box") 25 | # Make sure the two rooms are directly connected by a locked door 26 | door, _ = self.add_door(0, 0, 0, locked=True) 27 | # Add a key to unlock the door 28 | self.add_object(0, 0, 'key', door.color) 29 | 30 | self.place_agent(0, 0) 31 | 32 | self.obj = obj 33 | self.mission = "pick up the %s %s" % (obj.color, obj.type) 34 | 35 | def step(self, action): 36 | obs, reward, done, info = super().step(action) 37 | 38 | if action == self.actions.pickup: 39 | if self.carrying and self.carrying == self.obj: 40 | reward = self._reward() 41 | done = True 42 | 43 | return obs, reward, done, info 44 | 45 | register( 46 | id='MiniGrid-UnlockPickup-v0', 47 | entry_point='gym_minigrid.envs:UnlockPickup' 48 | ) 49 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/register.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register as gym_register 2 | 3 | env_list = [] 4 | 5 | def register( 6 | id, 7 | entry_point, 8 | reward_threshold=0.95 9 | ): 10 | assert id.startswith("MiniGrid-") 11 | assert id not in env_list 12 | 13 | # Register the environment with OpenAI gym 14 | gym_register( 15 | id=id, 16 | entry_point=entry_point, 17 | reward_threshold=reward_threshold 18 | ) 19 | 20 | # Add the environment to the set 21 | env_list.append(id) 22 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/rendering.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | def downsample(img, factor): 5 | """ 6 | Downsample an image along both dimensions by some factor 7 | """ 8 | 9 | assert img.shape[0] % factor == 0 10 | assert img.shape[1] % factor == 0 11 | 12 | img = img.reshape([img.shape[0]//factor, factor, img.shape[1]//factor, factor, 3]) 13 | img = img.mean(axis=3) 14 | img = img.mean(axis=1) 15 | 16 | return img 17 | 18 | def fill_coords(img, fn, color): 19 | """ 20 | Fill pixels of an image with coordinates matching a filter function 21 | """ 22 | 23 | for y in range(img.shape[0]): 24 | for x in range(img.shape[1]): 25 | yf = (y + 0.5) / img.shape[0] 26 | xf = (x + 0.5) / img.shape[1] 27 | if fn(xf, yf): 28 | img[y, x] = color 29 | 30 | return img 31 | 32 | def rotate_fn(fin, cx, cy, theta): 33 | def fout(x, y): 34 | x = x - cx 35 | y = y - cy 36 | 37 | x2 = cx + x * math.cos(-theta) - y * math.sin(-theta) 38 | y2 = cy + y * math.cos(-theta) + x * math.sin(-theta) 39 | 40 | return fin(x2, y2) 41 | 42 | return fout 43 | 44 | def point_in_line(x0, 
y0, x1, y1, r): 45 | p0 = np.array([x0, y0]) 46 | p1 = np.array([x1, y1]) 47 | dir = p1 - p0 48 | dist = np.linalg.norm(dir) 49 | dir = dir / dist 50 | 51 | xmin = min(x0, x1) - r 52 | xmax = max(x0, x1) + r 53 | ymin = min(y0, y1) - r 54 | ymax = max(y0, y1) + r 55 | 56 | def fn(x, y): 57 | # Fast, early escape test 58 | if x < xmin or x > xmax or y < ymin or y > ymax: 59 | return False 60 | 61 | q = np.array([x, y]) 62 | pq = q - p0 63 | 64 | # Closest point on line 65 | a = np.dot(pq, dir) 66 | a = np.clip(a, 0, dist) 67 | p = p0 + a * dir 68 | 69 | dist_to_line = np.linalg.norm(q - p) 70 | return dist_to_line <= r 71 | 72 | return fn 73 | 74 | def point_in_circle(cx, cy, r): 75 | def fn(x, y): 76 | return (x-cx)*(x-cx) + (y-cy)*(y-cy) <= r * r 77 | return fn 78 | 79 | def point_in_rect(xmin, xmax, ymin, ymax): 80 | def fn(x, y): 81 | return x >= xmin and x <= xmax and y >= ymin and y <= ymax 82 | return fn 83 | 84 | def point_in_triangle(a, b, c): 85 | a = np.array(a) 86 | b = np.array(b) 87 | c = np.array(c) 88 | 89 | def fn(x, y): 90 | v0 = c - a 91 | v1 = b - a 92 | v2 = np.array((x, y)) - a 93 | 94 | # Compute dot products 95 | dot00 = np.dot(v0, v0) 96 | dot01 = np.dot(v0, v1) 97 | dot02 = np.dot(v0, v2) 98 | dot11 = np.dot(v1, v1) 99 | dot12 = np.dot(v1, v2) 100 | 101 | # Compute barycentric coordinates 102 | inv_denom = 1 / (dot00 * dot11 - dot01 * dot01) 103 | u = (dot11 * dot02 - dot01 * dot12) * inv_denom 104 | v = (dot00 * dot12 - dot01 * dot02) * inv_denom 105 | 106 | # Check if point is in triangle 107 | return (u >= 0) and (v >= 0) and (u + v) < 1 108 | 109 | return fn 110 | 111 | def highlight_img(img, color=(255, 255, 255), alpha=0.30): 112 | """ 113 | Add highlighting to an image 114 | """ 115 | 116 | blend_img = img + alpha * (np.array(color, dtype=np.uint8) - img) 117 | blend_img = blend_img.clip(0, 255).astype(np.uint8) 118 | img[:, :, :] = blend_img 119 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/gym_minigrid/window.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | 4 | # Only ask users to install matplotlib if they actually need it 5 | try: 6 | import matplotlib.pyplot as plt 7 | except: 8 | print('To display the environment in a window, please install matplotlib, eg:') 9 | print('pip3 install --user matplotlib') 10 | sys.exit(-1) 11 | 12 | class Window: 13 | """ 14 | Window to draw a gridworld instance using Matplotlib 15 | """ 16 | 17 | def __init__(self, title): 18 | self.fig = None 19 | 20 | self.imshow_obj = None 21 | 22 | # Create the figure and axes 23 | self.fig, self.ax = plt.subplots() 24 | 25 | # Show the env name in the window title 26 | self.fig.canvas.set_window_title(title) 27 | 28 | # Turn off x/y axis numbering/ticks 29 | self.ax.set_xticks([], []) 30 | self.ax.set_yticks([], []) 31 | 32 | # Flag indicating the window was closed 33 | self.closed = False 34 | 35 | def close_handler(evt): 36 | self.closed = True 37 | 38 | self.fig.canvas.mpl_connect('close_event', close_handler) 39 | 40 | def show_img(self, img): 41 | """ 42 | Show an image or update the image being shown 43 | """ 44 | 45 | # Show the first image of the environment 46 | if self.imshow_obj is None: 47 | self.imshow_obj = self.ax.imshow(img, interpolation='bilinear') 48 | 49 | self.imshow_obj.set_data(img) 50 | self.fig.canvas.draw() 51 | 52 | # Let matplotlib process UI events 53 | # This is needed for interactive mode to work properly 54 | 
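        # A brief pause hands control to the matplotlib GUI event loop so the canvas
        # actually repaints, even when show_img() is called from a tight loop.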
plt.pause(0.001) 55 | 56 | def set_caption(self, text): 57 | """ 58 | Set/update the caption text below the image 59 | """ 60 | 61 | plt.xlabel(text) 62 | 63 | def reg_key_handler(self, key_handler): 64 | """ 65 | Register a keyboard event handler 66 | """ 67 | 68 | # Keyboard handler 69 | self.fig.canvas.mpl_connect('key_press_event', key_handler) 70 | 71 | def show(self, block=True): 72 | """ 73 | Show the window, and start an event loop 74 | """ 75 | 76 | # If not blocking, trigger interactive mode 77 | if not block: 78 | plt.ion() 79 | 80 | # Show the plot 81 | # In non-interative mode, this enters the matplotlib event loop 82 | # In interactive mode, this call does not block 83 | plt.show() 84 | 85 | def close(self): 86 | """ 87 | Close the window 88 | """ 89 | 90 | plt.close() 91 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/manual_control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | from gym_minigrid.wrappers import * 5 | from gym_minigrid.window import Window 6 | 7 | def redraw(img): 8 | if not args.agent_view: 9 | img = env.render('rgb_array', tile_size=args.tile_size) 10 | 11 | window.show_img(img) 12 | 13 | def reset(): 14 | if args.seed != -1: 15 | env.seed(args.seed) 16 | 17 | obs = env.reset() 18 | 19 | if hasattr(env, 'mission'): 20 | print('Mission: %s' % env.mission) 21 | window.set_caption(env.mission) 22 | 23 | redraw(obs) 24 | 25 | def step(action): 26 | obs, reward, done, info = env.step(action) 27 | print('step=%s, reward=%.2f' % (env.step_count, reward)) 28 | 29 | if done: 30 | print('done!') 31 | reset() 32 | else: 33 | redraw(obs) 34 | 35 | def key_handler(event): 36 | print('pressed', event.key) 37 | 38 | if event.key == 'escape': 39 | window.close() 40 | return 41 | 42 | if event.key == 'backspace': 43 | reset() 44 | return 45 | 46 | if event.key == 'left': 47 | step(env.actions.left) 48 | return 49 | if event.key == 'right': 50 | step(env.actions.right) 51 | return 52 | if event.key == 'up': 53 | step(env.actions.forward) 54 | return 55 | 56 | # Spacebar 57 | if event.key == ' ': 58 | step(env.actions.toggle) 59 | return 60 | if event.key == 'pageup': 61 | step(env.actions.pickup) 62 | return 63 | if event.key == 'pagedown': 64 | step(env.actions.drop) 65 | return 66 | 67 | if event.key == 'enter': 68 | step(env.actions.done) 69 | return 70 | 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument( 73 | "--env", 74 | help="gym environment to load", 75 | default='MiniGrid-MultiRoom-N6-v0' 76 | ) 77 | parser.add_argument( 78 | "--seed", 79 | type=int, 80 | help="random seed to generate the environment with", 81 | default=-1 82 | ) 83 | parser.add_argument( 84 | "--tile_size", 85 | type=int, 86 | help="size at which to render tiles", 87 | default=32 88 | ) 89 | parser.add_argument( 90 | '--agent_view', 91 | default=False, 92 | help="draw the agent sees (partially observable view)", 93 | action='store_true' 94 | ) 95 | 96 | args = parser.parse_args() 97 | 98 | env = gym.make(args.env) 99 | 100 | if args.agent_view: 101 | env = RGBImgPartialObsWrapper(env) 102 | env = ImgObsWrapper(env) 103 | 104 | window = Window('gym_minigrid - ' + args.env) 105 | window.reg_key_handler(key_handler) 106 | 107 | reset() 108 | 109 | # Blocking event loop 110 | window.show(block=True) 111 | -------------------------------------------------------------------------------- /babyai-text/gym-minigrid/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='gym_minigrid', 5 | version='1.0.1', 6 | keywords='memory, environment, agent, rl, openaigym, openai-gym, gym', 7 | url='https://github.com/maximecb/gym-minigrid', 8 | description='Minimalistic gridworld package for OpenAI Gym', 9 | packages=['gym_minigrid', 'gym_minigrid.envs'], 10 | install_requires=[ 11 | 'gym>=0.9.6', 12 | 'numpy>=1.15.0' 13 | ] 14 | ) 15 | -------------------------------------------------------------------------------- /babyai-text/images/babyai-text_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/babyai-text/images/babyai-text_schema.png -------------------------------------------------------------------------------- /babyai-text/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='babyai_text', 5 | version='0.1.0', 6 | keywords='babyai, text environment', 7 | description='A text-only extension of BabyAI', 8 | packages=['babyai_text', 'babyai_text.levels'], 9 | install_requires=[ 10 | 'colorama', 11 | 'termcolor', 12 | 'matplotlib', 13 | 'ipython', 14 | 'numpy==1.23.1' 15 | ] 16 | ) -------------------------------------------------------------------------------- /docs/images/generalization_tests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/docs/images/generalization_tests.png -------------------------------------------------------------------------------- /docs/images/main_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/docs/images/main_schema.png -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/__pycache__/main.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/main.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/__pycache__/test_llm.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/__pycache__/test_llm.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/__init__.py -------------------------------------------------------------------------------- /experiments/agents/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/base_agent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | class BaseAgent(ABC): 4 | def __init__(self, envs): 5 | self.env = envs 6 | self.dict_translation_actions = {'turn left': "tourner à gauche", 7 | "turn right": "tourner à droite", 8 | "go forward": "aller tout droit", 9 | "pick up": "attraper", 10 | "drop": "lâcher", 11 | "toggle": "basculer", 12 | "eat": "manger", 13 | "dance": "danser", 14 | "sleep": "dormir", 15 | "do nothing": "ne rien faire", 16 | "cut": "couper", 17 | "think": "penser"} 18 | 19 | @abstractmethod 20 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 21 | raise NotImplementedError() 22 | 23 | @abstractmethod 24 | def update_parameters(self): 25 | raise NotImplementedError() 26 | 27 | def generate_prompt(self, goal, subgoals, deque_obs, deque_actions): 28 | ldo = len(deque_obs) 29 | lda = len(deque_actions) 30 | 31 | head_prompt = "Possible action of the agent:" 32 | for sg in subgoals: 33 | head_prompt += " {},".format(sg) 34 | head_prompt = head_prompt[:-1] 35 | 36 | g = " \n Goal of the agent: {}".format(goal) 37 | obs = "" 38 | for i in range(ldo): 39 | obs += " \n Observation {}: ".format(i) 40 | for d_obs in deque_obs[i]: 41 | obs += "{}, ".format(d_obs) 42 | obs += "\n Action {}: ".format(i) 43 | if i < lda: 44 | obs += "{}".format(deque_actions[i]) 45 | return head_prompt + g + obs 46 | 47 | def generate_prompt_french(self, goal, subgoals, deque_obs, deque_actions): 48 | ldo = len(deque_obs) 49 | lda = len(deque_actions) 50 | head_prompt = "Actions possibles pour l'agent:" 51 | for sg in subgoals: 52 | head_prompt += " {},".format(sg) 53 | head_prompt = head_prompt[:-1] 54 | 55 | # translate the goal into French 56 | dict_translation_det = {"the": "la", 57 | 'a': 'une'} 58 | dict_translation_names = {"box": "boîte", 59 | "ball": "balle", 60 | "key": "clef"} 61 | dict_translation_adjs = {'red': 'rouge', 62 | 'green': 'verte', 63 | 'blue': 'bleue', 64 | 'purple': 'violette', 65 | 'yellow': 'jaune', 66 | 'grey': 'grise'} 67 | 68 | det = '' 69 | name = '' 70 | adj = '' 71 | 72 | for k in dict_translation_det.keys(): 73 | if k in goal: 74 | det = dict_translation_det[k] 75 | for k in dict_translation_names.keys(): 76 | if k in goal: 77 | name = dict_translation_names[k] 78 | for k in dict_translation_adjs.keys(): 79 | if k in goal: 80 | adj = dict_translation_adjs[k] 81 | translation_goal = 'aller à ' + det + ' ' + name + ' ' + adj 82 | 83 | g = " \n But de l'agent: 
{}".format(translation_goal) 84 | obs = "" 85 | for i in range(ldo): 86 | obs += " \n Observation {}: ".format(i) 87 | for d_obs in deque_obs[i]: 88 | obs += "{}, ".format(d_obs) 89 | obs += "\n Action {}: ".format(i) 90 | if i < lda: 91 | obs += "{}".format(deque_actions[i]) 92 | return head_prompt + g + obs 93 | 94 | def prompt_modifier(self, prompt: str, dict_changes: dict) -> str: 95 | """use a dictionary of equivalence to modify the prompt accordingly 96 | ex: 97 | prompt= 'green box red box', dict_changes={'box':'tree'} 98 | promp_modifier(prompt, dict_changes)='green tree red tree' """ 99 | 100 | for key, value in dict_changes.items(): 101 | prompt = prompt.replace(key, value) 102 | return prompt 103 | 104 | -------------------------------------------------------------------------------- /experiments/agents/bot/__pycache__/bot.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/bot/__pycache__/bot.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/bot/bot.py: -------------------------------------------------------------------------------- 1 | from babyai.bot import Bot 2 | from babyai.rl.utils import DictList 3 | from collections import deque 4 | from tqdm import tqdm 5 | import numpy as np 6 | 7 | from agents.base_agent import BaseAgent 8 | 9 | class BotAgent(BaseAgent): 10 | def __init__(self, envs, subgoals): 11 | """An agent based on BabyAI's GOFAI bot.""" 12 | self.env = envs.envs[0] 13 | self.subgoals = subgoals[0] 14 | self.logs = { 15 | "return_per_episode": [], 16 | } 17 | self.obs, self.infos = self.env.reset() 18 | self.bot = Bot(self.env) 19 | 20 | self.obs_queue = deque([], maxlen=3) 21 | self.acts_queue = deque([], maxlen=2) 22 | 23 | self.obs_queue.append(self.infos['descriptions']) 24 | 25 | self.prompts = [] 26 | self.actions = [] 27 | 28 | self.log_done_counter = 0 29 | 30 | def act(self, action_choosen=None): 31 | actions = self.bot.replan(action_choosen) 32 | return actions 33 | 34 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 35 | assert language == "english" 36 | 37 | nbr_frames = 1 38 | pbar = tqdm(range(n_tests), ascii=" " * 9 + ">", ncols=100) 39 | previous_action = None 40 | while self.log_done_counter < n_tests: 41 | nbr_frames += 1 42 | prompt = self.prompt_modifier(self.generate_prompt(goal=self.obs['mission'], subgoals=self.subgoals, 43 | deque_obs=self.obs_queue, 44 | deque_actions=self.acts_queue), dict_modifier) 45 | 46 | action = self.act(previous_action) 47 | # previous_action = action 48 | self.actions.append(self.subgoals[int(action)]) 49 | self.acts_queue.append(self.subgoals[int(action)]) 50 | self.prompts.append(prompt) 51 | 52 | self.obs, reward, done, self.infos = self.env.step(action) 53 | 54 | if done: 55 | self.log_done_counter += 1 56 | pbar.update(1) 57 | self.logs["return_per_episode"].append(reward) 58 | self.obs_queue.clear() 59 | self.acts_queue.clear() 60 | self.obs, infos = self.env.reset() 61 | self.bot = Bot(self.env) 62 | self.obs_queue.append(self.infos['descriptions']) 63 | pbar.close() 64 | 65 | exps = DictList() 66 | exps.prompts = np.array(self.prompts) 67 | exps.actions = np.array(self.actions) 68 | 69 | self.logs["episodes_done"] = self.log_done_counter 70 | self.logs["nbr_frames"] = nbr_frames 71 | self.log_done_counter = 0 72 | return exps, 
self.logs 73 | 74 | def update_parameters(self): 75 | pass 76 | 77 | -------------------------------------------------------------------------------- /experiments/agents/drrn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__init__.py -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/drrn.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/drrn.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This code has been taken from https://github.com/microsoft/tdqn and modified to match our needs 3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import itertools 8 | from .utils.pad_sequences import pad_sequences 9 | from .utils.memory import State 10 | 11 | from accelerate import Accelerator 12 | 13 | accelerator = Accelerator() 14 | device = accelerator.state.device 15 | 16 | class DRRN(torch.nn.Module): 17 | """ 18 | Deep Reinforcement Relevance Network - He et al. '16 19 | 20 | """ 21 | def __init__(self, vocab_size, embedding_dim, hidden_dim): 22 | super(DRRN, self).__init__() 23 | self.embedding = nn.Embedding(vocab_size, embedding_dim) 24 | self.obs_encoder = nn.GRU(embedding_dim, hidden_dim) 25 | self.act_encoder = nn.GRU(embedding_dim, hidden_dim) 26 | self.hidden = nn.Linear(2*hidden_dim, hidden_dim) 27 | self.act_scorer = nn.Linear(hidden_dim, 1) 28 | 29 | 30 | def packed_rnn(self, x, rnn): 31 | """ Runs the provided rnn on the input x. Takes care of packing/unpacking. 
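A minimal usage sketch (hypothetical values): self.packed_rnn([[3, 7, 2], [5]], self.obs_encoder) encodes two unpadded token-id sequences of different lengths and would return a tensor of shape (2, hidden_dim).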
32 | 33 | x: list of unpadded input sequences 34 | Returns a tensor of size: len(x) x hidden_dim 35 | """ 36 | lengths = torch.tensor([len(n) for n in x], dtype=torch.long, device=device) 37 | # Sort this batch in descending order by seq length 38 | lengths, idx_sort = torch.sort(lengths, dim=0, descending=True) 39 | _, idx_unsort = torch.sort(idx_sort, dim=0) 40 | idx_sort = torch.autograd.Variable(idx_sort) 41 | idx_unsort = torch.autograd.Variable(idx_unsort) 42 | padded_x = pad_sequences(x) 43 | x_tt = torch.from_numpy(padded_x).type(torch.long).to(device) 44 | x_tt = x_tt.index_select(0, idx_sort) 45 | # Run the embedding layer 46 | embed = self.embedding(x_tt).permute(1,0,2) # Time x Batch x EncDim 47 | # Pack padded batch of sequences for RNN module 48 | packed = nn.utils.rnn.pack_padded_sequence(embed, lengths.cpu()) 49 | # Run the RNN 50 | out, _ = rnn(packed) 51 | # Unpack 52 | out, _ = nn.utils.rnn.pad_packed_sequence(out) 53 | # Get the last step of each sequence 54 | idx = (lengths-1).view(-1,1).expand(len(lengths), out.size(2)).unsqueeze(0) 55 | out = out.gather(0, idx).squeeze(0) 56 | # Unsort 57 | out = out.index_select(0, idx_unsort) 58 | return out 59 | 60 | 61 | def forward(self, state_batch, act_batch): 62 | """ 63 | Batched forward pass. 64 | obs_id_batch: iterable of unpadded sequence ids 65 | act_batch: iterable of lists of unpadded admissible command ids 66 | 67 | Returns a tuple of tensors containing q-values for each item in the batch 68 | """ 69 | # Zip the state_batch into an easy access format 70 | state = State(*zip(*state_batch)) 71 | # This is number of admissible commands in each element of the batch 72 | act_sizes = [len(a) for a in act_batch] 73 | # Combine next actions into one long list 74 | act_batch = list(itertools.chain.from_iterable(act_batch)) 75 | act_out = self.packed_rnn(act_batch, self.act_encoder) 76 | # Encode the various aspects of the state 77 | state_out = self.packed_rnn(state.obs, self.obs_encoder) 78 | # Expand the state to match the batches of actions 79 | state_out = torch.cat([state_out[i].repeat(j,1) for i,j in enumerate(act_sizes)], dim=0) 80 | z = torch.cat((state_out, act_out), dim=1) # Concat along hidden_dim 81 | z = F.relu(self.hidden(z)) 82 | act_values = self.act_scorer(z).squeeze(-1) 83 | # Split up the q-values by batch 84 | return act_values.split(act_sizes) -------------------------------------------------------------------------------- /experiments/agents/drrn/spm_models/unigram_8k.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/spm_models/unigram_8k.model -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__init__.py -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/memory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/memory.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/__pycache__/pad_sequences.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/drrn/utils/__pycache__/pad_sequences.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/memory.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | import json 4 | import sys 5 | 6 | State = namedtuple('State', ('obs')) #, 'description', 'inventory')) 7 | Transition = namedtuple('Transition', ('state', 'act', 'reward', 'next_state', 'next_acts', 'done')) 8 | 9 | 10 | def sample(rng: np.random.RandomState, data: list, k: int): 11 | """ Chooses k unique random elements from a list. """ 12 | return [data[i] for i in rng.choice(len(data), k, replace=False)] 13 | 14 | 15 | class ReplayMemory(object): 16 | def __init__(self, capacity, seed=20210824): 17 | self.capacity = capacity 18 | self.memory = [] 19 | self.position = 0 20 | self.rng = np.random.RandomState(seed) 21 | 22 | def push(self, *args): 23 | if len(self.memory) < self.capacity: 24 | self.memory.append(None) 25 | self.memory[self.position] = Transition(*args) 26 | self.position = (self.position + 1) % self.capacity 27 | 28 | def sample(self, batch_size): 29 | return sample(self.rng, self.memory, batch_size) 30 | 31 | def __len__(self): 32 | return len(self.memory) 33 | 34 | 35 | 36 | class PrioritizedReplayMemory(object): 37 | def __init__(self, capacity=100000, priority_fraction=0.0, seed=20210824): 38 | # Stored 39 | self.capacity = capacity 40 | self.priority_fraction = priority_fraction 41 | self.seed = seed 42 | 43 | # Calculated at init 44 | self.alpha_capacity = int(capacity * priority_fraction) 45 | self.beta_capacity = capacity - self.alpha_capacity 46 | 47 | # Declared 48 | self.alpha_memory, self.beta_memory = [], [] 49 | self.alpha_position, self.beta_position = 0, 0 50 | 51 | # Initialized 52 | self.rng = np.random.RandomState(seed) 53 | 54 | def push(self, is_prior=False, *args): 55 | """Saves a transition.""" 56 | if self.priority_fraction == 0.0: 57 | is_prior = False 58 | if is_prior: 59 | if len(self.alpha_memory) < self.alpha_capacity: 60 | self.alpha_memory.append(None) 61 | self.alpha_memory[self.alpha_position] = Transition(*args) 62 | self.alpha_position = (self.alpha_position + 1) % self.alpha_capacity 63 | else: 64 | if len(self.beta_memory) < self.beta_capacity: 65 | self.beta_memory.append(None) 66 | self.beta_memory[self.beta_position] = Transition(*args) 67 | self.beta_position = (self.beta_position + 1) % self.beta_capacity 68 | 69 | def sample(self, batch_size): 70 | if self.priority_fraction == 0.0: 71 | from_beta = min(batch_size, len(self.beta_memory)) 72 | res = sample(self.rng, self.beta_memory, from_beta) 73 | else: 74 | from_alpha = 
min(int(self.priority_fraction * batch_size), len(self.alpha_memory)) 75 | from_beta = min(batch_size - int(self.priority_fraction * batch_size), len(self.beta_memory)) 76 | res = sample(self.rng, self.alpha_memory, from_alpha) + sample(self.rng, self.beta_memory, from_beta) 77 | 78 | self.rng.shuffle(res) 79 | return res 80 | 81 | def __len__(self): 82 | return len(self.alpha_memory) + len(self.beta_memory) 83 | 84 | def serializeToJSON(self, filenameOut): 85 | print("Serializing to JSON... ") 86 | sys.stdout.flush() 87 | 88 | packed = { 89 | "capacity": self.capacity, 90 | "priority_fraction": self.priority_fraction, 91 | "alpha_memory": self.alpha_memory, 92 | "alpha_position": self.alpha_position, 93 | "beta_memory": self.beta_memory, 94 | "beta_position": self.beta_position, 95 | } 96 | 97 | print(packed) 98 | sys.stdout.flush() 99 | 100 | with open(filenameOut, 'w') as outfile: 101 | outfile.write(json.dumps(packed, cls=NpEncoder, indent=2)) 102 | 103 | print("Completed...") 104 | sys.stdout.flush() 105 | 106 | 107 | class NpEncoder(json.JSONEncoder): 108 | def default(self, obj): 109 | if isinstance(obj, np.integer): 110 | return int(obj) 111 | if isinstance(obj, np.floating): 112 | return float(obj) 113 | if isinstance(obj, np.ndarray): 114 | return obj.tolist() 115 | if isinstance(obj, np.bool_): 116 | return bool(obj) 117 | return super(NpEncoder, self).default(obj) -------------------------------------------------------------------------------- /experiments/agents/drrn/utils/pad_sequences.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pad_sequences(sequences, maxlen=None, dtype='int32', value=0.): 5 | ''' 6 | Partially borrowed from Keras 7 | # Arguments 8 | sequences: list of lists where each element is a sequence 9 | maxlen: int, maximum length 10 | dtype: type to cast the resulting sequence. 11 | value: float, value to pad the sequences to the desired value. 12 | # Returns 13 | x: numpy array with dimensions (number_of_sequences, maxlen) 14 | ''' 15 | lengths = [len(s) for s in sequences] 16 | nb_samples = len(sequences) 17 | if maxlen is None: 18 | maxlen = np.max(lengths) 19 | # take the sample shape from the first non empty sequence 20 | # checking for consistency in the main loop below. 
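# e.g. (hypothetical call): pad_sequences([[1, 2, 3], [7]]) -> array([[1, 2, 3], [7, 0, 0]], dtype=int32); each sequence is post-padded with `value` up to the longest length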
21 | sample_shape = tuple() 22 | for s in sequences: 23 | if len(s) > 0: 24 | sample_shape = np.asarray(s).shape[1:] 25 | break 26 | x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype) 27 | for idx, s in enumerate(sequences): 28 | if len(s) == 0: 29 | continue # empty list was found 30 | # pre truncating 31 | trunc = s[-maxlen:] 32 | # check `trunc` has expected shape 33 | trunc = np.asarray(trunc, dtype=dtype) 34 | if trunc.shape[1:] != sample_shape: 35 | raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % 36 | (trunc.shape[1:], idx, sample_shape)) 37 | # post padding 38 | x[idx, :len(trunc)] = trunc 39 | return x 40 | -------------------------------------------------------------------------------- /experiments/agents/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/ppo/__init__.py -------------------------------------------------------------------------------- /experiments/agents/ppo/base_ppo_agent.py: -------------------------------------------------------------------------------- 1 | from agents.base_agent import BaseAgent 2 | 3 | from babyai.rl.utils.supervised_losses import ExtraInfoCollector 4 | 5 | import torch 6 | 7 | class BasePPOAgent(BaseAgent): 8 | def __init__(self, envs, num_frames_per_proc, discount, lr, gae_lambda, entropy_coef, value_loss_coef, 9 | max_grad_norm, reshape_reward, aux_info, device): 10 | """ 11 | Initializes a `BaseAlgo` instance. 12 | 13 | Parameters: 14 | ---------- 15 | envs : list 16 | a list of environments that will be run in parallel 17 | num_frames_per_proc : int 18 | the number of frames collected by every process for an update 19 | discount : float 20 | the discount for future rewards 21 | lr : float 22 | the learning rate for optimizers 23 | gae_lambda : float 24 | the lambda coefficient in the GAE formula 25 | ([Schulman et al., 2015](https://arxiv.org/abs/1506.02438)) 26 | entropy_coef : float 27 | the weight of the entropy cost in the final objective 28 | value_loss_coef : float 29 | the weight of the value loss in the final objective 30 | max_grad_norm : float 31 | gradient will be clipped to be at most this value 32 | reshape_reward : function 33 | a function that shapes the reward, takes an 34 | (observation, action, reward, done) tuple as an input 35 | aux_info : list 36 | a list of strings corresponding to the name of the extra information 37 | retrieved from the environment for supervised auxiliary losses 38 | 39 | """ 40 | super().__init__(envs) 41 | self.num_frames_per_proc = num_frames_per_proc 42 | self.discount = discount 43 | self.lr = lr 44 | self.gae_lambda = gae_lambda 45 | self.entropy_coef = entropy_coef 46 | self.value_loss_coef = value_loss_coef 47 | self.max_grad_norm = max_grad_norm 48 | self.reshape_reward = reshape_reward 49 | self.aux_info = aux_info 50 | 51 | # Store helpers values 52 | self.device = device 53 | self.num_procs = len(envs) 54 | self.num_frames = self.num_frames_per_proc * self.num_procs 55 | 56 | # Initialize experience values 57 | shape = (self.num_frames_per_proc, self.num_procs) 58 | self.obss = [None] * (shape[0]) 59 | 60 | self.mask = torch.ones(shape[1], device=self.device) 61 | self.masks = torch.zeros(*shape, device=self.device) 62 | 63 | self.values = torch.zeros(*shape, device=self.device) 64 | self.rewards = torch.zeros(*shape, 
device=self.device) 65 | self.rewards_bonus = torch.zeros(*shape, device=self.device) 66 | self.advantages = torch.zeros(*shape, device=self.device) 67 | self.log_probs = torch.zeros(*shape, device=self.device) 68 | 69 | if self.aux_info: 70 | self.aux_info_collector = ExtraInfoCollector(self.aux_info, shape, self.device) 71 | 72 | # Initialize log values 73 | self.log_episode_return = torch.zeros(self.num_procs, device=self.device) 74 | self.log_episode_reshaped_return = torch.zeros(self.num_procs, device=self.device) 75 | self.log_episode_reshaped_return_bonus = torch.zeros(self.num_procs, device=self.device) 76 | self.log_episode_num_frames = torch.zeros(self.num_procs, device=self.device) 77 | 78 | self.log_done_counter = 0 79 | self.log_return = [0] * self.num_procs 80 | self.log_reshaped_return = [0] * self.num_procs 81 | self.log_reshaped_return_bonus = [0] * self.num_procs 82 | self.log_num_frames = [0] * self.num_procs 83 | -------------------------------------------------------------------------------- /experiments/agents/random_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__init__.py -------------------------------------------------------------------------------- /experiments/agents/random_agent/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/random_agent/__pycache__/random_agent.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/agents/random_agent/__pycache__/random_agent.cpython-310.pyc -------------------------------------------------------------------------------- /experiments/agents/random_agent/random_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | 4 | from agents.base_agent import BaseAgent 5 | 6 | class Random_agent(BaseAgent): 7 | def __init__(self, envs, subgoals): 8 | super().__init__(envs) 9 | self.env.reset() 10 | self.subgoals = subgoals 11 | self.returns = [0 for _ in range(self.env.num_envs)] 12 | self.logs = { 13 | "return_per_episode": [], 14 | } 15 | 16 | def generate_trajectories(self, dict_modifier, n_tests, language='english'): 17 | episodes_done = 0 18 | pbar = tqdm(range(n_tests), ascii=" " * 9 + ">", ncols=100) 19 | while episodes_done < n_tests: 20 | actions = np.random.randint(low=0, high=len(self.subgoals[0]), size=(self.env.num_envs,)) 21 | 22 | if len(self.subgoals[0]) > 6: 23 | # only useful when we test the impact of the number of actions 24 | real_a = np.copy(actions) 25 | real_a[real_a > 6] = 6 26 | obs, rewards, dones, infos = self.env.step(real_a) 27 | else: 28 | obs, rewards, dones, infos = self.env.step(actions) 29 | 30 | for j in range(self.env.num_envs): 31 | self.returns[j] += rewards[j] 32 | if dones[j]: 33 | episodes_done += 1 34 | pbar.update(1) 35 | self.logs["return_per_episode"].append(self.returns[j]) 36 | 
self.returns[j] = 0 37 | pbar.close() 38 | 39 | self.logs["episodes_done"] = episodes_done 40 | return None, self.logs 41 | 42 | def update_parameters(self): 43 | pass 44 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_BC_finetuning/bc_finetuning_Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=bc_finetuning_Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=04:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/bc_finetuning_Flan-T5_large_seed_%a-%j.out # output 5 | #SBATCH --error=slurm_logs/bc_finetuning_Flan-T5_large_seed_%a-%j.err # err 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=64 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/accelerate_launcher.sh 22 | 23 | srun experiments/slurm/accelerate_launcher.sh \ 24 | --config_file $WORK/Grounding_LLMs/experiments/configs/accelerate/default_config.yaml \ 25 | --multi_gpu \ 26 | --num_processes 8 \ 27 | --num_machines 1 \ 28 | experiments/clm_behavioral-cloning.py \ 29 | --output_dir=$WORK/Grounding_LLMs/storage/logs/bc_finetuning_Flan-T5_large_seed_${SLURM_ARRAY_TASK_ID} \ 30 | --model_dir=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 31 | --data_dir=$WORK/Grounding_LLMs/storage/logs/GFlan-T5_large_GoToLocal_seed_${SLURM_ARRAY_TASK_ID}/test/BabyAI-GoToLocal-v0/return_per_episode \ 32 | --per_device_batch_size=8 \ 33 | --gradient_accumulation_steps=1 \ 34 | --seed=${SLURM_ARRAY_TASK_ID} 35 | 36 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_BC_finetuning/bc_finetuning_from-bot_Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=bc_finetuning_from-bot_Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=04:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/bc_finetuning_from-bot_Flan-T5_large_seed_%a-%j.out # output 5 | #SBATCH --error=slurm_logs/bc_finetuning_from-bot_Flan-T5_large_seed_%a-%j.err # err 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=64 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/accelerate_launcher.sh 22 | 23 | srun experiments/slurm/accelerate_launcher.sh \ 24 | --config_file $WORK/Grounding_LLMs/experiments/configs/accelerate/default_config.yaml \ 25 | --multi_gpu \ 26 | --num_processes 8 \ 27 | --num_machines 1 \ 28 | experiments/clm_behavioral-cloning.py \ 29 | --output_dir=$WORK/Grounding_LLMs/storage/logs/bc_finetuning_Flan-T5_large_seed_${SLURM_ARRAY_TASK_ID} \ 30 | --model_dir=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 31 | --data_dir=$WORK/Grounding_LLMs/storage/logs/bot_GoToLocal_seed_${SLURM_ARRAY_TASK_ID}/test/BabyAI-GoToLocal-v0/return_per_episode \ 32 | --per_device_batch_size=8 \ 33 | --gradient_accumulation_steps=1 \ 34 | --seed=${SLURM_ARRAY_TASK_ID} \ 35 | --file_name=bot_trajectories \ 36 | 
--file_id=1 37 | 38 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_tests_no-change/GFlan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=test_GFlan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/test_GFlan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/test_GFlan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/post-training_tests.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.number_episodes=1000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.zero_shot=False \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | lamorel_args.llm_args.model_type=seq2seq \ 36 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 37 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 38 | lamorel_args.llm_args.minibatch_size=3 \ 39 | lamorel_args.accelerate_args.num_machines=1 \ 40 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 41 | --config-name=multi-node_slurm_cluster_config 42 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/DRRN.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=10:00:00 3 | #SBATCH --account= # SLURM ACCOUNT 4 | #SBATCH --job-name=DRRN_seed_%a 5 | #SBATCH -o slurm_logs/DRRN_seed_%a.out 6 | #SBATCH -e slurm_logs/DRRN_seed_%a.err 7 | #SBATCH --ntasks-per-node=1 8 | #SBATCH --nodes=1 9 | #SBATCH --cpus-per-task=20 10 | #SBATCH --gres=gpu:1 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --array=1-2 13 | #SBATCH --qos=qos_gpu-t3 14 | #SBATCH -C v100-32g 15 | 16 | module purge 17 | module load python/3.8.2 18 | conda activate dlp 19 | 20 | srun python experiments/train_language_agent.py \ 21 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 22 | rl_script_args.number_envs=32 \ 23 | rl_script_args.num_steps=1500000 \ 24 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 25 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 26 | rl_script_args.name_experiment='drrn_mtrl' \ 27 | rl_script_args.name_model='DRRN' \ 28 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 29 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 30 | rl_script_args.spm_path=$SCRATCH/Grounding_LLMs/experiments/agents/drrn/spm_models/unigram_8k.model \ 31 | lamorel_args.distributed_setup_args.n_llm_processes=0 \ 32 | 
--config-path=$WORK/Grounding_LLMs/experiments/configs \ 33 | --config-name=multi-node_slurm_cluster_config 34 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/GFlan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=GFlan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/GFlan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/GFlan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/train_language_agent.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.num_steps=1500000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.template_test=1 \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | lamorel_args.llm_args.model_type=seq2seq \ 36 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 37 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 38 | lamorel_args.llm_args.minibatch_size=3 \ 39 | lamorel_args.accelerate_args.num_machines=1 \ 40 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 41 | --config-name=multi-node_slurm_cluster_config 42 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/NPAE-Flan-T5_large.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=NPAE-Flan-T5_large_seed_%a # job name 3 | #SBATCH --time=20:00:00 # maximum execution time (HH:MM:SS) 4 | #SBATCH --output=slurm_logs/NPAE-Flan-T5_large_seed_%a-%j.out # output file name 5 | #SBATCH --error=slurm_logs/NPAE-Flan-T5_large_seed_%a-%j.err # err file name 6 | #SBATCH --account= # SLURM ACCOUNT 7 | #SBATCH --qos=qos_gpu-t3 8 | #SBATCH -C a100 9 | #SBATCH --gres=gpu:8 10 | #SBATCH --cpus-per-task=32 11 | #SBATCH --hint=nomultithread 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --nodes=1 14 | 15 | #SBATCH --array=1-2 16 | 17 | module purge 18 | module load python/3.8.2 19 | conda activate dlp 20 | 21 | chmod +x experiments/slurm/launcher.sh 22 | 23 | srun experiments/slurm/launcher.sh \ 24 | rl_script_args.path=$WORK/Grounding_LLMs/experiments/train_language_agent.py \ 25 | rl_script_args.seed=${SLURM_ARRAY_TASK_ID} \ 26 | rl_script_args.number_envs=32 \ 27 | rl_script_args.num_steps=1500000 \ 28 | rl_script_args.action_space=["turn_left","turn_right","go_forward","pick_up","drop","toggle"] \ 29 | rl_script_args.saving_path_logs=$WORK/Grounding_LLMs/storage/logs \ 30 | rl_script_args.name_experiment='llm_mtrl' \ 31 | 
rl_script_args.name_model='Flan_T5large' \ 32 | rl_script_args.name_environment='BabyAI-MixedTrainLocal-v0' \ 33 | rl_script_args.template_test=1 \ 34 | rl_script_args.saving_path_model=$SCRATCH/Grounding_LLMs/models \ 35 | rl_script_args.load_embedding=true \ 36 | rl_script_args.use_action_heads=true \ 37 | lamorel_args.llm_args.model_type=seq2seq \ 38 | lamorel_args.llm_args.model_path=$SCRATCH/Grounding_LLMs/llms/flan-t5-large \ 39 | lamorel_args.llm_args.pretrained=false \ 40 | lamorel_args.llm_args.parallelism.model_parallelism_size=2 \ 41 | lamorel_args.llm_args.minibatch_size=3 \ 42 | lamorel_args.accelerate_args.num_machines=1 \ 43 | --config-path=$WORK/Grounding_LLMs/experiments/configs \ 44 | --config-name=multi-node_slurm_cluster_config 45 | -------------------------------------------------------------------------------- /experiments/campaign/Mixed_training/Symbolic-PPO.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --time=10:00:00 3 | #SBATCH --account= # SLURM ACCOUNT 4 | #SBATCH --job-name=Symbolic-PPO_seed_%a 5 | #SBATCH --ntasks-per-node=1 6 | #SBATCH --nodes=1 7 | #SBATCH --cpus-per-task=20 8 | #SBATCH --gres=gpu:1 9 | #SBATCH --hint=nomultithread 10 | #SBATCH -o slurm_logs/Symbolic-PPO_seed_%a.out 11 | #SBATCH -e slurm_logs/Symbolic-PPO_seed_%a.err 12 | #SBATCH --array=1-2 13 | #SBATCH --qos=qos_gpu-t3 14 | #SBATCH -C v100-32g 15 | 16 | module purge 17 | module load python/3.8.2 18 | conda activate dlp 19 | 20 | srun experiments/slurm/train_symbolic_ppo.sh BabyAI-MixedTrainLocal-v0 MTRL 6 ${SLURM_ARRAY_TASK_ID} 21 | -------------------------------------------------------------------------------- /experiments/configs/accelerate/default_config.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | deepspeed_config: { } 3 | distributed_type: MULTI_GPU 4 | fsdp_config: { } 5 | machine_rank: 0 6 | main_process_ip: 127.0.0.1 7 | main_process_port: 12345 8 | main_training_function: main 9 | mixed_precision: 'no' 10 | num_machines: 1 11 | num_processes: 2 12 | use_cpu: false -------------------------------------------------------------------------------- /experiments/configs/local_gpu_config.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: true 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 1 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: 2 11 | llm_args: 12 | model_type: seq2seq 13 | model_path: t5-small 14 | pretrained: true 15 | minibatch_size: 4 16 | pre_encode_inputs: true 17 | parallelism: 18 | use_gpu: false 19 | model_parallelism_size: 1 20 | synchronize_gpus_after_scoring: false 21 | empty_cuda_cache_after_scoring: false 22 | rl_script_args: 23 | path: ??? 24 | seed: 1 25 | number_envs: 2 26 | num_steps: 1000 27 | max_episode_steps: 3 28 | frames_per_proc: 40 29 | reward_shaping_beta: 0 30 | discount: 0.99 31 | lr: 1e-6 32 | beta1: 0.9 33 | beta2: 0.999 34 | gae_lambda: 0.99 35 | entropy_coef: 0.01 36 | value_loss_coef: 0.5 37 | max_grad_norm: 0.5 38 | adam_eps: 1e-5 39 | clip_eps: 0.2 40 | epochs: 4 41 | batch_size: 16 42 | action_space: ["turn_left","turn_right","go_forward","pick_up","drop","toggle"] 43 | saving_path_logs: ??? 44 | name_experiment: 'llm_mtrl' 45 | name_model: 'T5small' 46 | saving_path_model: ??? 
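# note: '???' marks a mandatory value (OmegaConf/Hydra convention) that presumably must be supplied at launch time, e.g. via an override such as rl_script_args.saving_path_model=...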
47 | name_environment: 'BabyAI-MixedTestLocal-v0' 48 | number_episodes: 10 49 | language: 'english' 50 | load_embedding: true 51 | use_action_heads: false 52 | template_test: 1 53 | zero_shot: true 54 | modified_action_space: false 55 | new_action_space: #["rotate_left","rotate_right","move_ahead","take","release","switch"] 56 | spm_path: "YOUR_PATH_TO_PROJECT/experiments/agents/drrn/spm_models/unigram_8k.model" 57 | random_agent: true 58 | get_example_trajectories: false 59 | nbr_obs: 3 60 | im_learning: false 61 | im_path: "" 62 | bot: false 63 | -------------------------------------------------------------------------------- /experiments/configs/multi-node_slurm_cluster_config.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: true 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 4 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: ??? 11 | num_processes: ??? 12 | main_process_ip: ??? 13 | main_process_port: 12345 14 | llm_args: 15 | model_type: ??? 16 | model_path: ??? 17 | pretrained: true 18 | minibatch_size: ??? 19 | pre_encode_inputs: true 20 | parallelism: 21 | use_gpu: true 22 | model_parallelism_size: ??? 23 | synchronize_gpus_after_scoring: false 24 | empty_cuda_cache_after_scoring: false 25 | updater_args: 26 | rl_script_args: 27 | path: ??? 28 | seed: ??? 29 | number_envs: ??? 30 | num_steps: ??? 31 | max_episode_steps: 3 32 | frames_per_proc: 40 33 | reward_shaping_beta: 0 34 | discount: 0.99 35 | lr: 1e-6 36 | beta1: 0.9 37 | beta2: 0.999 38 | gae_lambda: 0.99 39 | entropy_coef: 0.01 40 | value_loss_coef: 0.5 41 | max_grad_norm: 0.5 42 | adam_eps: 1e-5 43 | clip_eps: 0.2 44 | epochs: 4 45 | batch_size: 64 46 | action_space: ??? 47 | saving_path_logs: ??? 48 | name_experiment: ??? 49 | name_model: ??? 50 | saving_path_model: ??? 51 | name_environment: ??? 52 | nbr_obs: 3 53 | language: 'english' 54 | load_embedding: false 55 | use_action_heads: false 56 | template_test: 1 57 | spm_path: '' 58 | -------------------------------------------------------------------------------- /experiments/configs/multi-node_slurm_cluster_config_test.yaml: -------------------------------------------------------------------------------- 1 | lamorel_args: 2 | log_level: info 3 | allow_subgraph_use_whith_gradient: false 4 | distributed_setup_args: 5 | n_rl_processes: 1 6 | n_llm_processes: 4 7 | accelerate_args: 8 | config_file: accelerate/default_config.yaml 9 | machine_rank: 0 10 | num_machines: ??? 11 | num_processes: ??? 12 | main_process_ip: ??? 13 | llm_args: 14 | model_type: ??? 15 | model_path: ??? 16 | pretrained: true 17 | minibatch_size: ??? 18 | pre_encode_inputs: true 19 | parallelism: 20 | use_gpu: true 21 | model_parallelism_size: ??? 22 | synchronize_gpus_after_scoring: false 23 | empty_cuda_cache_after_scoring: false 24 | updater_args: 25 | rl_script_args: 26 | path: ??? 27 | seed: ??? 28 | number_envs: ??? 29 | num_steps: 100 30 | max_episode_steps: 3 31 | frames_per_proc: 40 32 | reward_shaping_beta: 0 33 | discount: 0.99 34 | lr: 1e-6 35 | beta1: 0.9 36 | beta2: 0.999 37 | gae_lambda: 0.99 38 | entropy_coef: 0.01 39 | value_loss_coef: 0.5 40 | max_grad_norm: 0.5 41 | adam_eps: 1e-5 42 | clip_eps: 0.2 43 | epochs: 4 44 | batch_size: 64 45 | action_space: ??? 46 | saving_path_logs: ??? 47 | name_experiment: ??? 48 | name_model: ??? 49 | saving_path_model: ??? 50 | name_environment: ??? 
51 | nbr_obs: 3 52 | number_episodes: ??? 53 | zero_shot: ??? 54 | language: 'english' 55 | modified_action_space: false 56 | new_action_space: [] 57 | spm_path: ??? 58 | random_agent: false 59 | im_learning: false 60 | im_path: ??? 61 | get_example_trajectories: false 62 | bot: false 63 | -------------------------------------------------------------------------------- /experiments/plot_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flowersteam/Grounding_LLMs_with_online_RL/31847fb8c1db58bc8f595bea8f8cbaf26fb79dc2/experiments/plot_utils/__init__.py -------------------------------------------------------------------------------- /experiments/slurm/accelerate_launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | sed -n "1p") 4 | echo "running on node $(hostname)" 5 | accelerate launch --machine_rank $SLURM_PROCID --main_process_ip $MASTER_ADDR --main_process_port 13370 $* -------------------------------------------------------------------------------- /experiments/slurm/lamorel_launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | sed -n "1p") 4 | echo "running process ${SLURM_PROCID} on node $(hostname) with master ${MASTER_ADDR}" 5 | export "DLP_STORAGE"='storage' 6 | python -m lamorel_launcher.launch lamorel_args.accelerate_args.machine_rank=$SLURM_PROCID lamorel_args.accelerate_args.main_process_ip=$MASTER_ADDR $* -------------------------------------------------------------------------------- /experiments/slurm/train_symbolic_ppo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export BABYAI_STORAGE='storage' 3 | export DLP_STORAGE='storage' 4 | python -m experiments.train_symbolic_ppo \ 5 | --arch expert_filmcnn \ 6 | --env $1 \ 7 | --hrl vanilla \ 8 | --log-interval 1 --save-interval 15 --val-interval 15 --val-episodes 128 \ 9 | --procs 64 --frames-per-proc 40 --recurrence 20 \ 10 | --seed $4 \ 11 | --number-actions $3 \ 12 | --frames 400000 \ 13 | --model $2-nbr_actions-$3-PPO-NoPre-$4 \ 14 | #--wb 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece>=0.1.91 2 | tensorboard==2.7.0 3 | tensorboard-data-server==0.6.1 4 | tensorboard-plugin-wit==1.8.0 5 | tensorboardX==1.8 6 | torch>1.8.1 7 | protobuf==3.20.* 8 | pyyaml 9 | transformers 10 | accelerate 11 | scipy 12 | openai 13 | matplotlib 14 | colorama 15 | termcolor 16 | imageio 17 | wandb 18 | ipython 19 | tqdm==4.64.0 20 | datasets 21 | --------------------------------------------------------------------------------