├── .gitignore ├── LICENSE ├── README.md ├── bin └── examine.py ├── examples ├── blueprint.jsonnet ├── blueprint.npz ├── hide_and_seek_full.jsonnet ├── hide_and_seek_full.npz ├── hide_and_seek_policy_phases │ ├── a_chasing.npz │ ├── b_forts.npz │ ├── c_ramps.npz │ ├── d_ramp_defense.npz │ └── e_box_surfing.npz ├── hide_and_seek_quadrant.jsonnet ├── hide_and_seek_quadrant.npz ├── hide_and_seek_quadrant_physics_exploits.npz ├── lock_and_return.jsonnet ├── lock_and_return.npz ├── sequential_lock.jsonnet ├── sequential_lock.npz ├── shelter.jsonnet ├── shelter.npz └── test_all_policies.py ├── ma_policy ├── graph_construct.py ├── layers.py ├── load_policy.py ├── ma_policy.py ├── normalizers.py ├── util.py └── variable_schema.py ├── mae_envs ├── __init__.py ├── envs │ ├── __init__.py │ ├── base.py │ ├── blueprint_construction.py │ ├── box_locking.py │ ├── hide_and_seek.py │ └── shelter_construction.py ├── modules │ ├── __init__.py │ ├── agents.py │ ├── construction_sites.py │ ├── food.py │ ├── module.py │ ├── objects.py │ ├── util.py │ ├── walls.py │ └── world.py ├── util │ ├── geometry.py │ ├── transforms.py │ └── vision.py ├── viewer │ ├── __init__.py │ ├── env_viewer.py │ └── policy_viewer.py └── wrappers │ ├── food.py │ ├── lidar.py │ ├── limit_mvmnt.py │ ├── line_of_sight.py │ ├── manipulation.py │ ├── multi_agent.py │ ├── prep_phase.py │ ├── team.py │ └── util.py ├── randomized_uncertain_social_preferences ├── rusp │ ├── README.md │ ├── __init__.py │ ├── abstract_base_env.py │ ├── env_indirect_reciprocity.py │ ├── env_ipd.py │ ├── env_oasis.py │ ├── env_prisoners_buddy.py │ ├── test_env_indirect_reciprocity.py │ ├── test_env_ipd.py │ ├── test_env_oasis.py │ ├── test_env_prisoners_buddy.py │ ├── test_wrapper_rusp.py │ ├── wrappers_rusp.py │ └── wrappers_util.py └── setup.py ├── requirements_ma_policy.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Status:** Archive (code is provided as-is, no updates expected) 2 | 3 | # Multiagent emergence environments 4 | Environment generation code for [Emergent Tool Use From Multi-Agent Autocurricula](https://arxiv.org/abs/1909.07528) ([blog](https://openai.com/blog/emergent-tool-use/)) 5 | 6 | ### Installation 7 | This repository depends on the [mujoco-worldgen](https://github.com/openai/mujoco-worldgen) package. 
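Note that mujoco-worldgen itself requires a working MuJoCo / mujoco-py installation; see the mujoco-worldgen README for setup details. The commands below assume both repositories are cloned side by side in the same parent directory.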
You will need to clone the mujoco-worldgen repository and install it and its dependencies: 8 | ``` 9 | pip install -r mujoco-worldgen/requirements.txt 10 | pip install -e mujoco-worldgen/ 11 | pip install -e multi-agent-emergence-environments/ 12 | ``` 13 | 14 | This repository has been tested only on Mac OS X and Ubuntu 16.04 with Python 3.6 15 | 16 | ### Use 17 | 18 | Environment construction works in the following way: You start from the `Base` environment (defined in `mae_envs/envs/base.py`) and then you add environment modules (e.g. `Boxes`, `Ramps`, `RandomWalls`, etc.) and then wrappers on top. You can see examples in the `mae_envs/envs` folder. 19 | 20 | If you want to construct a new environment, we highly recommend using the above paradigm in order to minimize code duplication. If you need new objects or game dynamics that don't already exist in this codebase, add them in via a new `EnvModule` class or a `gym.Wrapper` class rather than subclassing `Base` (or mujoco-worldgen's `Env` class). In general, `EnvModules` should be used for adding objects or sites to the environment, or otherwise modifying the mujoco simulator; wrappers should be used for everything else (e.g. adding rewards, additional observations, or implementing game mechanics like Lock and Grab). 21 | 22 | The environments defined in this repository are: \ 23 | *Hide and seek* - `mae_envs/envs/hide_and_seek.py` - The Hide and Seek environment described in the paper. This encompasses the *random rooms*, *quadrant* and *food* versions of the game (you can switch between them by changing the arguments given to the `make_env` function in the file) \ 24 | *Box locking* - `mae_envs/envs/box_locking.py` - Encompasses the *Lock and Return* and *Sequential Lock* transfer tasks described in the paper. \ 25 | *Blueprint Construction* - `mae_envs/envs/blueprint_construction.py` \ 26 | *Shelter Construction* - `mae_envs/envs/shelter_construction.py` 27 | 28 | You can test out environments by using the `bin/examine` script. Example usage: `bin/examine.py base`. \ 29 | You can also use `bin/examine` to play a saved policy on an environment. There are several environment jsonnets and policies in the `examples` folder. Example usage: 30 | 31 | ```bin/examine.py examples/hide_and_seek_quadrant.jsonnet examples/hide_and_seek_quadrant.npz``` 32 | 33 | Note that to be able to play saved policies, you will need to install a few additional packages. You can do this via 34 | 35 | `pip install -r multi-agent-emergence-environments/requirements_ma_policy.txt` 36 | -------------------------------------------------------------------------------- /bin/examine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import click 4 | import numpy as np 5 | from os.path import abspath, dirname, join 6 | from gym.spaces import Tuple 7 | 8 | from mae_envs.viewer.env_viewer import EnvViewer 9 | from mae_envs.wrappers.multi_agent import JoinMultiAgentActions 10 | from mujoco_worldgen.util.envs import examine_env, load_env 11 | from mujoco_worldgen.util.types import extract_matching_arguments 12 | from mujoco_worldgen.util.parse_arguments import parse_arguments 13 | 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | @click.command() 19 | @click.argument('argv', nargs=-1, required=False) 20 | def main(argv): 21 | ''' 22 | examine.py is used to display environments and run policies. 
23 | 24 | For an example environment jsonnet, see 25 | mujoco-worldgen/examples/example_env_examine.jsonnet 26 | You can find saved policies in the 'examples' folder together with the environment they were 27 | trained in and the hyperparameters used. The naming used is 'examples/<env_name>.jsonnet' for 28 | the environment jsonnet file and 'examples/<env_name>.npz' for the policy weights file. 29 | Example uses: 30 | bin/examine.py hide_and_seek 31 | bin/examine.py mae_envs/envs/base.py 32 | bin/examine.py base n_boxes=6 n_ramps=2 n_agents=3 33 | bin/examine.py my_env_jsonnet.jsonnet 34 | bin/examine.py my_env_jsonnet.jsonnet my_policy.npz 35 | bin/examine.py hide_and_seek my_policy.npz n_hiders=3 n_seekers=2 n_boxes=8 n_ramps=1 36 | ''' 37 | names, kwargs = parse_arguments(argv) 38 | 39 | env_name = names[0] 40 | core_dir = abspath(join(dirname(__file__), '..')) 41 | envs_dir = 'mae_envs/envs', 42 | xmls_dir = 'xmls', 43 | 44 | if len(names) == 1: # examine the environment 45 | examine_env(env_name, kwargs, 46 | core_dir=core_dir, envs_dir=envs_dir, xmls_dir=xmls_dir, 47 | env_viewer=EnvViewer) 48 | 49 | if len(names) >= 2: # run policies on the environment 50 | # importing PolicyViewer and load_policy here because they depend on several 51 | # packages which are only needed for playing policies, not for any of the 52 | # environments code. 53 | from mae_envs.viewer.policy_viewer import PolicyViewer 54 | from ma_policy.load_policy import load_policy 55 | policy_names = names[1:] 56 | env, args_remaining_env = load_env(env_name, core_dir=core_dir, 57 | envs_dir=envs_dir, xmls_dir=xmls_dir, 58 | return_args_remaining=True, **kwargs) 59 | 60 | if isinstance(env.action_space, Tuple): 61 | env = JoinMultiAgentActions(env) 62 | if env is None: 63 | raise Exception(f'Could not find environment based on pattern {env_name}') 64 | 65 | env.reset() # generate action and observation spaces 66 | assert np.all([name.endswith('.npz') for name in policy_names]) 67 | policies = [load_policy(name, env=env, scope=f'policy_{i}') 68 | for i, name in enumerate(policy_names)] 69 | 70 | 71 | args_remaining_policy = args_remaining_env 72 | 73 | if env is not None and policies is not None: 74 | args_to_pass, args_remaining_viewer = extract_matching_arguments(PolicyViewer, kwargs) 75 | args_remaining = set(args_remaining_env) 76 | args_remaining = args_remaining.intersection(set(args_remaining_policy)) 77 | args_remaining = args_remaining.intersection(set(args_remaining_viewer)) 78 | assert len(args_remaining) == 0, ( 79 | f"There are unused arguments left: {args_remaining}."
There shouldn't be any.") 80 | viewer = PolicyViewer(env, policies, **args_to_pass) 81 | viewer.run() 82 | 83 | 84 | print(main.__doc__) 85 | 86 | 87 | if __name__ == '__main__': 88 | logging.getLogger('').handlers = [] 89 | logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) 90 | 91 | main() 92 | -------------------------------------------------------------------------------- /examples/blueprint.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.blueprint_construction:make_env", 4 | args: { 5 | # Agent Actions 6 | grab_box: true, 7 | grab_out_of_vision: false, 8 | grab_selective: false, 9 | grab_exclusive: false, 10 | 11 | lock_box: true, 12 | lock_type: "all_lock_team_specific", 13 | lock_out_of_vision: false, 14 | 15 | 16 | # Scenario 17 | n_substeps: 15, 18 | horizon: 150, 19 | 20 | # Objects 21 | n_boxes: 8, 22 | box_only_z_rot: true, 23 | boxid_obs: false, 24 | boxsize_obs: true, 25 | 26 | # Observations 27 | n_lidar_per_agent: 30, 28 | additional_obs: { 29 | hider: [[0]], 30 | prep_obs: [[0]], 31 | ramp_obs: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 32 | mask_ar_obs: [[0]], 33 | }, 34 | 35 | # Blueprint 36 | n_sites: [1, 4], 37 | site_placement: 'uniform_away_from_walls', 38 | reward_infos: [ 39 | { 40 | type: 'construction_dense', 41 | alpha: -1.5, 42 | use_corners: true, 43 | reward_scale: 0.05, 44 | }, 45 | { 46 | type: 'construction_completed', 47 | site_activation_radius: 0.1, 48 | use_corners: true, 49 | reward_scale: 3, 50 | }, 51 | ], 52 | }, 53 | }, 54 | } -------------------------------------------------------------------------------- /examples/blueprint.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/blueprint.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_full.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.hide_and_seek:make_env", 4 | args: { 5 | # Agents 6 | n_hiders: 2, 7 | n_seekers: 2, 8 | # Agent Actions 9 | grab_box: true, 10 | grab_out_of_vision: false, 11 | grab_selective: false, 12 | grab_exclusive: false, 13 | 14 | lock_box: true, 15 | lock_type: "all_lock_team_specific", 16 | lock_out_of_vision: false, 17 | 18 | # Scenario 19 | n_substeps: 15, 20 | horizon: 240, 21 | scenario: "randomwalls", 22 | n_rooms: 4, 23 | random_room_number: true, 24 | prob_outside_walls: 0.5, 25 | prep_fraction: 0.4, 26 | rew_type: "joint_zero_sum", 27 | restrict_rect: [-6.0, -6.0, 12.0, 12.0], 28 | 29 | hiders_together_radius: 0.5, 30 | seekers_together_radius: 0.5, 31 | 32 | # Objects 33 | n_boxes: [3, 9], 34 | n_elongated_boxes: [3, 9], 35 | box_only_z_rot: true, 36 | boxid_obs: false, 37 | 38 | n_ramps: 2, 39 | 40 | # Food 41 | n_food: 0, 42 | max_food_health: 40, 43 | food_radius: 0.5, 44 | food_box_centered: true, 45 | food_together_radius: 0.25, 46 | food_respawn_time: 5, 47 | food_rew_type: "joint_mean", 48 | 49 | # Observations 50 | n_lidar_per_agent: 30, 51 | prep_obs: true, 52 | }, 53 | }, 54 | } 55 | -------------------------------------------------------------------------------- /examples/hide_and_seek_full.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_full.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_policy_phases/a_chasing.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_policy_phases/a_chasing.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_policy_phases/b_forts.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_policy_phases/b_forts.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_policy_phases/c_ramps.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_policy_phases/c_ramps.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_policy_phases/d_ramp_defense.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_policy_phases/d_ramp_defense.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_policy_phases/e_box_surfing.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_policy_phases/e_box_surfing.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_quadrant.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.hide_and_seek:make_env", 4 | args: { 5 | # Agents 6 | n_hiders: 2, 7 | n_seekers: 2, 8 | # Agent Actions 9 | grab_box: true, 10 | grab_out_of_vision: false, 11 | grab_selective: false, 12 | grab_exclusive: false, 13 | 14 | lock_box: true, 15 | lock_type: "all_lock_team_specific", 16 | lock_out_of_vision: false, 17 | 18 | # Scenario 19 | n_substeps: 15, 20 | horizon: 80, 21 | scenario: 'quadrant', 22 | prep_fraction: 0.4, 23 | rew_type: "joint_zero_sum", 24 | restrict_rect: [0.1, 0.1, 5.9, 5.9], 25 | p_door_dropout: 0.5, 26 | quadrant_game_hider_uniform_placement: true, 27 | 28 | # Objects 29 | n_boxes: 2, 30 | box_only_z_rot: true, 31 | boxid_obs: false, 32 | 33 | n_ramps: 1, 34 | lock_ramp: false, 35 | penalize_objects_out: true, 36 | 37 | # Food 38 | n_food: 0, 39 | 40 | # Observations 41 | n_lidar_per_agent: 30, 42 | prep_obs: true, 43 | }, 44 | }, 45 | } 46 | -------------------------------------------------------------------------------- /examples/hide_and_seek_quadrant.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_quadrant.npz -------------------------------------------------------------------------------- /examples/hide_and_seek_quadrant_physics_exploits.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/hide_and_seek_quadrant_physics_exploits.npz -------------------------------------------------------------------------------- /examples/lock_and_return.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.box_locking:make_env", 4 | args: { 5 | # Agents 6 | n_agents: 1, 7 | # Agent Actions 8 | grab_box: true, 9 | grab_out_of_vision: false, 10 | grab_selective: false, 11 | grab_exclusive: false, 12 | 13 | lock_box: true, 14 | lock_type: "all_lock_team_specific", 15 | lock_out_of_vision: false, 16 | 17 | # Scenario 18 | n_substeps: 15, 19 | horizon: 120, 20 | scenario: "randomwalls", 21 | n_rooms: 6, 22 | random_room_number: false, 23 | 24 | # Objects 25 | box_only_z_rot: true, 26 | boxid_obs: false, 27 | boxsize_obs: true, 28 | pad_ramp_size: true, 29 | 30 | # Observations 31 | n_lidar_per_agent: 30, 32 | additional_obs: { 33 | hider: [[1]], 34 | prep_obs: [[0]], 35 | ramp_obs: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 36 | ramp_obj_lock: [[0]], 37 | ramp_you_lock: [[[0]]], 38 | ramp_team_lock: [[[0]]], 39 | mask_ar_obs: [[0]], 40 | }, 41 | 42 | # Lock Box Task 43 | n_boxes: 1, 44 | task_type: 'all-return', 45 | lock_reward: 5.0, 46 | unlock_penalty: 5.0, 47 | shaped_reward_scale: 0.5, 48 | }, 49 | }, 50 | } -------------------------------------------------------------------------------- /examples/lock_and_return.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/lock_and_return.npz -------------------------------------------------------------------------------- /examples/sequential_lock.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.box_locking:make_env", 4 | args: { 5 | # Agents 6 | n_agents: 1, 7 | fixed_agent_spawn: false, 8 | 9 | # Agent Actions 10 | grab_box: true, 11 | grab_out_of_vision: false, 12 | grab_selective: false, 13 | grab_exclusive: false, 14 | 15 | lock_box: true, 16 | lock_type: "all_lock_team_specific", 17 | lock_out_of_vision: false, 18 | 19 | 20 | # Scenario 21 | n_substeps: 15, 22 | n_ramps: 3, 23 | horizon: 120, 24 | scenario: "var_tri_uniform", 25 | door_size: 0, 26 | 27 | # Objects 28 | box_only_z_rot: true, 29 | boxid_obs: false, 30 | boxsize_obs: true, 31 | pad_ramp_size: true, 32 | 33 | # Observations 34 | n_lidar_per_agent: 30, 35 | additional_obs: { 36 | hider: [[1]], 37 | prep_obs: [[0]], 38 | }, 39 | 40 | # Lock Box Task 41 | n_boxes: 4, 42 | task_type: 'order', 43 | lock_reward: 5.0, 44 | unlock_penalty: 5.0, 45 | shaped_reward_scale: 0.5, 46 | }, 47 | }, 48 | } -------------------------------------------------------------------------------- /examples/sequential_lock.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/sequential_lock.npz -------------------------------------------------------------------------------- /examples/shelter.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | make_env: { 3 | "function": "mae_envs.envs.shelter_construction:make_env", 4 | args: { 5 | # Agent Actions 6 | grab_box: true, 7 | grab_out_of_vision: false, 8 | grab_selective: false, 9 | grab_exclusive: false, 10 | 11 | lock_box: true, 12 | lock_type: "all_lock_team_specific", 13 | lock_out_of_vision: false, 14 | 15 | # Scenario 16 | n_substeps: 15, 17 | horizon: 240, 18 | 19 | # Objects 20 | n_boxes: 8, 21 | n_elongated_boxes: 3, 22 | box_only_z_rot: true, 23 | boxid_obs: false, 24 | 25 | # Observations 26 | n_lidar_per_agent: 30, 27 | additional_obs: { 28 | hider: [[0]], 29 | prep_obs: [[0]], 30 | ramp_obs: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 31 | mask_ar_obs: [[0]], 32 | }, 33 | 34 | # Shelter 35 | shelter_reward_scale: 0.001, 36 | objective_diameter: [1.5, 2], 37 | objective_placement: 'uniform_away_from_walls', 38 | }, 39 | }, 40 | } -------------------------------------------------------------------------------- /examples/shelter.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/examples/shelter.npz -------------------------------------------------------------------------------- /examples/test_all_policies.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import unittest 3 | import pytest 4 | import os 5 | 6 | EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | EXAMINE_FILE_PATH = os.path.join(EXAMPLES_DIR, "../bin/examine.py") 8 | 9 | class ExamineTest(unittest.TestCase): 10 | def test_examine_env(self): 11 | envs = [ 12 | "hide_and_seek_full.jsonnet", 13 | "hide_and_seek_quadrant.jsonnet", 14 | "blueprint.jsonnet", 15 | "lock_and_return.jsonnet", 16 | "sequential_lock.jsonnet", 17 | "shelter.jsonnet", 18 | ] 19 | for env in envs: 20 | with self.assertRaises(subprocess.TimeoutExpired): 21 | subprocess.check_call( 22 | ["/usr/bin/env", "python", EXAMINE_FILE_PATH, os.path.join(EXAMPLES_DIR, env)], 23 | timeout=10) 24 | 25 | 26 | def test_examine_policies(self): 27 | envs_policies = [ 28 | ("hide_and_seek_full.jsonnet", "hide_and_seek_full.npz"), 29 | ("hide_and_seek_quadrant.jsonnet", "hide_and_seek_quadrant.npz"), 30 | ("blueprint.jsonnet", "blueprint.npz"), 31 | ("lock_and_return.jsonnet", "lock_and_return.npz"), 32 | ("sequential_lock.jsonnet", "sequential_lock.npz"), 33 | ("shelter.jsonnet", "shelter.npz"), 34 | ] 35 | for env, policy in envs_policies: 36 | with self.assertRaises(subprocess.TimeoutExpired): 37 | subprocess.check_call( 38 | ["/usr/bin/env", "python", EXAMINE_FILE_PATH, os.path.join(EXAMPLES_DIR, env), os.path.join(EXAMPLES_DIR, policy)], 39 | timeout=15) 40 | -------------------------------------------------------------------------------- /ma_policy/layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from ma_policy.util import shape_list 4 | 5 | 6 | ################# 7 | # Pooling ####### 8 | ################# 9 | 10 | def entity_avg_pooling_masked(x, mask): 11 | ''' 12 | Masks and pools x along 
the second to last dimension. Arguments have dimensions: 13 | x: batch x time x n_entities x n_features 14 | mask: batch x time x n_entities 15 | ''' 16 | mask = tf.expand_dims(mask, -1) 17 | masked = x * mask 18 | summed = tf.reduce_sum(masked, -2) 19 | denom = tf.reduce_sum(mask, -2) + 1e-5 20 | return summed / denom 21 | 22 | 23 | def entity_max_pooling_masked(x, mask): 24 | ''' 25 | Masks and pools x along the second to last dimension. Arguments have dimensions: 26 | x: batch x time x n_entities x n_features 27 | mask: batch x time x n_entities 28 | ''' 29 | mask = tf.expand_dims(mask, -1) 30 | has_unmasked_entities = tf.sign(tf.reduce_sum(mask, axis=-2, keepdims=True)) 31 | offset = (mask - 1) * 1e9 32 | masked = (x + offset) * has_unmasked_entities 33 | return tf.reduce_max(masked, -2) 34 | 35 | 36 | ################# 37 | # Contat Ops #### 38 | ################# 39 | 40 | def entity_concat(inps): 41 | ''' 42 | Concat 4D tensors along the third dimension. If a 3D tensor is in the list 43 | then treat it as a single entity and expand the third dimension 44 | Args: 45 | inps (list of tensors): tensors to concatenate 46 | ''' 47 | with tf.variable_scope('concat_entities'): 48 | shapes = [shape_list(_x) for _x in inps] 49 | # For inputs that don't have entity dimension add one. 50 | inps = [_x if len(_shape) == 4 else tf.expand_dims(_x, 2) for _x, _shape in zip(inps, shapes)] 51 | shapes = [shape_list(_x) for _x in inps] 52 | assert np.all([_shape[-1] == shapes[0][-1] for _shape in shapes]),\ 53 | f"Some entities don't have the same outer or inner dimensions {shapes}" 54 | # Concatenate along entity dimension 55 | out = tf.concat(inps, -2) 56 | return out 57 | 58 | 59 | def concat_entity_masks(inps, masks): 60 | ''' 61 | Concats masks together. If mask is None, then it creates 62 | a tensor of 1's with shape (BS, T, NE). 63 | Args: 64 | inps (list of tensors): tensors that masks apply to 65 | masks (list of tensors): corresponding masks 66 | ''' 67 | assert len(inps) == len(masks), "There should be the same number of inputs as masks" 68 | with tf.variable_scope('concat_masks'): 69 | shapes = [shape_list(_x) for _x in inps] 70 | new_masks = [] 71 | for inp, mask in zip(inps, masks): 72 | if mask is None: 73 | inp_shape = shape_list(inp) 74 | if len(inp_shape) == 4: # this is an entity tensor 75 | new_masks.append(tf.ones(inp_shape[:3])) 76 | elif len(inp_shape) == 3: # this is a pooled or main tensor. Set NE (outer dimension) to 1 77 | new_masks.append(tf.ones(inp_shape[:2] + [1])) 78 | else: 79 | new_masks.append(mask) 80 | new_mask = tf.concat(new_masks, -1) 81 | return new_mask 82 | 83 | 84 | ################# 85 | # Transformer ### 86 | ################# 87 | 88 | 89 | def residual_sa_block(inp, mask, heads, n_embd, 90 | layer_norm=False, post_sa_layer_norm=False, 91 | n_mlp=1, qk_w=0.125, v_w=0.125, post_w=0.125, 92 | mlp_w1=0.125, mlp_w2=0.125, 93 | scope="residual_sa_block", reuse=False): 94 | ''' 95 | Residual self attention block for entities. 96 | Notation: 97 | T - Time 98 | NE - Number entities 99 | Args: 100 | inp (tf): (BS, T, NE, f) 101 | mask (tf): (BS, T, NE) 102 | heads (int) -- number of attention heads 103 | n_embd (int) -- dimension of queries, keys, and values will be n_embd / heads 104 | layer_norm (bool) -- normalize embedding prior to computing qkv 105 | n_mlp (int) -- number of mlp layers. If there are more than 1 mlp layers, we'll add a residual 106 | connection from after the first mlp to after the last mlp. 
107 | qk_w, v_w, post_w, mlp_w1, mlp_w2 (float) -- scale for gaussian init for keys/queries, values, mlp 108 | post self attention, second mlp, and third mlp, respectively. Std will be sqrt(scale/n_embd) 109 | scope (string) -- tf scope 110 | reuse (bool) -- tf reuse 111 | ''' 112 | with tf.variable_scope(scope, reuse=reuse): 113 | a = self_attention(inp, mask, heads, n_embd, layer_norm=layer_norm, qk_w=qk_w, v_w=v_w, 114 | scope='self_attention', reuse=reuse) 115 | post_scale = np.sqrt(post_w / n_embd) 116 | post_a_mlp = tf.layers.dense(a, 117 | n_embd, 118 | kernel_initializer=tf.random_normal_initializer(stddev=post_scale), 119 | name="mlp1") 120 | x = inp + post_a_mlp 121 | if post_sa_layer_norm: 122 | with tf.variable_scope('post_a_layernorm'): 123 | x = tf.contrib.layers.layer_norm(x, begin_norm_axis=3) 124 | if n_mlp > 1: 125 | mlp = x 126 | mlp2_scale = np.sqrt(mlp_w1 / n_embd) 127 | mlp = tf.layers.dense(mlp, 128 | n_embd, 129 | kernel_initializer=tf.random_normal_initializer(stddev=mlp2_scale), 130 | name="mlp2") 131 | if n_mlp > 2: 132 | mlp3_scale = np.sqrt(mlp_w2 / n_embd) 133 | mlp = tf.layers.dense(mlp, 134 | n_embd, 135 | kernel_initializer=tf.random_normal_initializer(stddev=mlp3_scale), 136 | name="mlp3") 137 | if n_mlp > 1: 138 | x = x + mlp 139 | return x 140 | 141 | 142 | def self_attention(inp, mask, heads, n_embd, layer_norm=False, qk_w=1.0, v_w=0.01, 143 | scope='', reuse=False): 144 | ''' 145 | Self attention over entities. 146 | Notation: 147 | T - Time 148 | NE - Number entities 149 | Args: 150 | inp (tf) -- tensor w/ shape (bs, T, NE, features) 151 | mask (tf) -- binary tensor with shape (bs, T, NE). For each batch x time, 152 | nner matrix represents entity i's ability to see entity j 153 | heads (int) -- number of attention heads 154 | n_embd (int) -- dimension of queries, keys, and values will be n_embd / heads 155 | layer_norm (bool) -- normalize embedding prior to computing qkv 156 | qk_w, v_w (float) -- scale for gaussian init for keys/queries and values 157 | Std will be sqrt(scale/n_embd) 158 | scope (string) -- tf scope 159 | reuse (bool) -- tf reuse 160 | ''' 161 | with tf.variable_scope(scope, reuse=reuse): 162 | bs, T, NE, features = shape_list(inp) 163 | # Put mask in format correct for logit matrix 164 | entity_mask = None 165 | if mask is not None: 166 | with tf.variable_scope('expand_mask'): 167 | assert np.all(np.array(mask.get_shape().as_list()) == np.array(inp.get_shape().as_list()[:3])),\ 168 | f"Mask and input should have the same first 3 dimensions. 
{shape_list(mask)} -- {shape_list(inp)}" 169 | entity_mask = mask 170 | mask = tf.expand_dims(mask, -2) # (BS, T, 1, NE) 171 | 172 | query, key, value = qkv_embed(inp, heads, n_embd, layer_norm=layer_norm, qk_w=qk_w, v_w=v_w, reuse=reuse) 173 | logits = tf.matmul(query, key, name="matmul_qk_parallel") # (bs, T, heads, NE, NE) 174 | logits /= np.sqrt(n_embd / heads) 175 | softmax = stable_masked_softmax(logits, mask) 176 | att_sum = tf.matmul(softmax, value, name="matmul_softmax_value") # (bs, T, heads, NE, features) 177 | with tf.variable_scope('flatten_heads'): 178 | out = tf.transpose(att_sum, (0, 1, 3, 2, 4)) # (bs, T, n_output_entities, heads, features) 179 | n_output_entities = shape_list(out)[2] 180 | out = tf.reshape(out, (bs, T, n_output_entities, n_embd)) # (bs, T, n_output_entities, n_embd) 181 | 182 | return out 183 | 184 | 185 | def stable_masked_softmax(logits, mask): 186 | ''' 187 | Args: 188 | logits (tf): tensor with shape (bs, T, heads, NE, NE) 189 | mask (tf): tensor with shape(bs, T, 1, NE) 190 | ''' 191 | with tf.variable_scope('stable_softmax'): 192 | # Subtract a big number from the masked logits so they don't interfere with computing the max value 193 | if mask is not None: 194 | mask = tf.expand_dims(mask, 2) 195 | logits -= (1.0 - mask) * 1e10 196 | 197 | # Subtract the max logit from everything so we don't overflow 198 | logits -= tf.reduce_max(logits, axis=-1, keepdims=True) 199 | unnormalized_p = tf.exp(logits) 200 | 201 | # Mask the unnormalized probibilities and then normalize and remask 202 | if mask is not None: 203 | unnormalized_p *= mask 204 | normalized_p = unnormalized_p / (tf.reduce_sum(unnormalized_p, axis=-1, keepdims=True) + 1e-10) 205 | if mask is not None: 206 | normalized_p *= mask 207 | return normalized_p 208 | 209 | 210 | def qkv_embed(inp, heads, n_embd, layer_norm=False, qk_w=1.0, v_w=0.01, reuse=False): 211 | ''' 212 | Compute queries, keys, and values 213 | Args: 214 | inp (tf) -- tensor w/ shape (bs, T, NE, features) 215 | heads (int) -- number of attention heads 216 | n_embd (int) -- dimension of queries, keys, and values will be n_embd / heads 217 | layer_norm (bool) -- normalize embedding prior to computing qkv 218 | qk_w (float) -- Initialization scale for keys and queries. Actual scale will be 219 | sqrt(qk_w / #input features) 220 | v_w (float) -- Initialization scale for values. 
Actual scale will be sqrt(v_w / #input features) 221 | reuse (bool) -- tf reuse 222 | ''' 223 | with tf.variable_scope('qkv_embed'): 224 | bs, T, NE, features = shape_list(inp) 225 | if layer_norm: 226 | with tf.variable_scope('pre_sa_layer_norm'): 227 | inp = tf.contrib.layers.layer_norm(inp, begin_norm_axis=3) 228 | 229 | # qk shape (bs x T x NE x h x n_embd/h) 230 | qk_scale = np.sqrt(qk_w / features) 231 | qk = tf.layers.dense(inp, 232 | n_embd * 2, 233 | kernel_initializer=tf.random_normal_initializer(stddev=qk_scale), 234 | reuse=reuse, 235 | name="qk_embed") # bs x T x n_embd*2 236 | qk = tf.reshape(qk, (bs, T, NE, heads, n_embd // heads, 2)) 237 | 238 | # (bs, T, NE, heads, features) 239 | query, key = [tf.squeeze(x, -1) for x in tf.split(qk, 2, -1)] 240 | 241 | v_scale = np.sqrt(v_w / features) 242 | value = tf.layers.dense(inp, 243 | n_embd, 244 | kernel_initializer=tf.random_normal_initializer(stddev=v_scale), 245 | reuse=reuse, 246 | name="v_embed") # bs x T x n_embd 247 | value = tf.reshape(value, (bs, T, NE, heads, n_embd // heads)) 248 | 249 | query = tf.transpose(query, (0, 1, 3, 2, 4), 250 | name="transpose_query") # (bs, T, heads, NE, n_embd / heads) 251 | key = tf.transpose(key, (0, 1, 3, 4, 2), 252 | name="transpose_key") # (bs, T, heads, n_embd / heads, NE) 253 | value = tf.transpose(value, (0, 1, 3, 2, 4), 254 | name="transpose_value") # (bs, T, heads, NE, n_embd / heads) 255 | 256 | return query, key, value 257 | 258 | 259 | ################## 260 | # 1D Convolution # 261 | ################## 262 | 263 | def circ_conv1d(inp, **conv_kwargs): 264 | valid_activations = {'relu': tf.nn.relu, 'tanh': tf.tanh, '': None} 265 | assert 'kernel_size' in conv_kwargs, f"Kernel size needs to be specified for circular convolution layer." 266 | conv_kwargs['activation'] = valid_activations[conv_kwargs['activation']] 267 | 268 | # concatenate input for circular convolution 269 | kernel_size = conv_kwargs['kernel_size'] 270 | num_pad = kernel_size // 2 271 | inp_shape = shape_list(inp) 272 | inp_rs = tf.reshape(inp, shape=[inp_shape[0] * inp_shape[1]] + inp_shape[2:]) # (BS * T, NE, feats) 273 | inp_padded = tf.concat([inp_rs[..., -num_pad:, :], inp_rs, inp_rs[..., :num_pad, :]], -2) 274 | out = tf.layers.conv1d(inp_padded, 275 | kernel_initializer=tf.contrib.layers.xavier_initializer(), 276 | padding='valid', 277 | **conv_kwargs) 278 | 279 | out = tf.reshape(out, shape=inp_shape[:3] + [conv_kwargs['filters']]) 280 | return out 281 | 282 | ################## 283 | # Misc ########### 284 | ################## 285 | 286 | 287 | def layernorm(x, scope, epsilon=1e-5, reuse=False): 288 | ''' 289 | normalize state vector to be zero mean / unit variance + learned scale/shift 290 | ''' 291 | with tf.variable_scope(scope, reuse=reuse): 292 | n_state = x.get_shape()[-1] 293 | gain = tf.get_variable('gain', [n_state], initializer=tf.constant_initializer(1)) 294 | bias = tf.get_variable('bias', [n_state], initializer=tf.constant_initializer(0)) 295 | mean = tf.reduce_mean(x, axis=[-1], keep_dims=True) 296 | variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keep_dims=True) 297 | norm_x = (x - mean) * tf.rsqrt(variance + epsilon) 298 | return norm_x * gain + bias 299 | -------------------------------------------------------------------------------- /ma_policy/load_policy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | import logging 5 | import sys 6 | import traceback 7 | import 
cloudpickle as pickle 8 | 9 | from ma_policy.ma_policy import MAPolicy 10 | 11 | 12 | def shape_list(x): 13 | ''' 14 | deal with dynamic shape in tensorflow cleanly 15 | ''' 16 | ps = x.get_shape().as_list() 17 | ts = tf.shape(x) 18 | return [ts[i] if ps[i] is None else ps[i] for i in range(len(ps))] 19 | 20 | 21 | def replace_base_scope(var_name, new_base_scope): 22 | split = var_name.split('/') 23 | split[0] = new_base_scope 24 | return os.path.normpath('/'.join(split)) 25 | 26 | 27 | def load_variables(policy, weights): 28 | weights = {os.path.normpath(key): value for key, value in weights.items()} 29 | weights = {replace_base_scope(key, policy.scope): value for key, value in weights.items()} 30 | assign_ops = [] 31 | for var in policy.get_variables(): 32 | var_name = os.path.normpath(var.name) 33 | if var_name not in weights: 34 | logging.warning(f"{var_name} was not found in weights dict. This will be reinitialized.") 35 | tf.get_default_session().run(var.initializer) 36 | else: 37 | try: 38 | assert np.all(np.array(shape_list(var)) == np.array(weights[var_name].shape)) 39 | assign_ops.append(var.assign(weights[var_name])) 40 | except Exception: 41 | traceback.print_exc(file=sys.stdout) 42 | print(f"Error assigning weights of shape {weights[var_name].shape} to {var}") 43 | sys.exit() 44 | tf.get_default_session().run(assign_ops) 45 | 46 | 47 | def load_policy(path, env=None, scope='policy'): 48 | ''' 49 | Load a policy. 50 | Args: 51 | path (string): policy path 52 | env (Gym.Env): This will update the observation space of the 53 | policy that is returned 54 | scope (string): The base scope for the policy variables 55 | ''' 56 | # TODO this will probably need to be changed when trying to run policy on GPU 57 | if tf.get_default_session() is None: 58 | tf_config = tf.ConfigProto( 59 | inter_op_parallelism_threads=1, 60 | intra_op_parallelism_threads=1) 61 | sess = tf.Session(config=tf_config) 62 | sess.__enter__() 63 | 64 | policy_dict = dict(np.load(path)) 65 | policy_fn_and_args_raw = pickle.loads(policy_dict['policy_fn_and_args']) 66 | policy_args = policy_fn_and_args_raw['args'] 67 | policy_args['scope'] = scope 68 | 69 | if env is not None: 70 | policy_args['ob_space'] = env.observation_space 71 | policy_args['ac_space'] = env.action_space 72 | 73 | policy = MAPolicy(**policy_args) 74 | del policy_dict['policy_fn_and_args'] 75 | 76 | load_variables(policy, policy_dict) 77 | return policy 78 | -------------------------------------------------------------------------------- /ma_policy/normalizers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def _mean_std_update_size(x, axes): 5 | x_shape = tf.shape(x) 6 | x_dims_to_reduce = tf.gather(x_shape, axes) 7 | size = tf.reduce_prod(x_dims_to_reduce) 8 | return size 9 | 10 | 11 | def _interpolate(old, new, old_weight, scaled_weight): 12 | return old * old_weight + new * scaled_weight 13 | 14 | 15 | def _std_from_mean_and_square(mean, square): 16 | var_est = tf.to_float(square) - tf.square(mean) 17 | return tf.sqrt(tf.maximum(var_est, 1e-2)) 18 | 19 | 20 | class EMAMeanStd(object): 21 | """ 22 | Calculates an Exponential Moving Average for each argument with 23 | exponential coefficient `beta`. 
The forward relation is: 24 | mean = beta * old_mean + (1.0 - beta) * observation 25 | The algorithm removes the bias introduced from setting ema[-1] = 0.0 26 | 27 | Note: `beta` parameter is defined with respect to a single observation within a batch 28 | if `per_element_update=True` (if a batch has 1000 elements of an observation, this is 29 | considered to be a 1000 updates), else it is considered to be the size of an update for a full 30 | batch (1 update if `per_element_update=False`). 31 | """ 32 | 33 | def __init__(self, beta, scope="ema", reuse=None, epsilon=1e-6, per_element_update=False, shape=(), version=1): 34 | self._version = version 35 | self._per_element_update = per_element_update 36 | with tf.variable_scope(scope, reuse=reuse): 37 | # Expected value of x 38 | self._biased_mean = tf.get_variable( 39 | dtype=tf.float32, 40 | shape=shape, 41 | initializer=tf.constant_initializer(0.0), 42 | name="mean", 43 | trainable=False) 44 | # Expected value of x^2 45 | self._biased_sq = tf.get_variable( 46 | dtype=tf.float32, 47 | shape=shape, 48 | initializer=tf.constant_initializer(0.0), 49 | name="sq", 50 | trainable=False) 51 | # How to integrate observations of x over time 52 | self._one_minus_beta = 1.0 - beta 53 | # Weight placed on ema[-1] == 0.0 which we divide out to debias 54 | self._debiasing_term = tf.get_variable( 55 | dtype=tf.float32, 56 | shape=shape, 57 | initializer=tf.constant_initializer(0.0), 58 | name="debiasing_term", 59 | trainable=False) 60 | self.shape = shape 61 | 62 | # the stored mean and square are biased due to setting ema[-1] = 0.0, 63 | # we correct for this by dividing by the debiasing term: 64 | self.mean = self._biased_mean / tf.maximum(self._debiasing_term, epsilon) 65 | self.std = _std_from_mean_and_square(mean=self.mean, square=self._biased_sq / tf.maximum(self._debiasing_term, epsilon)) 66 | 67 | def update_op(self, x, axes=(0,)): 68 | scaled_weight = tf.cast(self._one_minus_beta, tf.float64) 69 | if self._per_element_update: 70 | # many updates were done at once in a batch, so we figure out what power 71 | # to raise `1-beta` to. 72 | # using the fact that for small 1.0 - beta we have: 73 | # 1 - beta^N ~= (1.0 - beta) * N 74 | size = _mean_std_update_size(x, axes) 75 | scaled_weight *= tf.cast(size, tf.float64) 76 | one = tf.constant(1.0, dtype=tf.float64) 77 | old_weight = one - scaled_weight 78 | old_weight_fp32 = tf.to_float(old_weight) 79 | scaled_weight_fp32 = tf.to_float(scaled_weight) 80 | return tf.group( 81 | # increment the running debiasing term by the contribution of the initial ema[-1] == 0.0 observation 82 | # (e.g. 
boost the observed value by how much it was initially discounted on step 1) 83 | tf.assign(self._debiasing_term, tf.to_float(_interpolate(old=tf.cast(self._debiasing_term, tf.float64), new=one, old_weight=old_weight, scaled_weight=scaled_weight))), 84 | # do an interpolation on the expected value of X 85 | tf.assign(self._biased_mean, _interpolate(old=self._biased_mean, new=tf.reduce_mean(tf.to_float(x), axis=axes), old_weight=old_weight_fp32, scaled_weight=scaled_weight_fp32)), 86 | # do an interpolation on the expected value of X^2 87 | tf.assign(self._biased_sq, _interpolate(old=self._biased_sq, new=tf.reduce_mean(tf.square(tf.to_float(x)), axis=axes), old_weight=old_weight_fp32, scaled_weight=scaled_weight_fp32)), 88 | ) 89 | -------------------------------------------------------------------------------- /ma_policy/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def normc_initializer(std=1.0, axis=0): 6 | def _initializer(shape, dtype=None, partition_info=None): # pylint: disable=W0613 7 | out = np.random.randn(*shape).astype(np.float32) 8 | out *= std / np.sqrt(np.square(out).sum(axis=axis, keepdims=True)) 9 | return tf.constant(out) 10 | return _initializer 11 | 12 | 13 | def listdict2dictnp(l, keepdims=False): 14 | ''' 15 | Convert a list of dicts of numpy arrays to a dict of numpy arrays. 16 | If keepdims is False the new outer dimension in each dict element will be 17 | the length of the list 18 | If keepdims is True, then the new outdimension in each dict will be the sum of the 19 | outer dimensions of each item in the list 20 | ''' 21 | if keepdims: 22 | return {k: np.concatenate([d[k] for d in l]) for k in l[0]} 23 | else: 24 | return {k: np.array([d[k] for d in l]) for k in l[0]} 25 | 26 | 27 | def shape_list(x): 28 | ''' 29 | deal with dynamic shape in tensorflow cleanly 30 | ''' 31 | ps = x.get_shape().as_list() 32 | ts = tf.shape(x) 33 | return [ts[i] if ps[i] is None else ps[i] for i in range(len(ps))] 34 | 35 | 36 | def l2_loss(pred, label, std, mask): 37 | ''' 38 | Masked L2 loss with a scaling paramter (std). We made the choice that 39 | the loss would scale with the number of unmasked data points rather 40 | than have the same magnitude regardless of how many samples came in. 41 | TODO: Revisit whether this is the right choice. 42 | ''' 43 | if mask is None: 44 | return 0.5 * tf.reduce_mean(tf.square((pred - label) / std)) 45 | else: 46 | return 0.5 * tf.reduce_mean(mask * tf.square((pred - label) / std)) 47 | -------------------------------------------------------------------------------- /ma_policy/variable_schema.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | BATCH = "batch" 5 | TIMESTEPS = "timesteps" 6 | 7 | 8 | class VariableSchema(object): 9 | def __init__(self, shape, dtype): 10 | """Creates a schema for a variable used in policy. 11 | Allows for symbolic definition of shape. Shape can consist of integers, as well as 12 | strings BATCH and TIMESTEPS. This is taken advantage of in the optimizers, to 13 | create placeholders or variables that asynchronously prefetch the inputs. 14 | 15 | Parameters 16 | ---------- 17 | shape: [int, np.int64, np.int32, or str] 18 | shape of the variable, e.g. [12, 4], [BATCH, 12], [BATCH, 'timestep'] 19 | dtype: 20 | tensorflow type of the variable, e.g. 
tf.float32, tf.int32 21 | """ 22 | assert all(isinstance(s, (int, np.int64, np.int32)) or s in [BATCH, TIMESTEPS] for s in shape), 'Bad shape %s' % shape 23 | self.shape = shape 24 | self.dtype = tf.as_dtype(dtype) 25 | 26 | def _substituted_shape(self, batch=None, timesteps=None): 27 | feeds = dict(batch=batch, timesteps=timesteps) 28 | return [feeds.get(v, v) for v in self.shape] 29 | 30 | def substitute(self, *, batch=BATCH, timesteps=TIMESTEPS): 31 | """Make a new VariableSchema with batch or timesteps optionally filled in.""" 32 | # Coerse None to default value. 33 | batch = batch or BATCH 34 | timesteps = timesteps or TIMESTEPS 35 | shape = self._substituted_shape(batch, timesteps) 36 | return VariableSchema(shape=shape, dtype=self.dtype) 37 | 38 | def placeholder(self, *, batch=None, timesteps=None, name=None): 39 | real_shape = self._substituted_shape(batch, timesteps) 40 | return tf.placeholder(self.dtype, real_shape, name=name) 41 | 42 | def variable(self, *, name, batch=None, timesteps=None, **kwargs): 43 | real_shape = self._substituted_shape(batch, timesteps) 44 | assert None not in real_shape 45 | return tf.get_variable(name, real_shape, self.dtype, **kwargs) 46 | 47 | def np_zeros(self, *, batch=None, timesteps=None, **kwargs): 48 | real_shape = self._substituted_shape(batch, timesteps) 49 | np_dtype = self.dtype.as_numpy_dtype 50 | return np.zeros(shape=real_shape, dtype=np_dtype, **kwargs) 51 | 52 | def match_shape(self, shape, *, batch=None, timesteps=None): 53 | expected = self._substituted_shape(batch, timesteps) 54 | if len(expected) != len(shape): 55 | return False 56 | for expected, actual in zip(expected, shape): 57 | if expected is not None and expected != actual: 58 | return False 59 | return True 60 | -------------------------------------------------------------------------------- /mae_envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mae_envs/envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mae_envs/envs/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from mujoco_worldgen import Floor, WorldBuilder, WorldParams, Env 4 | from mae_envs.wrappers.multi_agent import (SplitMultiAgentActions, SplitObservations, 5 | SelectKeysWrapper) 6 | from mae_envs.wrappers.util import DiscretizeActionWrapper, DiscardMujocoExceptionEpisodes 7 | from mae_envs.wrappers.line_of_sight import AgentAgentObsMask2D 8 | from mae_envs.modules.agents import Agents 9 | from mae_envs.modules.walls import RandomWalls 10 | from mae_envs.modules.objects import Boxes, Ramps 11 | 12 | 13 | class Base(Env): 14 | ''' 15 | Multi-agent Base Environment. 16 | Args: 17 | horizon (int): Number of steps agent gets to act 18 | n_substeps (int): Number of internal mujoco steps per outer environment step; 19 | essentially this is action repeat. 20 | n_agents (int): number of agents in the environment 21 | floor_size (float or (float, float)): size of the floor. 
If a list of 2 floats, the floorsize 22 | will be randomized between them on each episode 23 | grid_size (int): size of the grid that we'll use to place objects on the floor 24 | action_lims (float tuple): lower and upper limit of mujoco actions 25 | deterministic_mode (bool): if True, seeds are incremented rather than randomly sampled. 26 | ''' 27 | def __init__(self, horizon=250, n_substeps=5, n_agents=2, 28 | floor_size=6., grid_size=30, 29 | action_lims=(-1.0, 1.0), deterministic_mode=False, 30 | **kwargs): 31 | super().__init__(get_sim=self._get_sim, 32 | get_obs=self._get_obs, 33 | action_space=tuple(action_lims), 34 | horizon=horizon, 35 | deterministic_mode=deterministic_mode) 36 | self.n_agents = n_agents 37 | self.metadata = {} 38 | self.metadata['n_actors'] = n_agents 39 | self.horizon = horizon 40 | self.n_substeps = n_substeps 41 | if not isinstance(floor_size, (tuple, list, np.ndarray)): 42 | self.floor_size_dist = [floor_size, floor_size] 43 | else: 44 | self.floor_size_dist = floor_size 45 | self.grid_size = grid_size 46 | self.kwargs = kwargs 47 | self.placement_grid = np.zeros((grid_size, grid_size)) 48 | self.modules = [] 49 | 50 | def add_module(self, module): 51 | self.modules.append(module) 52 | 53 | def _get_obs(self, sim): 54 | ''' 55 | Loops through modules, calls their observation_step functions, and 56 | adds the result to the observation dictionary. 57 | ''' 58 | obs = {} 59 | for module in self.modules: 60 | obs.update(module.observation_step(self, self.sim)) 61 | return obs 62 | 63 | def _get_sim(self, seed): 64 | ''' 65 | Calls build_world_step and then modify_sim_step for each module. If 66 | a build_world_step failed, then restarts. 67 | ''' 68 | self.floor_size = np.random.uniform(self.floor_size_dist[0], self.floor_size_dist[1]) 69 | self.metadata['floor_size'] = self.floor_size 70 | world_params = WorldParams(size=(self.floor_size, self.floor_size, 2.5), 71 | num_substeps=self.n_substeps) 72 | successful_placement = False 73 | failures = 0 74 | while not successful_placement: 75 | if (failures + 1) % 10 == 0: 76 | logging.warning(f"Failed {failures} times in creating environment") 77 | builder = WorldBuilder(world_params, seed) 78 | floor = Floor() 79 | 80 | builder.append(floor) 81 | 82 | self.placement_grid = np.zeros((self.grid_size, self.grid_size)) 83 | 84 | successful_placement = np.all([module.build_world_step(self, floor, self.floor_size) 85 | for module in self.modules]) 86 | failures += 1 87 | 88 | sim = builder.get_sim() 89 | 90 | for module in self.modules: 91 | module.modify_sim_step(self, sim) 92 | 93 | return sim 94 | 95 | 96 | def make_env(n_substeps=5, horizon=250, deterministic_mode=False, n_agents=2, 97 | n_boxes=2, n_ramps=1): 98 | ''' 99 | This make_env function is not used anywhere; it exists to provide a simple, bare-bones 100 | example of how to construct a multi-agent environment using the modules framework. 
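You can view this environment interactively by running `bin/examine.py base` (see the README for details).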
101 | ''' 102 | env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon, 103 | deterministic_mode=deterministic_mode) 104 | env.add_module(RandomWalls(grid_size=30, num_rooms=4, min_room_size=6, door_size=2)) 105 | if n_boxes > 0: 106 | env.add_module(Boxes(n_boxes=n_boxes)) 107 | if n_ramps > 0: 108 | env.add_module(Ramps(n_ramps=n_ramps)) 109 | env.add_module(Agents(n_agents)) 110 | env.reset() 111 | keys_self = ['agent_qpos_qvel'] 112 | keys_mask_self = ['mask_aa_obs'] 113 | keys_external = ['agent_qpos_qvel'] 114 | keys_mask_external = [] 115 | env = SplitMultiAgentActions(env) 116 | env = DiscretizeActionWrapper(env, 'action_movement') 117 | env = AgentAgentObsMask2D(env) 118 | env = SplitObservations(env, keys_self + keys_mask_self) 119 | env = SelectKeysWrapper(env, keys_self=keys_self, 120 | keys_other=keys_external + keys_mask_self + keys_mask_external) 121 | env = DiscardMujocoExceptionEpisodes(env) 122 | return env 123 | -------------------------------------------------------------------------------- /mae_envs/envs/blueprint_construction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from mae_envs.wrappers.multi_agent import (SplitMultiAgentActions, SplitObservations, 4 | SelectKeysWrapper) 5 | from mae_envs.wrappers.util import (DiscretizeActionWrapper, MaskActionWrapper, 6 | DiscardMujocoExceptionEpisodes, SpoofEntityWrapper, 7 | AddConstantObservationsWrapper, 8 | ConcatenateObsWrapper, NumpyArrayRewardWrapper) 9 | from mae_envs.wrappers.manipulation import (GrabObjWrapper, GrabClosestWrapper, 10 | LockObjWrapper, LockAllWrapper) 11 | from mae_envs.wrappers.lidar import Lidar 12 | from mae_envs.wrappers.team import TeamMembership 13 | from mae_envs.wrappers.line_of_sight import AgentAgentObsMask2D, AgentGeomObsMask2D 14 | from mae_envs.envs.base import Base 15 | from mae_envs.modules.agents import Agents, AgentManipulation 16 | from mae_envs.modules.construction_sites import ConstructionSites 17 | from mae_envs.modules.walls import WallScenarios, RandomWalls 18 | from mae_envs.modules.objects import Boxes, LidarSites 19 | from mae_envs.modules.world import FloorAttributes, WorldConstants 20 | from mae_envs.modules.util import (uniform_placement, center_placement, 21 | uniform_placement_middle) 22 | 23 | 24 | class ConstructionDistancesWrapper(gym.ObservationWrapper): 25 | ''' 26 | Calculates the distance between every pair of boxes, between boxes and 27 | construction sites, and between box corners and construction site corners. 28 | This wrapper should be only be applied if the both the Boxes module (with 29 | mark_box_corners set to True) and the ConstructionSites module have been 30 | added to the environment. 
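The added observation keys are 'box_box_dist', 'box_site_dist' and 'boxcorner_sitecorner_dist'.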
31 | ''' 32 | def __init__(self, env): 33 | super().__init__(env) 34 | 35 | def observation(self, obs): 36 | box_xpos = obs['box_xpos'] 37 | boxcorner_pos = obs['box_corner_pos'] 38 | site_pos = obs['construction_site_pos'] 39 | sitecorner_pos = obs['construction_site_corner_pos'] 40 | 41 | box_box_dist = np.linalg.norm(box_xpos[..., None] - box_xpos.T[None, ...], axis=1) 42 | box_site_dist = np.linalg.norm(box_xpos[..., None] - site_pos.T[None, ...], axis=1) 43 | boxcorner_sitecorner_dist = ( 44 | np.linalg.norm(boxcorner_pos[..., None] - sitecorner_pos.T[None, ...], axis=1)) 45 | 46 | obs.update({'box_box_dist': box_box_dist, 47 | 'box_site_dist': box_site_dist, 48 | 'boxcorner_sitecorner_dist': boxcorner_sitecorner_dist}) 49 | 50 | return obs 51 | 52 | 53 | class ConstructionDenseRewardWrapper(gym.Wrapper): 54 | ''' 55 | Adds a dense reward for placing the boxes at the construction site locations. 56 | Reward is based on the smoothmin distance between each site and all the boxes. 57 | Args: 58 | use_corners (bool): Whether to calculate reward based solely on the distances 59 | between box centers and site centers, or also based on the distances 60 | between box corners and site corners. 61 | alpha (float): Smoothing parameter. Should be nonpositive. 62 | reward_scale (float): scales the reward by this factor 63 | ''' 64 | def __init__(self, env, use_corners=False, alpha=-8, reward_scale=1): 65 | super().__init__(env) 66 | assert alpha < 0, 'alpha must be negative for the SmoothMin function to work' 67 | self.alpha = alpha 68 | self.reward_scale = reward_scale 69 | self.use_corners = use_corners 70 | 71 | def step(self, action): 72 | obs, rew, done, info = self.env.step(action) 73 | box_site_dist = (obs['boxcorner_sitecorner_dist'] 74 | if self.use_corners 75 | else obs['box_site_dist']) 76 | scaling_factors = np.exp(self.alpha * box_site_dist) 77 | site_box_smoothmin_dists = (np.sum(box_site_dist * scaling_factors, axis=0) / 78 | np.sum(scaling_factors, axis=0)) 79 | rew -= np.mean(site_box_smoothmin_dists) * self.reward_scale 80 | return obs, rew, done, info 81 | 82 | 83 | class ConstructionCompletedRewardWrapper(gym.Wrapper): 84 | ''' 85 | Adds a sparse reward and ends the episode after all construction sites have been 86 | 'activated' by having a box within a certain distance of them. The reward is based 87 | on the number of construction sites in the episode. 88 | Args: 89 | use_corners (bool): Whether to calculate if construction is finished based 90 | solely on the distances between box centers and site centers, or also 91 | based on the distances between box corners and site corners. 92 | site_activation_radius (float): a site is considered 'activated' if there is 93 | at least one box within the site activation radius. 
94 | reward_scale (float): scales the reward by this factor 95 | ''' 96 | def __init__(self, env, use_corners=False, site_activation_radius=0.2, reward_scale=1): 97 | super().__init__(env) 98 | self.n_sites = self.metadata['curr_n_sites'] 99 | self.site_activation_radius = site_activation_radius 100 | self.reward_scale = reward_scale 101 | self.use_corners = use_corners 102 | 103 | def reset(self): 104 | obs = self.env.reset() 105 | self.n_sites = self.metadata['curr_n_sites'] 106 | return obs 107 | 108 | def step(self, action): 109 | obs, rew, done, info = self.env.step(action) 110 | site_dist_to_closest_box = obs['box_site_dist'].min(axis=0) 111 | sitecorner_dist_to_closest_boxcorner = obs['boxcorner_sitecorner_dist'].min(axis=0) 112 | activated_sites = site_dist_to_closest_box < self.site_activation_radius 113 | aligned_corners = sitecorner_dist_to_closest_boxcorner < self.site_activation_radius 114 | 115 | all_sites_activated = np.all(activated_sites) 116 | all_corners_aligned = np.all(aligned_corners) 117 | construction_completed = ((all_sites_activated and not self.use_corners) or 118 | (all_sites_activated and all_corners_aligned)) 119 | 120 | if construction_completed: 121 | rew += self.n_sites * self.reward_scale 122 | done = True 123 | 124 | return obs, rew, done, info 125 | 126 | 127 | def make_env(n_substeps=15, horizon=80, deterministic_mode=False, 128 | floor_size=6.0, grid_size=30, 129 | n_agents=1, 130 | n_rooms=4, random_room_number=True, scenario='empty', door_size=2, 131 | n_sites=3, n_elongated_sites=0, site_placement='uniform_away_from_walls', 132 | reward_infos=[{'type': 'construction_dense'}], 133 | n_boxes=2, n_elongated_boxes=0, 134 | n_min_boxes=None, box_size=0.5, box_only_z_rot=False, 135 | lock_box=True, grab_box=True, grab_selective=False, lock_grab_radius=0.25, 136 | lock_type='any_lock_specific', grab_exclusive=False, 137 | grab_out_of_vision=False, lock_out_of_vision=True, 138 | box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50], 139 | action_lims=(-0.9, 0.9), polar_obs=True, 140 | n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None, 141 | boxid_obs=True, boxsize_obs=True, team_size_obs=False, additional_obs={}): 142 | 143 | grab_radius_multiplier = lock_grab_radius / box_size 144 | lock_radius_multiplier = lock_grab_radius / box_size 145 | 146 | if type(n_sites) not in [list, np.ndarray]: 147 | n_sites = [n_sites, n_sites] 148 | 149 | env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon, 150 | floor_size=floor_size, grid_size=grid_size, 151 | action_lims=action_lims, deterministic_mode=deterministic_mode) 152 | 153 | if scenario == 'randomwalls': 154 | env.add_module(RandomWalls(grid_size=grid_size, num_rooms=n_rooms, 155 | random_room_number=random_room_number, min_room_size=6, 156 | door_size=door_size, gen_door_obs=False)) 157 | elif scenario == 'empty': 158 | env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size, 159 | scenario='empty', 160 | friction=other_friction)) 161 | 162 | env.add_module(Agents(n_agents, 163 | placement_fn=uniform_placement, 164 | color=[np.array((66., 235., 244., 255.)) / 255] * n_agents, 165 | friction=other_friction, 166 | polar_obs=polar_obs)) 167 | if np.max(n_boxes) > 0: 168 | env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement, 169 | friction=box_floor_friction, polar_obs=polar_obs, 170 | n_elongated_boxes=n_elongated_boxes, 171 | boxid_obs=boxid_obs, boxsize_obs=boxsize_obs, 172 | box_size=box_size, 173 | box_only_z_rot=box_only_z_rot, 174 | 
mark_box_corners=True)) 175 | if n_sites[1] > 0: 176 | if site_placement == 'center': 177 | site_placement_fn = center_placement 178 | elif site_placement == 'uniform': 179 | site_placement_fn = uniform_placement 180 | elif site_placement == 'uniform_away_from_walls': 181 | site_placement_fn = uniform_placement_middle(0.85) 182 | else: 183 | raise ValueError(f'Site placement option: {site_placement} not implemented.' 184 | ' Please choose from center, uniform and uniform_away_from_walls.') 185 | 186 | env.add_module(ConstructionSites(n_sites, placement_fn=site_placement_fn, 187 | site_size=box_size, site_height=box_size / 2, 188 | n_elongated_sites=n_elongated_sites)) 189 | if n_lidar_per_agent > 0 and visualize_lidar: 190 | env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent)) 191 | if np.max(n_boxes) > 0 and grab_box: 192 | env.add_module(AgentManipulation()) 193 | if box_floor_friction is not None: 194 | env.add_module(FloorAttributes(friction=box_floor_friction)) 195 | env.add_module(WorldConstants(gravity=gravity)) 196 | env.reset() 197 | keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs'] 198 | keys_mask_self = ['mask_aa_obs'] 199 | keys_external = ['agent_qpos_qvel', 'construction_site_obs'] 200 | keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock'] 201 | keys_mask_external = [] 202 | 203 | env = AddConstantObservationsWrapper(env, new_obs=additional_obs) 204 | keys_external += list(additional_obs) 205 | keys_mask_external += [ob for ob in additional_obs if 'mask' in ob] 206 | 207 | env = SplitMultiAgentActions(env) 208 | if team_size_obs: 209 | keys_self += ['team_size'] 210 | env = TeamMembership(env, np.zeros((n_agents,))) 211 | env = AgentAgentObsMask2D(env) 212 | env = DiscretizeActionWrapper(env, 'action_movement') 213 | if np.max(n_boxes) > 0: 214 | env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs', 215 | geom_idxs_obs_key='box_geom_idxs') 216 | keys_external += ['mask_ab_obs', 'box_obs'] 217 | keys_mask_external.append('mask_ab_obs') 218 | if lock_box and np.max(n_boxes) > 0: 219 | agent_allowed_to_lock_keys = None if lock_out_of_vision else ["mask_ab_obs"] 220 | env = LockObjWrapper(env, body_names=[f'moveable_box{i}' for i in range(n_boxes)], 221 | agent_idx_allowed_to_lock=np.arange(n_agents), 222 | lock_type=lock_type, 223 | radius_multiplier=lock_radius_multiplier, 224 | obj_in_game_metadata_keys=["curr_n_boxes"], 225 | agent_allowed_to_lock_keys=agent_allowed_to_lock_keys) 226 | if grab_box and np.max(n_boxes) > 0: 227 | env = GrabObjWrapper(env, [f'moveable_box{i}' for i in range(n_boxes)], 228 | radius_multiplier=grab_radius_multiplier, 229 | grab_exclusive=grab_exclusive, 230 | obj_in_game_metadata_keys=['curr_n_boxes']) 231 | 232 | if n_lidar_per_agent > 0: 233 | env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar, 234 | compress_lidar_scale=compress_lidar_scale) 235 | keys_copy += ['lidar'] 236 | keys_external += ['lidar'] 237 | 238 | env = ConstructionDistancesWrapper(env) 239 | env = NumpyArrayRewardWrapper(env) 240 | 241 | reward_wrappers = { 242 | 'construction_dense': ConstructionDenseRewardWrapper, 243 | 'construction_completed': ConstructionCompletedRewardWrapper, 244 | } 245 | 246 | for rew_info in reward_infos: 247 | rew_type = rew_info['type'] 248 | del rew_info['type'] 249 | env = reward_wrappers[rew_type](env, **rew_info) 250 | 251 | env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy) 252 | if n_agents == 1: 253 | 
env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel', 'hider', 'prep_obs'], ['mask_aa_obs']) 254 | env = SpoofEntityWrapper(env, n_boxes, 255 | ['box_obs', 'you_lock', 'team_lock', 'obj_lock'], 256 | ['mask_ab_obs']) 257 | env = SpoofEntityWrapper(env, n_sites[1], ['construction_site_obs'], ['mask_acs_obs']) 258 | keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof'] 259 | env = LockAllWrapper(env, remove_object_specific_lock=True) 260 | if not grab_out_of_vision and grab_box: 261 | env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs']) # Can only pull if in vision 262 | if not grab_selective and grab_box: 263 | env = GrabClosestWrapper(env) 264 | env = DiscardMujocoExceptionEpisodes(env) 265 | env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'], 266 | 'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']}) 267 | env = SelectKeysWrapper(env, keys_self=keys_self, 268 | keys_other=keys_external + keys_mask_self + keys_mask_external) 269 | return env 270 | -------------------------------------------------------------------------------- /mae_envs/envs/shelter_construction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from mujoco_worldgen.util.types import store_args 4 | from mujoco_worldgen.util.geometry import raycast 5 | from mae_envs.wrappers.multi_agent import (SplitMultiAgentActions, SplitObservations, 6 | SelectKeysWrapper) 7 | from mae_envs.wrappers.util import (DiscretizeActionWrapper, MaskActionWrapper, 8 | DiscardMujocoExceptionEpisodes, SpoofEntityWrapper, 9 | AddConstantObservationsWrapper, 10 | ConcatenateObsWrapper) 11 | from mae_envs.wrappers.manipulation import (GrabObjWrapper, GrabClosestWrapper, 12 | LockObjWrapper, LockAllWrapper) 13 | from mae_envs.wrappers.lidar import Lidar 14 | from mae_envs.wrappers.team import TeamMembership 15 | from mae_envs.wrappers.line_of_sight import AgentAgentObsMask2D, AgentGeomObsMask2D 16 | from mae_envs.envs.base import Base 17 | from mae_envs.modules.agents import Agents, AgentManipulation 18 | from mae_envs.modules.walls import WallScenarios 19 | from mae_envs.modules.objects import Boxes, Cylinders, LidarSites 20 | from mae_envs.modules.world import FloorAttributes, WorldConstants 21 | from mae_envs.modules.util import (uniform_placement, center_placement, 22 | uniform_placement_middle) 23 | 24 | 25 | class ShelterRewardWrapper(gym.Wrapper): 26 | ''' 27 | Reward wrapper for the shelter construction task. There are invisible rays 28 | going from the edge of the playing area to the cylinder that needs to be 29 | guarded; at each timestep the agent receives negative reward proportional 30 | to the number of rays that make contact with the cylinder. 31 | Args: 32 | num_rays_per_side (int): Number of rays that shoot out of each side of the 33 | square playing area. The ray starting points are spaced out evenly. 
34 | reward_scale (float): scales the reward by this factor 35 | ''' 36 | @store_args 37 | def __init__(self, env, num_rays_per_side=30, reward_scale=1): 38 | super().__init__(env) 39 | self.ray_start_points = [] 40 | 41 | grid_cell_size = self.unwrapped.floor_size / self.unwrapped.grid_size 42 | # start points for the rays should not be exactly on the edge of the floor, 43 | # so that they do not hit the outside walls 44 | sp_min_xy = 1.01 * grid_cell_size 45 | sp_max_xy = self.unwrapped.floor_size - (1.01 * grid_cell_size) 46 | for i in range(num_rays_per_side): 47 | sp_offset = i / num_rays_per_side * (sp_max_xy - sp_min_xy) 48 | new_start_points = [(sp_min_xy + sp_offset, sp_min_xy, 0), 49 | (sp_max_xy, sp_min_xy + sp_offset, 0), 50 | (sp_max_xy - sp_offset, sp_max_xy, 0), 51 | (sp_min_xy, sp_max_xy - sp_offset, 0)] 52 | self.ray_start_points.extend(new_start_points) 53 | 54 | self.ray_start_points = np.array(self.ray_start_points) 55 | 56 | def reset(self): 57 | obs = self.env.reset() 58 | self.sim = self.unwrapped.sim 59 | return obs 60 | 61 | def step(self, action): 62 | obs, rew, done, info = self.env.step(action) 63 | target_geom = obs['static_cylinder_geom_idxs'][0, 0] 64 | rew = rew + np.zeros((self.unwrapped.n_agents, 1)) 65 | for pt in self.ray_start_points: 66 | _, collision_geom = raycast(self.sim, pt1=pt, geom2_id=target_geom) 67 | if collision_geom == target_geom: 68 | rew -= 1 69 | 70 | rew *= self.reward_scale 71 | return obs, rew, done, info 72 | 73 | 74 | def make_env(n_substeps=15, horizon=80, deterministic_mode=False, 75 | floor_size=6.0, grid_size=30, 76 | n_agents=1, 77 | objective_diameter=[1, 1], objective_placement='center', 78 | num_rays_per_side=25, shelter_reward_scale=1, 79 | n_boxes=2, n_elongated_boxes=0, 80 | box_size=0.5, box_only_z_rot=False, 81 | lock_box=True, grab_box=True, grab_selective=False, lock_grab_radius=0.25, 82 | lock_type='any_lock_specific', grab_exclusive=False, 83 | grab_out_of_vision=False, lock_out_of_vision=True, 84 | box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50], 85 | action_lims=(-0.9, 0.9), polar_obs=True, 86 | n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None, 87 | boxid_obs=True, boxsize_obs=True, team_size_obs=False, additional_obs={}): 88 | 89 | grab_radius_multiplier = lock_grab_radius / box_size 90 | lock_radius_multiplier = lock_grab_radius / box_size 91 | 92 | env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon, 93 | floor_size=floor_size, grid_size=grid_size, 94 | action_lims=action_lims, deterministic_mode=deterministic_mode) 95 | 96 | env.add_module(WallScenarios(grid_size=grid_size, door_size=2, scenario='empty', 97 | friction=other_friction)) 98 | 99 | if objective_placement == 'center': 100 | objective_placement_fn = center_placement 101 | elif objective_placement == 'uniform_away_from_walls': 102 | objective_placement_fn = uniform_placement_middle(0.7) 103 | 104 | env.add_module(Cylinders(1, diameter=objective_diameter, height=box_size, 105 | make_static=True, placement_fn=objective_placement_fn)) 106 | 107 | env.add_module(Agents(n_agents, 108 | placement_fn=uniform_placement, 109 | color=[np.array((66., 235., 244., 255.)) / 255] * n_agents, 110 | friction=other_friction, 111 | polar_obs=polar_obs)) 112 | if np.max(n_boxes) > 0: 113 | env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement, 114 | friction=box_floor_friction, polar_obs=polar_obs, 115 | n_elongated_boxes=n_elongated_boxes, 116 | boxid_obs=boxid_obs, boxsize_obs=boxsize_obs, 117 | 
box_size=box_size, 118 | box_only_z_rot=box_only_z_rot)) 119 | if n_lidar_per_agent > 0 and visualize_lidar: 120 | env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent)) 121 | 122 | env.add_module(AgentManipulation()) 123 | if box_floor_friction is not None: 124 | env.add_module(FloorAttributes(friction=box_floor_friction)) 125 | env.add_module(WorldConstants(gravity=gravity)) 126 | env.reset() 127 | keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs'] 128 | keys_mask_self = ['mask_aa_obs'] 129 | keys_external = ['agent_qpos_qvel'] 130 | keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock'] 131 | keys_mask_external = [] 132 | 133 | env = AddConstantObservationsWrapper(env, new_obs=additional_obs) 134 | keys_external += list(additional_obs) 135 | keys_mask_external += [ob for ob in additional_obs if 'mask' in ob] 136 | 137 | env = ShelterRewardWrapper(env, num_rays_per_side=num_rays_per_side, 138 | reward_scale=shelter_reward_scale) 139 | env = SplitMultiAgentActions(env) 140 | 141 | if team_size_obs: 142 | keys_self += ['team_size'] 143 | env = TeamMembership(env, np.zeros((n_agents,))) 144 | env = AgentAgentObsMask2D(env) 145 | env = DiscretizeActionWrapper(env, 'action_movement') 146 | if np.max(n_boxes) > 0: 147 | env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs', 148 | geom_idxs_obs_key='box_geom_idxs') 149 | keys_external += ['mask_ab_obs', 'box_obs'] 150 | keys_mask_external.append('mask_ab_obs') 151 | if lock_box and np.max(n_boxes) > 0: 152 | env = LockObjWrapper(env, body_names=[f'moveable_box{i}' for i in range(n_boxes)], 153 | agent_idx_allowed_to_lock=np.arange(n_agents), 154 | lock_type=lock_type, 155 | radius_multiplier=lock_radius_multiplier, 156 | obj_in_game_metadata_keys=["curr_n_boxes"], 157 | agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"]) 158 | 159 | if grab_box and np.max(n_boxes) > 0: 160 | env = GrabObjWrapper(env, [f'moveable_box{i}' for i in range(n_boxes)], 161 | radius_multiplier=grab_radius_multiplier, 162 | grab_exclusive=grab_exclusive, 163 | obj_in_game_metadata_keys=['curr_n_boxes']) 164 | 165 | if n_lidar_per_agent > 0: 166 | env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar, 167 | compress_lidar_scale=compress_lidar_scale) 168 | keys_copy += ['lidar'] 169 | keys_external += ['lidar'] 170 | 171 | env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy) 172 | if n_agents == 1: 173 | env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel', 'hider', 'prep_obs'], ['mask_aa_obs']) 174 | env = SpoofEntityWrapper(env, n_boxes, ['box_obs', 'you_lock', 'team_lock', 'obj_lock'], ['mask_ab_obs']) 175 | keys_mask_external += ['mask_ab_obs_spoof'] 176 | env = LockAllWrapper(env, remove_object_specific_lock=True) 177 | if not grab_out_of_vision and grab_box: 178 | env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs']) # Can only pull if in vision 179 | if not grab_selective and grab_box: 180 | env = GrabClosestWrapper(env) 181 | env = DiscardMujocoExceptionEpisodes(env) 182 | env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'], 183 | 'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']}) 184 | env = SelectKeysWrapper(env, keys_self=keys_self, 185 | keys_other=keys_external + keys_mask_self + keys_mask_external) 186 | return env 187 | -------------------------------------------------------------------------------- /mae_envs/modules/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .module import * 2 | from .util import * 3 | -------------------------------------------------------------------------------- /mae_envs/modules/agents.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.util.types import store_args 3 | from mujoco_worldgen.util.sim_funcs import (qpos_idxs_from_joint_prefix, 4 | qvel_idxs_from_joint_prefix) 5 | from mujoco_worldgen.transforms import set_geom_attr_transform 6 | from mujoco_worldgen.util.rotation import normalize_angles 7 | from mae_envs.util.transforms import (add_weld_equality_constraint_transform, 8 | set_joint_damping_transform) 9 | from mae_envs.modules import EnvModule, rejection_placement, get_size_from_xml 10 | from mujoco_worldgen import ObjFromXML 11 | 12 | 13 | class Agents(EnvModule): 14 | ''' 15 | Add Agents to the environment. 16 | Args: 17 | n_agents (int): number of agents 18 | placement_fn (fn or list of fns): See mae_envs.modules.util:rejection_placement for 19 | spec. If list of functions, then it is assumed there is one function given 20 | per agent 21 | color (tuple or list of tuples): rgba for agent. If list of tuples, then it is 22 | assumed there is one color given per agent 23 | friction (float): agent friction 24 | damp_z (bool): if False, reduce z damping to 1 25 | polar_obs (bool): Give observations about rotation in polar coordinates 26 | ''' 27 | @store_args 28 | def __init__(self, n_agents, placement_fn=None, color=None, friction=None, 29 | damp_z=False, polar_obs=True): 30 | pass 31 | 32 | def build_world_step(self, env, floor, floor_size): 33 | env.metadata['n_agents'] = self.n_agents 34 | successful_placement = True 35 | 36 | for i in range(self.n_agents): 37 | env.metadata.pop(f"agent{i}_initpos", None) 38 | 39 | for i in range(self.n_agents): 40 | obj = ObjFromXML("particle_hinge", name=f"agent{i}") 41 | if self.friction is not None: 42 | obj.add_transform(set_geom_attr_transform('friction', self.friction)) 43 | if self.color is not None: 44 | _color = (self.color[i] 45 | if isinstance(self.color[0], (list, tuple, np.ndarray)) 46 | else self.color) 47 | obj.add_transform(set_geom_attr_transform('rgba', _color)) 48 | if not self.damp_z: 49 | obj.add_transform(set_joint_damping_transform(1, 'tz')) 50 | 51 | if self.placement_fn is not None: 52 | _placement_fn = (self.placement_fn[i] 53 | if isinstance(self.placement_fn, list) 54 | else self.placement_fn) 55 | obj_size = get_size_from_xml(obj) 56 | pos, pos_grid = rejection_placement(env, _placement_fn, floor_size, obj_size) 57 | if pos is not None: 58 | floor.append(obj, placement_xy=pos) 59 | # store spawn position in metadata. 
This allows sampling subsequent agents 60 | # close to previous agents 61 | env.metadata[f"agent{i}_initpos"] = pos_grid 62 | else: 63 | successful_placement = False 64 | else: 65 | floor.append(obj) 66 | return successful_placement 67 | 68 | def modify_sim_step(self, env, sim): 69 | # Cache qpos, qvel idxs 70 | self.agent_qpos_idxs = np.array([qpos_idxs_from_joint_prefix(sim, f'agent{i}') 71 | for i in range(self.n_agents)]) 72 | self.agent_qvel_idxs = np.array([qvel_idxs_from_joint_prefix(sim, f'agent{i}') 73 | for i in range(self.n_agents)]) 74 | env.metadata['agent_geom_idxs'] = [sim.model.geom_name2id(f'agent{i}:agent') 75 | for i in range(self.n_agents)] 76 | 77 | def observation_step(self, env, sim): 78 | qpos = sim.data.qpos.copy() 79 | qvel = sim.data.qvel.copy() 80 | 81 | agent_qpos = qpos[self.agent_qpos_idxs] 82 | agent_qvel = qvel[self.agent_qvel_idxs] 83 | agent_angle = agent_qpos[:, [-1]] - np.pi / 2 # Rotate the angle to match visual front 84 | agent_qpos_qvel = np.concatenate([agent_qpos, agent_qvel], -1) 85 | polar_angle = np.concatenate([np.cos(agent_angle), np.sin(agent_angle)], -1) 86 | if self.polar_obs: 87 | agent_qpos = np.concatenate([agent_qpos[:, :-1], polar_angle], -1) 88 | agent_angle = normalize_angles(agent_angle) 89 | obs = { 90 | 'agent_qpos_qvel': agent_qpos_qvel, 91 | 'agent_angle': agent_angle, 92 | 'agent_pos': agent_qpos[:, :3]} 93 | 94 | return obs 95 | 96 | 97 | class AgentManipulation(EnvModule): 98 | ''' 99 | Adding this module is necessary for the grabbing mechanic implemented in GrabObjWrapper 100 | (found in mae_envs/wrappers/manipulation.py) to work correctly. 101 | ''' 102 | @store_args 103 | def __init__(self): 104 | pass 105 | 106 | def build_world_step(self, env, floor, floor_size): 107 | for i in range(env.n_agents): 108 | floor.add_transform(add_weld_equality_constraint_transform( 109 | f'agent{i}:gripper', f'agent{i}:particle', 'floor0')) 110 | return True 111 | 112 | def modify_sim_step(self, env, sim): 113 | sim.model.eq_active[:] = 0 114 | -------------------------------------------------------------------------------- /mae_envs/modules/construction_sites.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.util.types import store_args 3 | from mae_envs.modules import EnvModule, rejection_placement 4 | 5 | 6 | class ConstructionSites(EnvModule): 7 | ''' 8 | Adds construction sites to the environment. A construction site consists of 5 9 | regular mujoco sites, with four of them (the 'corner' sites) forming a rectangle 10 | and the last site being placed in the center of the rectangle. 11 | Args: 12 | n_sites (int or (int, int)): Number of construction sites. If tuple of ints, every 13 | episode the number of sites is drawn uniformly from 14 | range(n_sites[0], n_sites[1] + 1) 15 | placement_fn (fn or list of fns): See mae_envs.modules.util:rejection_placement for spec 16 | If list of functions, then it is assumed there is one function given per agent 17 | site_name (str): Name for the sites. 18 | site_size (float): Site size 19 | site_height (float): Site height 20 | n_elongated_sites (int or (int, int)): Number of elongated sites. 
If tuple of ints, 21 | every episode the number of elongated sites is drawn uniformly from 22 | range(n_elongated_sites[0], n_elongated_sited[1] + 1) 23 | ''' 24 | @store_args 25 | def __init__(self, n_sites, placement_fn=None, site_name='construction_site', 26 | site_size=0.5, site_height=0.25, n_elongated_sites=0): 27 | if type(n_sites) not in [tuple, list, np.ndarray]: 28 | self.n_sites = [n_sites, n_sites] 29 | if type(n_elongated_sites) not in [tuple, list, np.ndarray]: 30 | self.n_elongated_sites = [n_elongated_sites, n_elongated_sites] 31 | 32 | def _mark_site_square(self, floor, floor_size, site_name, 33 | site_relative_xyz, site_dims): 34 | x, y, z = site_relative_xyz 35 | floor.mark(site_name, relative_xyz=(x, y, z), 36 | rgba=[1., 1., 1., 1.], size=0.1) 37 | 38 | corner_rel_offset_x, corner_rel_offset_y = (site_dims / floor_size) / 2 39 | corner_rel_xy = [[x - corner_rel_offset_x, y - corner_rel_offset_y], 40 | [x - corner_rel_offset_x, y + corner_rel_offset_y], 41 | [x + corner_rel_offset_x, y - corner_rel_offset_y], 42 | [x + corner_rel_offset_x, y + corner_rel_offset_y]] 43 | for i, (x_corner, y_corner) in enumerate(corner_rel_xy): 44 | floor.mark(f'{site_name}_corner{i}', 45 | relative_xyz=(x_corner, y_corner, z), 46 | size=0.05, rgba=[0.8, 0.8, 0.8, 1.]) 47 | 48 | def build_world_step(self, env, floor, floor_size): 49 | self.curr_n_sites = env._random_state.randint(self.n_sites[0], self.n_sites[1] + 1) 50 | self.curr_n_elongated_sites = env._random_state.randint( 51 | self.n_elongated_sites[0], self.n_elongated_sites[1] + 1) 52 | 53 | env.metadata['curr_n_sites'] = self.curr_n_sites 54 | env.metadata['curr_n_elongated_sites'] = self.curr_n_elongated_sites 55 | 56 | self.site_size_array = self.site_size * np.ones((self.curr_n_sites, 2)) 57 | if self.curr_n_elongated_sites > 0: 58 | n_xaligned = env._random_state.randint(self.curr_n_elongated_sites + 1) 59 | self.site_size_array[:n_xaligned, :] = self.site_size * np.array([3.3, 0.3]) 60 | self.site_size_array[n_xaligned:self.curr_n_elongated_sites, :] = ( 61 | self.site_size * np.array([0.3, 3.3])) 62 | 63 | successful_placement = True 64 | for i in range(self.curr_n_sites): 65 | if self.placement_fn is not None: 66 | _placement_fn = (self.placement_fn[i] 67 | if isinstance(self.placement_fn, list) 68 | else self.placement_fn) 69 | pos, _ = rejection_placement(env, _placement_fn, floor_size, 70 | self.site_size_array[i]) 71 | if pos is not None: 72 | self._mark_site_square(floor, floor_size, f'{self.site_name}{i}', 73 | (pos[0], pos[1], self.site_height), 74 | self.site_size_array[i]) 75 | else: 76 | successful_placement = False 77 | else: 78 | # place the site so that all the corners are still within the play area 79 | pos_min = self.site_size_array[i].max() / (floor_size * 1.1) / 2 80 | pos = env._random_state.uniform(pos_min, 1 - pos_min, 2) 81 | self._mark_site_square(floor, floor_size, f'{self.site_name}{i}', 82 | (pos[0], pos[1], self.site_height), 83 | self.site_size_array[i]) 84 | 85 | return successful_placement 86 | 87 | def modify_sim_step(self, env, sim): 88 | self.construction_site_idxs = np.array( 89 | [sim.model.site_name2id(f'{self.site_name}{i}') 90 | for i in range(self.curr_n_sites)] 91 | ) 92 | self.construction_site_corner_idxs = np.array( 93 | [sim.model.site_name2id(f'{self.site_name}{i}_corner{j}') 94 | for i in range(self.curr_n_sites) for j in range(4)] 95 | ) 96 | 97 | def observation_step(self, env, sim): 98 | site_pos = sim.data.site_xpos[self.construction_site_idxs] 99 | site_corner_pos = 
sim.data.site_xpos[self.construction_site_corner_idxs] 100 | site_obs = np.concatenate((site_pos, 101 | site_corner_pos.reshape((self.curr_n_sites, 12))), 102 | axis=-1) 103 | 104 | mask_site_obs = np.ones((env.n_agents, self.curr_n_sites)) 105 | 106 | obs = {'construction_site_pos': site_pos, 107 | 'construction_site_corner_pos': site_corner_pos, 108 | 'construction_site_obs': site_obs, 109 | 'mask_acs_obs': mask_site_obs} 110 | 111 | return obs 112 | -------------------------------------------------------------------------------- /mae_envs/modules/food.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.util.types import store_args 3 | from mae_envs.modules import EnvModule, rejection_placement 4 | 5 | 6 | class Food(EnvModule): 7 | ''' 8 | Add food sites to the environment. 9 | Args: 10 | n_food (int or (int, int)): number of food items. If tuple of ints, every episode the 11 | number of food items is drawn uniformly from range(n_food[0], n_food[1] + 1) 12 | food_size (float): (visual) size of food items 13 | placement_fn (fn or list of fns): See mae_envs.modules.util:rejection_placement for spec 14 | If list of functions, then it is assumed there is one function given per food site 15 | ''' 16 | @store_args 17 | def __init__(self, n_food, food_size=0.1, placement_fn=None): 18 | if type(n_food) not in [tuple, list, np.ndarray]: 19 | self.n_food = [n_food, n_food] 20 | pass 21 | 22 | def build_world_step(self, env, floor, floor_size): 23 | env.metadata['food_size'] = self.food_size 24 | self.curr_n_food = env._random_state.randint(self.n_food[0], self.n_food[1] + 1) 25 | env.metadata['max_n_food'] = self.n_food[1] 26 | env.metadata['curr_n_food'] = self.curr_n_food 27 | successful_placement = True 28 | 29 | for i in range(self.curr_n_food): 30 | env.metadata.pop(f"food{i}_initpos", None) 31 | 32 | # Add food sites 33 | for i in range(self.curr_n_food): 34 | if self.placement_fn is not None: 35 | _placement_fn = (self.placement_fn[i] 36 | if isinstance(self.placement_fn, list) 37 | else self.placement_fn) 38 | pos, pos_grid = rejection_placement(env, _placement_fn, floor_size, 39 | np.array([self.food_size, self.food_size])) 40 | if pos is not None: 41 | floor.mark(f"food{i}", relative_xyz=np.append(pos, [self.food_size / 2]), 42 | size=(self.food_size, self.food_size, self.food_size), 43 | rgba=(0., 1., 0., 1.)) 44 | 45 | # store spawn position in metadata. This allows sampling subsequent food items 46 | # close to previous food items 47 | env.metadata[f"food{i}_initpos"] = pos_grid 48 | else: 49 | successful_placement = False 50 | else: 51 | floor.mark(f"food{i}", rgba=(0., 1., 0., 1.), 52 | size=(self.food_size, self.food_size, self.food_size)) 53 | return successful_placement 54 | 55 | def modify_sim_step(self, env, sim): 56 | self.food_site_ids = np.array([sim.model.site_name2id(f'food{i}') 57 | for i in range(self.curr_n_food)]) 58 | 59 | def observation_step(self, env, sim): 60 | if self.curr_n_food > 0: 61 | obs = {'food_pos': sim.data.site_xpos[self.food_site_ids]} 62 | else: 63 | obs = {'food_pos': np.zeros((0, 3))} 64 | return obs 65 | -------------------------------------------------------------------------------- /mae_envs/modules/module.py: -------------------------------------------------------------------------------- 1 | 2 | class EnvModule(): 3 | ''' 4 | Dummy class outline for "Environment Modules". 
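Concrete modules in this repo (e.g. Agents, ConstructionSites, Food) subclass this outline and override whichever of the hooks below they need.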
5 | NOTE: If in any function you are going to randomly sample a number, 6 | use env._random_state instead of numpy.random 7 | ''' 8 | def build_world_step(self, env, floor, floor_size): 9 | ''' 10 | This function allows you to add objects to worldgen floor object. 11 | You could also cache variables needed for observations or add 12 | information to the env.metadata dict 13 | Args: 14 | env (gym.Env): the environment 15 | floor (worldgen.Floor): square worldgen floor object 16 | floor_size (float): size of the worldgen floor object 17 | Returns: True if the build_world_step was successful, False if it failed 18 | e.g. your build_world_step might fail because no valid object placements 19 | were found. 20 | ''' 21 | return True 22 | 23 | def modify_sim_step(self, env, sim): 24 | ''' 25 | After an MJSim has been created, this function can be used to modify that sim 26 | and cache any variables you can only get after the sim is created 27 | Args: 28 | env (gym.env): the environment 29 | sim (mujoco_py.MJSim): mujoco simulation object 30 | Returns: None 31 | ''' 32 | pass 33 | 34 | def observation_step(self, env, sim): 35 | ''' 36 | Create any observations specific to this module. 37 | Args: 38 | env (gym.env): the environment 39 | sim (mujoco_py.MJSim): mujoco simulation object 40 | Returns: dict of observations 41 | ''' 42 | return {} 43 | -------------------------------------------------------------------------------- /mae_envs/modules/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.parser import parse_file 3 | 4 | 5 | def get_size_from_xml(obj): 6 | ''' 7 | Args: 8 | obj (worldgen.Obj): worldgen object 9 | Returns: size of object annotation:outer_bound if it exists, None if it doesn't 10 | ''' 11 | outer_bound = None 12 | for body in parse_file(obj._generate_xml_path())['worldbody']['body']: 13 | if body.get('@name', '') == 'annotation:outer_bound': 14 | outer_bound = body 15 | if outer_bound is None: 16 | return None 17 | else: 18 | return outer_bound['geom'][0]['@size'][:2] * 2 19 | 20 | 21 | def rejection_placement(env, placement_fn, floor_size, obj_size, num_tries=10): 22 | ''' 23 | Args: 24 | env (gym.Env): environment 25 | placement_fn (function): Function that returns a position on a grid 26 | Args: 27 | grid (np.ndarray): 2D occupancy grid. 1's mean occupied 28 | obj_size_in_cells (int np.ndarray): number of cells in [x, y] 29 | that this object would occupy on the grid. Currently only supports 30 | rectangular object sizes (but so does worldgen) 31 | env.metadata (dict): environment metadata 32 | random_state (np.random.RandomState): numpy random state 33 | Returns: x, y placement position on grid 34 | floor_size (float): size of floor 35 | obj_size (float np.ndarray): [x, y] size of object 36 | num_tries (int): number of tries to place object 37 | Returns: a (placement, pos) tuple: the object's relative [x, y] placement on the floor (suitable for worldgen's placement_xy) and its integer [x, y] grid position, or (None, None) if no valid placement was found.
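Example (illustrative sketch, not part of the original source; obj and floor stand in for the worldgen object and floor available inside a module's build_world_step):
    placement, grid_pos = rejection_placement(env, uniform_placement,
                                               floor_size=6.0,
                                               obj_size=np.array([0.5, 0.5]))
    if placement is not None:
        floor.append(obj, placement_xy=placement)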
38 | ''' 39 | grid = env.placement_grid 40 | grid_size = len(grid) 41 | cell_size = floor_size / grid_size 42 | obj_size_in_cells = np.ceil(obj_size / cell_size).astype(int) 43 | 44 | for i in range(num_tries): 45 | if placement_fn is not None: 46 | pos = placement_fn(grid, obj_size_in_cells, env.metadata, env._random_state) 47 | else: 48 | # Assume that we'll always have boundary walls so don't sample there 49 | pos = np.array([env._random_state.randint(1, grid_size - obj_size_in_cells[0] - 1), 50 | env._random_state.randint(1, grid_size - obj_size_in_cells[1] - 1)]) 51 | if np.any(grid[pos[0]:pos[0] + obj_size_in_cells[0], pos[1]:pos[1] + obj_size_in_cells[1]]): 52 | continue 53 | else: 54 | extra_room = obj_size_in_cells * cell_size - obj_size 55 | pos_on_floor = pos / grid_size * floor_size 56 | pos_on_floor += env._random_state.uniform([0, 0], extra_room) 57 | placement = pos_on_floor / (floor_size - obj_size) 58 | grid[pos[0]:pos[0] + obj_size_in_cells[0], pos[1]:pos[1] + obj_size_in_cells[1]] = 1 59 | return placement, pos 60 | return None, None 61 | 62 | 63 | def uniform_placement(grid, obj_size, metadata, random_state): 64 | grid_size = len(grid) 65 | pos = np.array([random_state.randint(1, grid_size - obj_size[0] - 1), 66 | random_state.randint(1, grid_size - obj_size[1] - 1)]) 67 | 68 | return pos 69 | 70 | 71 | def close_to_other_object_placement(object_type, object_index, radius_key): 72 | def close_placement_fn(grid, obj_size, metadata, random_state): 73 | init_pos_key = f"{object_type}{object_index}_initpos" 74 | 75 | assert init_pos_key in metadata, \ 76 | f"First object position must be specified in metadata['{init_pos_key}']" 77 | assert radius_key in metadata, \ 78 | f"metadata['{radius_key}'] must be specified." 79 | 80 | grid_size = len(grid) 81 | 82 | anchor_obj_pos = metadata[f"{init_pos_key}"] 83 | rad_in_cells = metadata[radius_key] 84 | 85 | distr_limits_min = np.maximum(1, anchor_obj_pos - rad_in_cells) 86 | distr_limits_max = np.minimum(grid_size - 1, anchor_obj_pos + rad_in_cells) 87 | 88 | pos = np.array([random_state.randint(distr_limits_min[0], distr_limits_max[0]), 89 | random_state.randint(distr_limits_min[1], distr_limits_max[1])]) 90 | 91 | return pos 92 | 93 | return close_placement_fn 94 | 95 | 96 | def uniform_placement_middle(area_side_length_fraction): 97 | ''' 98 | Creates a sampling function that samples object position uniformly within the 99 | middle of the playing area. E.g. if the playing area is 100 | ------ 101 | |AAAA| 102 | |ABBA| 103 | |ABBA| 104 | |AAAA| 105 | ------ 106 | then uniform_placement_middle(0.5) will return a function that samples the object position 107 | from any of the B cells.
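For example, the blueprint construction env above uses uniform_placement_middle(0.85) as its 'uniform_away_from_walls' site placement option.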
108 | Args: 109 | area_side_length_fraction (float, between 0 and 1): Length of the sides of the middle 110 | square being sampled from, as fraction of the overall playing field 111 | ''' 112 | def uniform_placement_middle_fn(grid, obj_size, metadata, random_state): 113 | grid_size = len(grid) 114 | distr_limits_min = ((grid_size - obj_size) * (1 - area_side_length_fraction) / 2 + area_side_length_fraction).astype(int) 115 | distr_limits_max = ((grid_size - obj_size) * (1 + area_side_length_fraction) / 2 - area_side_length_fraction).astype(int) 116 | 117 | pos = np.array([random_state.randint(distr_limits_min[0], distr_limits_max[0]), 118 | random_state.randint(distr_limits_min[1], distr_limits_max[1])]) 119 | 120 | return pos 121 | 122 | return uniform_placement_middle_fn 123 | 124 | 125 | def center_placement(grid, obj_size_in_cells, metadata, random_state): 126 | half_grid_size = int(len(grid) / 2) 127 | pos = np.array([half_grid_size - int(obj_size_in_cells[0]/2), 128 | half_grid_size - int(obj_size_in_cells[1]/2)]) 129 | return pos 130 | -------------------------------------------------------------------------------- /mae_envs/modules/world.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from mujoco_worldgen.transforms import set_geom_attr_transform 3 | from mae_envs.modules import EnvModule 4 | 5 | 6 | class FloorAttributes(EnvModule): 7 | ''' 8 | For each (key, value) in kwargs, sets the floor geom attribute key to value. 9 | ''' 10 | def __init__(self, **kwargs): 11 | self.kwargs = kwargs 12 | 13 | def build_world_step(self, env, floor, floor_size): 14 | for k, v in self.kwargs.items(): 15 | floor.add_transform(set_geom_attr_transform(k, v)) 16 | return True 17 | 18 | 19 | class WorldConstants(EnvModule): 20 | ''' 21 | For each (key, value) in kwargs, sets sim.model.opt[key] = value 22 | ''' 23 | def __init__(self, **kwargs): 24 | self.kwargs = kwargs 25 | 26 | def modify_sim_step(self, env, sim): 27 | for k, v in self.kwargs.items(): 28 | if not hasattr(sim.model.opt, k): 29 | logging.warning(f"sim.model.opt does not have attribute {k}") 30 | else: 31 | getattr(sim.model.opt, k)[:] = v 32 | -------------------------------------------------------------------------------- /mae_envs/util/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.util.rotation import quat_mul, quat_conjugate 3 | 4 | 5 | def dist_pt_to_cuboid(pt1, cuboid_center, cuboid_dims, cuboid_quat): 6 | ''' 7 | This function calculates the shortest distance between test points 8 | and cuboids at arbitrary locations, widths and rotations 9 | 10 | Args: 11 | pt1 (num points x 3): test point positions 12 | cuboid_center (num cuboids x 3): cuboid centers 13 | cuboid_dims (num cuboids x 3): cuboid half-width 14 | cuboid_quat (num cuboids x 4): cuboid quaternion 15 | 16 | Returns: 17 | Distance array of size num points x num cuboids 18 | ''' 19 | assert cuboid_center.shape[0] == cuboid_dims.shape[0] == cuboid_quat.shape[0], \ 20 | "First dimension of cuboid_center, cuboid_dims and cuboid_quat need to match, " + \ 21 | f"but were {cuboid_center.shape[0]}, {cuboid_dims.shape[0]} and {cuboid_quat.shape[0]}." 22 | assert pt1.shape[1] == cuboid_center.shape[1] == cuboid_dims.shape[1] == 3, \ 23 | "Second dimension of pt1, cuboid_center and cuboid_dims needs to be 3, " + \ 24 | f"but were {pt1.shape[1]}, {cuboid_center.shape[1]} and {cuboid_dims.shape[1]}." 
25 | assert cuboid_quat.shape[1] == 4, \ 26 | f"Second dimension of cuboid_quat needs to be 4, but was {cuboid_quat.shape[1]}." 27 | 28 | # calculate relative position of test points 29 | rel_pos = pt1[:, None, :] - cuboid_center[None, :, :] 30 | 31 | # convert into quaternion (leading dimension is zero) 32 | q_rel_pos = np.concatenate([np.zeros_like(rel_pos[:, :, [0]]), rel_pos], axis=-1) 33 | 34 | # broadcast cuboid_quat by hand 35 | cuboid_quat = np.repeat(cuboid_quat[None, :], pt1.shape[0], axis=0) 36 | 37 | # rotate relative position in cuboid frame 38 | # since cuboid_quat specifies how the cuboid is rotated wrt to the standard coordinate system, 39 | # we need to rotate the test points using the inverse rotation (i.e. conjugate quaternion) 40 | # 41 | # For rotation of vectors using quaternions see 42 | # https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation 43 | q_rel_pos = quat_mul(quat_conjugate(cuboid_quat), quat_mul(q_rel_pos, cuboid_quat)) 44 | 45 | # now we can pretend that the cuboid is aligned to x-axis 46 | # calculate vector to closest point on the cuboid 47 | # this can be done as described here: 48 | # https://gamedev.stackexchange.com/questions/44483/how-do-i-calculate-distance-between-a-point-and-an-axis-aligned-rectangle 49 | dist_vec = np.maximum(0, np.abs(q_rel_pos[:, :, 1:]) - cuboid_dims[None, :, :]) 50 | 51 | # distance is length of distance vector 52 | dist = np.linalg.norm(dist_vec, axis=-1) 53 | 54 | return dist 55 | -------------------------------------------------------------------------------- /mae_envs/util/transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | from mujoco_worldgen.transforms import closure_transform 4 | 5 | 6 | def add_weld_equality_constraint_transform(name, body_name1, body_name2): 7 | ''' 8 | Creates a weld constraint that maintains relative position and orientation between 9 | two objects 10 | ''' 11 | def fun(xml_dict): 12 | if 'equality' not in xml_dict: 13 | xml_dict['equality'] = OrderedDict() 14 | xml_dict['equality']['weld'] = [] 15 | constraint = OrderedDict() 16 | constraint['@name'] = name 17 | constraint['@body1'] = body_name1 18 | constraint['@body2'] = body_name2 19 | constraint['@active'] = False 20 | xml_dict['equality']['weld'].append(constraint) 21 | return xml_dict 22 | 23 | return fun 24 | 25 | 26 | def set_joint_damping_transform(damping, joint_name): 27 | ''' Set joints damping to a single value. 28 | Args: 29 | damping (float): damping to set 30 | joint_name (string): partial name of joint. Any joint with joint_name 31 | as a substring will be affected. 32 | ''' 33 | def closure(node): 34 | for joint in node.get('joint', []): 35 | if joint_name in joint['@name']: 36 | joint['@damping'] = damping 37 | return closure_transform(closure) 38 | 39 | 40 | def remove_hinge_axis_transform(axis): 41 | ''' Removes specific hinge axis from the body. 
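Args: axis (array of length 3): hinge joints whose '@axis' matches this axis (within an L2 tolerance of 1e-5) are removed.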
''' 42 | def fun(xml_dict): 43 | def closure(node): 44 | if 'joint' in node: 45 | node["joint"] = [j for j in node["joint"] 46 | if j["@type"] != "hinge" 47 | or np.linalg.norm(j["@axis"] - axis) >= 1e-5] 48 | return closure_transform(closure)(xml_dict) 49 | return fun 50 | -------------------------------------------------------------------------------- /mae_envs/util/vision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mujoco_worldgen.util.rotation import normalize_angles 3 | from mujoco_worldgen.util.geometry import raycast 4 | 5 | 6 | def in_cone2d(origin_pts, origin_angles, cone_angle, target_pts): 7 | ''' 8 | Computes whether 2D points target_pts are in the cones originating from 9 | origin_pts at angle origin_angles with cone spread angle cone_angle. 10 | Args: 11 | origin_pts (np.ndarray): array with shape (n_points, 2) of origin points 12 | origin_angles (np.ndarray): array with shape (n_points,) of origin angles 13 | cone_angle (float): cone angle width 14 | target_pts (np.ndarray): target points to check whether in cones 15 | Returns: 16 | np.ndarray of bools. Each row corresponds to origin cone, and columns to 17 | target points 18 | ''' 19 | assert isinstance(origin_pts, np.ndarray) 20 | assert isinstance(origin_angles, np.ndarray) 21 | assert isinstance(cone_angle, float) 22 | assert isinstance(target_pts, np.ndarray) 23 | assert origin_pts.shape[0] == origin_angles.shape[0] 24 | assert len(origin_angles.shape) == 1, "Angles should only have 1 dimension" 25 | np.seterr(divide='ignore', invalid='ignore') 26 | cone_vec = np.array([np.cos(origin_angles), np.sin(origin_angles)]).T 27 | # Compute normed vectors between all pairs of agents 28 | pos_diffs = target_pts[None, ...] - origin_pts[:, None, :] 29 | norms = np.sqrt(np.sum(np.square(pos_diffs), -1, keepdims=True)) 30 | unit_diffs = pos_diffs / norms 31 | # Dot product between unit vector in middle of cone and the vector 32 | dot_cone_diff = np.sum(unit_diffs * cone_vec[:, None, :], -1) 33 | angle_between = np.arccos(dot_cone_diff) 34 | # Right now the only thing that should be nan will be targets that are on the origin point 35 | # This can only happen for the origin looking at itself, so just make this always true 36 | angle_between[np.isnan(angle_between)] = 0. 37 | 38 | return np.abs(normalize_angles(angle_between)) <= cone_angle 39 | 40 | 41 | def insight(sim, geom1_id, geom2_id=None, pt2=None, dist_thresh=np.inf, check_body=True): 42 | ''' 43 | Check if geom2 or pt2 is in line of sight of geom1. 44 | Args: 45 | sim: Mujoco sim object 46 | geom1 (int): geom id 47 | geom2 (int): geom id 48 | pt2 (tuple): xy point 49 | dist_thresh (float): Adds a distance threshold for vision. Objects beyond the threshold 50 | are considered out of sight. 51 | check_body (bool): Check whether the raycast hit any geom in the body that geom2 is in 52 | rather than if it just hit geom2 53 | ''' 54 | dist, collision_geom = raycast(sim, geom1_id, geom2_id=geom2_id, pt2=pt2) 55 | if geom2_id is not None: 56 | if check_body: 57 | body2_id, collision_body_id = sim.model.geom_bodyid[[geom2_id, collision_geom]] 58 | return (collision_body_id == body2_id and dist < dist_thresh) 59 | else: 60 | return (collision_geom == geom2_id and dist < dist_thresh) 61 | else: 62 | pt1 = sim.data.geom_xpos[geom1_id] 63 | dist_pt2 = np.linalg.norm(pt2 - pt1) 64 | # if dist == -1 then we're raycasting from a geom to a point within itself, 65 | # and all objects have line of sight of themselves. 
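# otherwise the point is considered in sight when the ray's first collision lies beyond pt2 (so nothing occludes the segment) and pt2 itself is within dist_thresh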
66 | return (dist == -1.0 or dist > dist_pt2) and dist_pt2 < dist_thresh 67 | -------------------------------------------------------------------------------- /mae_envs/viewer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/mae_envs/viewer/__init__.py -------------------------------------------------------------------------------- /mae_envs/viewer/env_viewer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | from mujoco_py import const, MjViewer 4 | import glfw 5 | from gym.spaces import Box, MultiDiscrete, Discrete 6 | 7 | 8 | class EnvViewer(MjViewer): 9 | 10 | def __init__(self, env): 11 | self.env = env 12 | self.elapsed = [0] 13 | self.seed = self.env.seed() 14 | super().__init__(self.env.unwrapped.sim) 15 | self.n_agents = self.env.metadata['n_actors'] 16 | self.action_types = list(self.env.action_space.spaces.keys()) 17 | self.num_action_types = len(self.env.action_space.spaces) 18 | self.num_action = self.num_actions(self.env.action_space) 19 | self.agent_mod_index = 0 20 | self.action_mod_index = 0 21 | self.action_type_mod_index = 0 22 | self.action = self.zero_action(self.env.action_space) 23 | self.env_reset() 24 | 25 | def num_actions(self, ac_space): 26 | n_actions = [] 27 | for k, tuple_space in ac_space.spaces.items(): 28 | s = tuple_space.spaces[0] 29 | if isinstance(s, Box): 30 | n_actions.append(s.shape[0]) 31 | elif isinstance(s, Discrete): 32 | n_actions.append(1) 33 | elif isinstance(s, MultiDiscrete): 34 | n_actions.append(s.nvec.shape[0]) 35 | else: 36 | raise NotImplementedError(f"not NotImplementedError") 37 | 38 | return n_actions 39 | 40 | def zero_action(self, ac_space): 41 | ac = {} 42 | for k, space in ac_space.spaces.items(): 43 | if isinstance(space.spaces[0], Box): 44 | ac[k] = np.zeros_like(space.sample()) 45 | elif isinstance(space.spaces[0], Discrete): 46 | ac[k] = np.ones_like(space.sample()) * (space.spaces[0].n // 2) 47 | elif isinstance(space.spaces[0], MultiDiscrete): 48 | ac[k] = np.ones_like(space.sample(), dtype=int) * (space.spaces[0].nvec // 2) 49 | else: 50 | raise NotImplementedError("MultiDiscrete not NotImplementedError") 51 | # return action_space.nvec // 2 # assume middle element is "no action" action 52 | return ac 53 | 54 | def env_reset(self): 55 | start = time.time() 56 | # get the seed before calling env.reset(), so we display the one 57 | # that was used for the reset. 
58 | self.seed = self.env.seed() 59 | self.env.reset() 60 | self.elapsed.append(time.time() - start) 61 | self.update_sim(self.env.unwrapped.sim) 62 | 63 | def key_callback(self, window, key, scancode, action, mods): 64 | # Trigger on keyup only: 65 | if action != glfw.RELEASE: 66 | return 67 | if key == glfw.KEY_ESCAPE: 68 | self.env.close() 69 | 70 | # Increment experiment seed 71 | elif key == glfw.KEY_N: 72 | self.seed[0] += 1 73 | self.env.seed(self.seed) 74 | self.env_reset() 75 | self.action = self.zero_action(self.env.action_space) 76 | # Decrement experiment trial 77 | elif key == glfw.KEY_P: 78 | self.seed = [max(self.seed[0] - 1, 0)] 79 | self.env.seed(self.seed) 80 | self.env_reset() 81 | self.action = self.zero_action(self.env.action_space) 82 | current_action_space = self.env.action_space.spaces[self.action_types[self.action_type_mod_index]].spaces[0] 83 | if key == glfw.KEY_A: 84 | if isinstance(current_action_space, Box): 85 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] -= 0.05 86 | elif isinstance(current_action_space, Discrete): 87 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index] = \ 88 | (self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index] - 1) % current_action_space.n 89 | elif isinstance(current_action_space, MultiDiscrete): 90 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] = \ 91 | (self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] - 1) \ 92 | % current_action_space.nvec[self.action_mod_index] 93 | elif key == glfw.KEY_Z: 94 | if isinstance(current_action_space, Box): 95 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] += 0.05 96 | elif isinstance(current_action_space, Discrete): 97 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index] = \ 98 | (self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index] + 1) % current_action_space.n 99 | elif isinstance(current_action_space, MultiDiscrete): 100 | self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] = \ 101 | (self.action[self.action_types[self.action_type_mod_index]][self.agent_mod_index][self.action_mod_index] + 1) \ 102 | % current_action_space.nvec[self.action_mod_index] 103 | elif key == glfw.KEY_K: 104 | self.action_mod_index = (self.action_mod_index + 1) % self.num_action[self.action_type_mod_index] 105 | elif key == glfw.KEY_J: 106 | self.action_mod_index = (self.action_mod_index - 1) % self.num_action[self.action_type_mod_index] 107 | elif key == glfw.KEY_Y: 108 | self.agent_mod_index = (self.agent_mod_index + 1) % self.n_agents 109 | elif key == glfw.KEY_U: 110 | self.agent_mod_index = (self.agent_mod_index - 1) % self.n_agents 111 | elif key == glfw.KEY_G: 112 | self.action_type_mod_index = (self.action_type_mod_index + 1) % self.num_action_types 113 | self.action_mod_index = 0 114 | elif key == glfw.KEY_B: 115 | self.action_type_mod_index = (self.action_type_mod_index - 1) % self.num_action_types 116 | self.action_mod_index = 0 117 | 118 | super().key_callback(window, key, scancode, action, mods) 119 | 120 | def run(self, once=False): 121 | while True: 122 | _, _, _, env_info = self.env.step(self.action) 123 | if env_info.get('discard_episode', False): 124 | self.env.reset() 125 | self.add_overlay(const.GRID_TOPRIGHT, 
"Reset env; (current seed: {})".format(self.seed), "N - next / P - previous ") 126 | self.add_overlay(const.GRID_TOPRIGHT, "Apply action", "A (-0.05) / Z (+0.05)") 127 | self.add_overlay(const.GRID_TOPRIGHT, "on agent index %d out %d" % (self.agent_mod_index, self.n_agents), "Y / U") 128 | self.add_overlay(const.GRID_TOPRIGHT, f"on action type {self.action_types[self.action_type_mod_index]}", "G / B") 129 | self.add_overlay(const.GRID_TOPRIGHT, "on action index %d out %d" % (self.action_mod_index, self.num_action[self.action_type_mod_index]), "J / K") 130 | self.add_overlay(const.GRID_BOTTOMRIGHT, "Reset took", "%.2f sec." % (sum(self.elapsed) / len(self.elapsed))) 131 | self.add_overlay(const.GRID_BOTTOMRIGHT, "Action", str(self.action)) 132 | self.render() 133 | if once: 134 | return 135 | -------------------------------------------------------------------------------- /mae_envs/viewer/policy_viewer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import glfw 4 | import numpy as np 5 | from operator import itemgetter 6 | from mujoco_py import const, MjViewer 7 | from mujoco_worldgen.util.types import store_args 8 | from ma_policy.util import listdict2dictnp 9 | 10 | 11 | def splitobs(obs, keepdims=True): 12 | ''' 13 | Split obs into list of single agent obs. 14 | Args: 15 | obs: dictionary of numpy arrays where first dim in each array is agent dim 16 | ''' 17 | n_agents = obs[list(obs.keys())[0]].shape[0] 18 | return [{k: v[[i]] if keepdims else v[i] for k, v in obs.items()} for i in range(n_agents)] 19 | 20 | 21 | class PolicyViewer(MjViewer): 22 | ''' 23 | PolicyViewer runs a policy with an environment and optionally displays it. 24 | env - environment to run policy in 25 | policy - policy object to run 26 | display_window - if true, show the graphical viewer 27 | seed - environment seed to view 28 | duration - time in seconds to run the policy, run forever if duration=None 29 | ''' 30 | @store_args 31 | def __init__(self, env, policies, display_window=True, seed=None, duration=None): 32 | if seed is None: 33 | self.seed = env.seed()[0] 34 | else: 35 | self.seed = seed 36 | env.seed(seed) 37 | self.total_rew = 0.0 38 | self.ob = env.reset() 39 | for policy in self.policies: 40 | policy.reset() 41 | assert env.metadata['n_actors'] % len(policies) == 0 42 | if hasattr(env, "reset_goal"): 43 | self.goal = env.reset_goal() 44 | super().__init__(self.env.unwrapped.sim) 45 | # TO DO: remove circular dependency on viewer object. It looks fishy. 
46 | self.env.unwrapped.viewer = self 47 | if self.render and self.display_window: 48 | self.env.render() 49 | 50 | def key_callback(self, window, key, scancode, action, mods): 51 | super().key_callback(window, key, scancode, action, mods) 52 | # Trigger on keyup only: 53 | if action != glfw.RELEASE: 54 | return 55 | # Increment experiment seed 56 | if key == glfw.KEY_N: 57 | self.reset_increment() 58 | # Decrement experiment trial 59 | elif key == glfw.KEY_P: 60 | print("Pressed P") 61 | self.seed = max(self.seed - 1, 0) 62 | self.env.seed(self.seed) 63 | self.ob = self.env.reset() 64 | for policy in self.policies: 65 | policy.reset() 66 | if hasattr(self.env, "reset_goal"): 67 | self.goal = self.env.reset_goal() 68 | self.update_sim(self.env.unwrapped.sim) 69 | 70 | def run(self): 71 | if self.duration is not None: 72 | self.end_time = time.time() + self.duration 73 | self.total_rew_avg = 0.0 74 | self.n_episodes = 0 75 | while self.duration is None or time.time() < self.end_time: 76 | if len(self.policies) == 1: 77 | action, _ = self.policies[0].act(self.ob) 78 | else: 79 | self.ob = splitobs(self.ob, keepdims=False) 80 | ob_policy_idx = np.split(np.arange(len(self.ob)), len(self.policies)) 81 | actions = [] 82 | for i, policy in enumerate(self.policies): 83 | inp = itemgetter(*ob_policy_idx[i])(self.ob) 84 | inp = listdict2dictnp([inp] if ob_policy_idx[i].shape[0] == 1 else inp) 85 | ac, info = policy.act(inp) 86 | actions.append(ac) 87 | action = listdict2dictnp(actions, keepdims=True) 88 | 89 | self.ob, rew, done, env_info = self.env.step(action) 90 | self.total_rew += rew 91 | 92 | if done or env_info.get('discard_episode', False): 93 | self.reset_increment() 94 | 95 | if self.display_window: 96 | self.add_overlay(const.GRID_TOPRIGHT, "Reset env; (current seed: {})".format(self.seed), "N - next / P - previous ") 97 | self.add_overlay(const.GRID_TOPRIGHT, "Reward", str(self.total_rew)) 98 | if hasattr(self.env.unwrapped, "viewer_stats"): 99 | for k, v in self.env.unwrapped.viewer_stats.items(): 100 | self.add_overlay(const.GRID_TOPRIGHT, k, str(v)) 101 | 102 | self.env.render() 103 | 104 | def reset_increment(self): 105 | self.total_rew_avg = (self.n_episodes * self.total_rew_avg + self.total_rew) / (self.n_episodes + 1) 106 | self.n_episodes += 1 107 | print(f"Reward: {self.total_rew} (rolling average: {self.total_rew_avg})") 108 | self.total_rew = 0.0 109 | self.seed += 1 110 | self.env.seed(self.seed) 111 | self.ob = self.env.reset() 112 | for policy in self.policies: 113 | policy.reset() 114 | if hasattr(self.env, "reset_goal"): 115 | self.goal = self.env.reset_goal() 116 | self.update_sim(self.env.unwrapped.sim) 117 | -------------------------------------------------------------------------------- /mae_envs/wrappers/food.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from mae_envs.wrappers.util import update_obs_space 4 | from mujoco_worldgen.util.types import store_args 5 | from gym.spaces import Tuple, MultiDiscrete 6 | 7 | 8 | class FoodHealthWrapper(gym.Wrapper): 9 | ''' 10 | Adds food health to underlying env. 11 | Manages food levels. 
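Exposes an 'action_eat_food' action (one binary flag per food item per agent) and adds 'food_obs', 'food_health' and 'food_eat' to the observations.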
12 | 13 | Args: 14 | eat_thresh (float): radius within which food items can be eaten 15 | max_food_health (int): number of times a food item can be eaten 16 | before it disappears 17 | respawn_time (int): Number of time steps after which food items 18 | that have been eaten reappear 19 | food_rew_type (string): can be 20 | 'selfish': each agent gets an inividual reward for the food they eat 21 | 'joint_mean': food rewards are averaged over teams 22 | reward_scale (float or (float, float)): scales the reward by this amount. If tuple of 23 | floats, the exact reward scaling is uniformly sampled from 24 | (reward_scale[0], reward_scale[1]) at the beginning of every episode. 25 | reward_scale_obs (bool): If true, adds the reward scale for the current 26 | episode to food_obs 27 | ''' 28 | @store_args 29 | def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf, 30 | food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False, 31 | split_eat_between_agents=False): 32 | super().__init__(env) 33 | self.n_agents = self.metadata['n_agents'] 34 | 35 | if type(reward_scale) not in [list, tuple, np.ndarray]: 36 | self.reward_scale = [reward_scale, reward_scale] 37 | 38 | # Reset obs/action space to match 39 | self.max_n_food = self.metadata['max_n_food'] 40 | self.curr_n_food = self.metadata['curr_n_food'] 41 | self.max_food_size = self.metadata['food_size'] 42 | food_dim = 5 if self.reward_scale_obs else 4 43 | self.observation_space = update_obs_space(self.env, {'food_obs': (self.max_n_food, food_dim), 44 | 'food_health': (self.max_n_food, 1), 45 | 'food_eat': (self.max_n_food, 1)}) 46 | self.action_space.spaces['action_eat_food'] = Tuple([MultiDiscrete([2] * self.max_n_food) 47 | for _ in range(self.n_agents)]) 48 | 49 | def reset(self): 50 | obs = self.env.reset() 51 | sim = self.unwrapped.sim 52 | 53 | # Reset obs/action space to match 54 | self.curr_n_food = self.metadata['curr_n_food'] 55 | 56 | self.food_site_ids = np.array([sim.model.site_name2id(f'food{i}') 57 | for i in range(self.curr_n_food)]) 58 | # Reset food healths 59 | self.food_healths = np.ones((self.curr_n_food, 1)) * self.max_food_health 60 | self.eat_per_food = np.zeros((self.curr_n_food, 1)) 61 | 62 | # Reset food size 63 | self.respawn_counters = np.zeros((self.curr_n_food,)) 64 | 65 | self.curr_reward_scale = np.random.uniform(self.reward_scale[0], self.reward_scale[1]) 66 | 67 | return self.observation(obs) 68 | 69 | def observation(self, obs): 70 | # Add food position and healths to obersvations 71 | food_pos = obs['food_pos'] 72 | obs['food_health'] = self.food_healths 73 | obs['food_obs'] = np.concatenate([food_pos, self.food_healths], 1) 74 | if self.reward_scale_obs: 75 | obs['food_obs'] = np.concatenate([obs['food_obs'], np.ones((self.curr_n_food, 1)) * self.curr_reward_scale], 1) 76 | obs['food_eat'] = self.eat_per_food 77 | return obs 78 | 79 | def step(self, action): 80 | action_eat_food = action.pop('action_eat_food') 81 | obs, rew, done, info = self.env.step(action) 82 | 83 | if self.curr_n_food > 0: 84 | # Eat food that is close enough 85 | dist_to_food = np.linalg.norm(obs['agent_pos'][:, None] - obs['food_pos'][None], axis=-1) 86 | eat = np.logical_and(dist_to_food < self.eat_thresh, self.food_healths.T > 0) 87 | eat = np.logical_and(eat, action_eat_food).astype(np.float32) 88 | if self.split_eat_between_agents: 89 | eat_per_food = np.sum(eat, 0) 90 | eat[:, eat_per_food > 0] /= eat_per_food[eat_per_food > 0] 91 | eat_per_food = np.sum(eat, 0) 92 | 93 | # Make sure that all agents 
can't have the last bite of food. 94 | # At that point, food is split evenly 95 | over_eat = self.food_healths[:, 0] < eat_per_food 96 | eat[:, over_eat] *= (self.food_healths[over_eat, 0] / eat_per_food[over_eat]) 97 | eat_per_food = np.sum(eat, 0) 98 | self.eat_per_food = eat_per_food[:, None] 99 | 100 | # Update food healths and sizes 101 | self.food_healths -= eat_per_food[:, None] 102 | health_diff = eat_per_food[:, None] 103 | size_diff = health_diff * (self.max_food_size / self.max_food_health) 104 | size = self.unwrapped.sim.model.site_size[self.food_site_ids] - size_diff 105 | size = np.maximum(0, size) 106 | self.unwrapped.sim.model.site_size[self.food_site_ids] = size 107 | 108 | self.food_healths[self.respawn_counters == self.respawn_time] = self.max_food_health 109 | self.unwrapped.sim.model.site_size[self.food_site_ids[self.respawn_counters == self.respawn_time]] = self.max_food_size 110 | self.respawn_counters[self.food_healths[:, 0] == 0] += 1 111 | self.respawn_counters[self.food_healths[:, 0] != 0] = 0 112 | 113 | assert np.all(self.food_healths >= 0), \ 114 | f"There is a food health below 0: {self.food_healths}" 115 | 116 | # calculate food reward 117 | if self.food_rew_type == 'selfish': 118 | food_rew = np.sum(eat, axis=1) 119 | elif self.food_rew_type == 'joint_mean': 120 | food_rew = np.sum(eat, axis=1) 121 | team_index = self.metadata['team_index'] 122 | for team_index_number in np.unique(team_index): 123 | food_rew[team_index == team_index_number] = np.mean(food_rew[team_index == team_index_number]) 124 | else: 125 | raise ValueError(f"Food reward type {self.food_rew_type} unknown.") 126 | else: 127 | food_rew = 0.0 128 | 129 | info['agents_eat'] = eat 130 | rew += food_rew * self.curr_reward_scale 131 | return self.observation(obs), rew, done, info 132 | 133 | 134 | class AlwaysEatWrapper(gym.ActionWrapper): 135 | ''' 136 | Remove eat action and replace it with always eating. 137 | Args: 138 | agent_idx_allowed (ndarray): indicies of agents allowed to eat. 139 | ''' 140 | def __init__(self, env, agent_idx_allowed): 141 | super().__init__(env) 142 | self.action_space.spaces.pop('action_eat_food') 143 | self.agent_idx_allowed = agent_idx_allowed 144 | 145 | def action(self, action): 146 | action['action_eat_food'] = np.zeros((self.metadata['n_agents'], self.metadata['curr_n_food'])) 147 | action['action_eat_food'][self.agent_idx_allowed] = 1. 
148 | return action 149 | -------------------------------------------------------------------------------- /mae_envs/wrappers/lidar.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from mujoco_worldgen.util.rotation import quat_from_angle_and_axis 4 | from mujoco_worldgen.util.geometry import raycast 5 | from mae_envs.wrappers.util import update_obs_space 6 | 7 | 8 | class Lidar(gym.ObservationWrapper): 9 | ''' 10 | Creates LIDAR-type observations based on Mujoco raycast 11 | 12 | Args: 13 | n_lidar_per_agent (int): Number of concentric lidar rays per agent 14 | lidar_range (float): Maximum range of lidar 15 | compress_lidar_scale (float): Scale for non-linear compression of 16 | lidar range 17 | visualize_lidar (bool): If true, visualize lidar using thin cylinders 18 | representing lidar rays (requires environment to create corresponding 19 | sites) 20 | ''' 21 | def __init__(self, env, n_lidar_per_agent=30, lidar_range=6.0, 22 | compress_lidar_scale=None, visualize_lidar=False): 23 | super().__init__(env) 24 | self.n_lidar_per_agent = n_lidar_per_agent 25 | self.lidar_range = lidar_range 26 | self.compress_lidar_scale = compress_lidar_scale 27 | self.visualize_lidar = visualize_lidar 28 | self.n_agents = self.unwrapped.n_agents 29 | 30 | self.observation_space = update_obs_space( 31 | env, {'lidar': (self.n_agents, self.n_lidar_per_agent, 1)}) 32 | 33 | # generate concentric lidar rays centered at origin 34 | self.lidar_angles = np.linspace(0, 2*np.pi, num=self.n_lidar_per_agent, endpoint=False) 35 | self.lidar_rays = self.lidar_range * np.array([np.cos(self.lidar_angles), 36 | np.sin(self.lidar_angles), 37 | np.zeros_like(self.lidar_angles)]).T 38 | self.lidar_rays = self.lidar_rays[None, :] 39 | 40 | def reset(self): 41 | obs = self.env.reset() 42 | 43 | sim = self.unwrapped.sim 44 | 45 | # Cache ids 46 | self.agent_body_ids = np.array([sim.model.body_name2id(f"agent{i}:particle") 47 | for i in range(self.n_agents)]) 48 | self.agent_geom_ids = np.array([sim.model.geom_name2id(f'agent{i}:agent') 49 | for i in range(self.n_agents)]) 50 | 51 | if self.visualize_lidar: 52 | self.lidar_ids = np.array([[sim.model.site_name2id(f"agent{i}:lidar{j}") 53 | for j in range(self.n_lidar_per_agent)] 54 | for i in range(self.n_agents)]) 55 | 56 | return self.observation(obs) 57 | 58 | def place_lidar_ray_markers(self, agent_pos, lidar_endpoints): 59 | sim = self.unwrapped.sim 60 | 61 | site_offset = sim.data.site_xpos[self.lidar_ids, :] - sim.model.site_pos[self.lidar_ids, :] 62 | 63 | # compute location of lidar rays 64 | sim.model.site_pos[self.lidar_ids, :] = .5 * (agent_pos[:, None, :] + lidar_endpoints) - site_offset 65 | 66 | # compute length of lidar rays 67 | rel_vec = lidar_endpoints - agent_pos[:, None, :] 68 | rel_vec_length = np.linalg.norm(rel_vec, axis=-1) 69 | sim.model.site_size[self.lidar_ids, 1] = rel_vec_length / 2 70 | 71 | # compute rotation of lidar rays 72 | # normalize relative vector 73 | rel_vec_norm = rel_vec / rel_vec_length[:, :, None] 74 | # set small relative vectors to zero instead 75 | rel_vec_norm[rel_vec_length <= 1e-8, :] = 0.0 76 | # start vector 77 | start_vec = np.array([0.0, 0.0, 1.0]) 78 | # calculate rotation axis: cross product between start and goal vector 79 | rot_axis = np.cross(start_vec, rel_vec_norm) 80 | norm_rot_axis = np.linalg.norm(rot_axis, axis=-1) 81 | 82 | # calculate rotation angle and quaternion 83 | rot_angle = np.arctan2(norm_rot_axis, np.dot(rel_vec_norm, 
start_vec)) 84 | quat = quat_from_angle_and_axis(rot_angle, rot_axis) 85 | 86 | # if norm of cross product is very small, set rotation to identity 87 | eps = 1e-3 88 | quat[norm_rot_axis <= eps, :] = np.array([1.0, 0.0, 0.0, 0.0]) 89 | 90 | sim.model.site_quat[self.lidar_ids, :] = quat 91 | 92 | def observation(self, obs): 93 | sim = self.unwrapped.sim 94 | agent_pos = sim.data.body_xpos[self.agent_body_ids] 95 | 96 | lidar_endpoints = agent_pos[:, None, :] + self.lidar_rays 97 | 98 | # Would be nice to vectorize in the future with better mujoco-py interface 99 | lidar = np.zeros((self.n_agents, self.n_lidar_per_agent)) 100 | for i in range(self.n_agents): 101 | for j in range(self.n_lidar_per_agent): 102 | lidar[i, j] = raycast(sim, geom1_id=self.agent_geom_ids[i], 103 | pt2=lidar_endpoints[i, j], geom_group=None)[0] 104 | 105 | lidar[lidar < 0.0] = self.lidar_range 106 | 107 | if self.compress_lidar_scale is not None: 108 | obs['lidar'] = (self.compress_lidar_scale * 109 | np.tanh(lidar[..., None] / self.compress_lidar_scale)) 110 | else: 111 | obs['lidar'] = lidar[..., None] 112 | 113 | if self.visualize_lidar: 114 | # recalculate lidar endpoints 115 | lidar_endpoints = agent_pos[:, None, :] + \ 116 | lidar[:, :, None] / self.lidar_range * self.lidar_rays 117 | self.place_lidar_ray_markers(agent_pos, lidar_endpoints) 118 | sim.model.site_rgba[self.lidar_ids, :] = np.array([0.0, 0.0, 1.0, 0.2]) 119 | sim.forward() 120 | 121 | return obs 122 | -------------------------------------------------------------------------------- /mae_envs/wrappers/limit_mvmnt.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | class RestrictAgentsRect(gym.RewardWrapper): 6 | ''' 7 | Give subset of agents negative reward if they leave a given area 8 | Args: 9 | restrict_rect (list of four floats): coordinates of rectangle 10 | defined as [x_min, y_min, x_max, y_max] 11 | reward_scale (float): reward for going out of bounds is -reward_scale at each 12 | timestep 13 | penalize_objects_out (bool): If true, penalizes all agents whenever an object is 14 | outside the specified area. 
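A minimal sketch of the bounds test behind the penalty (toy numbers, not taken from the wrapper itself): an agent is out of bounds when its xy position deviates from the rectangle's centre by more than half the rectangle's size along either axis.

    import numpy as np
    restrict_rect = np.array([-2.0, -2.0, 2.0, 2.0])             # [x_min, y_min, x_max, y_max]
    rect_middle = 0.5 * np.array([restrict_rect[0] + restrict_rect[2],
                                  restrict_rect[1] + restrict_rect[3]])
    rect_size = np.array([restrict_rect[2] - restrict_rect[0],
                          restrict_rect[3] - restrict_rect[1]])
    agent_pos = np.array([[0.0, 0.0], [3.5, 1.0]])               # second agent is outside in x
    outside = np.any(np.abs(agent_pos - rect_middle) > (rect_size / 2), axis=1)
    reward = np.zeros(2)
    reward[outside] = -10.0                                      # i.e. -reward_scale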
15 | ''' 16 | def __init__(self, env, restrict_rect, reward_scale=10., penalize_objects_out=False): 17 | super().__init__(env) 18 | self.n_agents = self.unwrapped.n_agents 19 | self.restrict_rect = np.array(restrict_rect) 20 | self.reward_scale = reward_scale 21 | self.penalize_objects_out = penalize_objects_out 22 | 23 | assert len(self.restrict_rect) == 4, \ 24 | "Restriction rectangle must be of format [x_min, y_min, x_max, y_max]" 25 | 26 | self.rect_middle = 0.5 * np.array([restrict_rect[0] + restrict_rect[2], 27 | restrict_rect[1] + restrict_rect[3]]) 28 | 29 | self.rect_size = np.array([restrict_rect[2] - restrict_rect[0], 30 | restrict_rect[3] - restrict_rect[1]]) 31 | 32 | def reset(self): 33 | obs = self.env.reset() 34 | sim = self.unwrapped.sim 35 | self.agent_body_idxs = np.array([sim.model.body_name2id(f"agent{i}:particle") 36 | for i in range(self.n_agents)]) 37 | if self.penalize_objects_out: 38 | obj_body_idxs = ([sim.model.body_name2id(f'moveable_box{i}') for i in np.where(self.metadata['curr_n_boxes'])[0]] + 39 | [sim.model.body_name2id(f'ramp{i}:ramp') for i in np.where(self.metadata['curr_n_ramps'])[0]]) 40 | self.obj_body_idxs = np.array(obj_body_idxs) 41 | 42 | return obs 43 | 44 | def reward(self, reward): 45 | sim = self.unwrapped.sim 46 | agent_pos = sim.data.body_xpos[self.agent_body_idxs, :2] 47 | outside_rect = np.any(np.abs(agent_pos - self.rect_middle) > (self.rect_size / 2), axis=1) 48 | if self.penalize_objects_out: 49 | obj_pos = sim.data.body_xpos[self.obj_body_idxs, :2] 50 | any_obj_outside_rect = np.any(np.abs(obj_pos - self.rect_middle) > (self.rect_size / 2)) 51 | if any_obj_outside_rect: 52 | reward[:] = - self.reward_scale 53 | reward[outside_rect] = - self.reward_scale 54 | 55 | return reward 56 | -------------------------------------------------------------------------------- /mae_envs/wrappers/line_of_sight.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from mae_envs.util.vision import insight, in_cone2d 4 | from mae_envs.wrappers.util import update_obs_space 5 | 6 | 7 | class AgentAgentObsMask2D(gym.ObservationWrapper): 8 | """ Adds an mask observation that states which agents are visible to which agents. 9 | Args: 10 | cone_angle: (float) the angle in radians btw the axis and edge of the observation cone 11 | """ 12 | def __init__(self, env, cone_angle=3/8 * np.pi): 13 | super().__init__(env) 14 | self.cone_angle = cone_angle 15 | self.n_agents = self.unwrapped.n_agents 16 | self.observation_space = update_obs_space(env, {'mask_aa_obs': (self.n_agents, self.n_agents)}) 17 | 18 | def observation(self, obs): 19 | # Agent to agent obs mask 20 | agent_pos2d = obs['agent_pos'][:, :-1] 21 | agent_angle = obs['agent_angle'] 22 | cone_mask = in_cone2d(agent_pos2d, np.squeeze(agent_angle, -1), self.cone_angle, agent_pos2d) 23 | # Make sure they are in line of sight 24 | for i, j in np.argwhere(cone_mask): 25 | if i != j: 26 | cone_mask[i, j] = insight(self.unwrapped.sim, 27 | self.metadata['agent_geom_idxs'][i], 28 | self.metadata['agent_geom_idxs'][j]) 29 | obs['mask_aa_obs'] = cone_mask 30 | return obs 31 | 32 | 33 | class AgentSiteObsMask2D(gym.ObservationWrapper): 34 | """ Adds an mask observation that states which sites are visible to which agents. 
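The visibility test has two parts: a 2D view-cone check and a raycast for line of sight. Below is a toy, scalar version of the cone check only (the real, vectorised check is in_cone2d from mae_envs/util/vision.py, followed by the insight raycast); it is an illustration, not the library function.

    import numpy as np
    cone_angle = 3 / 8 * np.pi
    agent_pos, agent_angle = np.array([0.0, 0.0]), 0.0           # agent at origin facing +x
    targets = np.array([[1.0, 0.2], [-1.0, 0.0]])                # one target in front, one behind
    to_target = targets - agent_pos
    bearing = np.arctan2(to_target[:, 1], to_target[:, 0])
    angle_diff = np.abs(np.arctan2(np.sin(bearing - agent_angle), np.cos(bearing - agent_angle)))
    in_cone = angle_diff < cone_angle                            # array([ True, False])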
35 | Args: 36 | pos_obs_key: (string) the name of the site position observation of shape (n_sites, 3) 37 | mask_obs_key: (string) the name of the mask observation to output 38 | cone_angle: (float) the angle in radians btw the axis and edge of the observation cone 39 | """ 40 | def __init__(self, env, pos_obs_key, mask_obs_key, cone_angle=3/8 * np.pi): 41 | super().__init__(env) 42 | self.cone_angle = cone_angle 43 | self.n_agents = self.unwrapped.n_agents 44 | assert(self.n_agents == self.observation_space.spaces['agent_pos'].shape[0]) 45 | self.n_objects = self.observation_space.spaces[pos_obs_key].shape[0] 46 | self.observation_space = update_obs_space(env, {mask_obs_key: (self.n_agents, self.n_objects)}) 47 | self.pos_obs_key = pos_obs_key 48 | self.mask_obs_key = mask_obs_key 49 | 50 | def observation(self, obs): 51 | agent_pos2d = obs['agent_pos'][:, :-1] 52 | agent_angle = obs['agent_angle'] 53 | pos2d = obs[self.pos_obs_key][:, :2] 54 | cone_mask = in_cone2d(agent_pos2d, np.squeeze(agent_angle, -1), self.cone_angle, pos2d) 55 | # Make sure they are in line of sight 56 | for i, j in np.argwhere(cone_mask): 57 | agent_geom_id = self.metadata['agent_geom_idxs'][i] 58 | pt2 = obs[self.pos_obs_key][j] 59 | cone_mask[i, j] = insight(self.unwrapped.sim, agent_geom_id, pt2=pt2) 60 | obs[self.mask_obs_key] = cone_mask 61 | return obs 62 | 63 | 64 | class AgentGeomObsMask2D(gym.ObservationWrapper): 65 | """ Adds an mask observation that states which geoms are visible to which agents. 66 | Args: 67 | pos_obs_key: (string) the name of the site position observation of shape (n_geoms, 3) 68 | geom_idxs_obs_key: (string) the name of an observation that, for each object to be 69 | masked, gives the Mujoco index of the geom (e.g. in sim.geom_names) 70 | as an array of shape (n_geoms, 1) 71 | mask_obs_key: (string) the name of the mask observation to output 72 | cone_angle: (float) the angle in radians btw the axis and edge of the observation cone 73 | """ 74 | def __init__(self, env, pos_obs_key, geom_idxs_obs_key, mask_obs_key, cone_angle=3/8 * np.pi): 75 | super().__init__(env) 76 | self.cone_angle = cone_angle 77 | self.n_agents = self.unwrapped.n_agents 78 | assert(self.n_agents == self.observation_space.spaces['agent_pos'].shape[0]) 79 | self.n_objects = self.observation_space.spaces[pos_obs_key].shape[0] 80 | self.observation_space = update_obs_space(env, {mask_obs_key: (self.n_agents, self.n_objects)}) 81 | self.pos_obs_key = pos_obs_key 82 | self.mask_obs_key = mask_obs_key 83 | self.geom_idxs_obs_key = geom_idxs_obs_key 84 | 85 | def observation(self, obs): 86 | agent_pos2d = obs['agent_pos'][:, :-1] 87 | agent_angle = obs['agent_angle'] 88 | pos2d = obs[self.pos_obs_key][:, :2] 89 | cone_mask = in_cone2d(agent_pos2d, np.squeeze(agent_angle, -1), self.cone_angle, pos2d) 90 | # Make sure they are in line of sight 91 | for i, j in np.argwhere(cone_mask): 92 | agent_geom_id = self.metadata['agent_geom_idxs'][i] 93 | geom_id = obs[self.geom_idxs_obs_key][j, 0] 94 | if geom_id == -1: 95 | # This option is helpful if the number of geoms varies between episodes 96 | # If geoms don't exists this wrapper expects that the geom idx is 97 | # set to -1 98 | cone_mask[i, j] = 0 99 | else: 100 | cone_mask[i, j] = insight(self.unwrapped.sim, agent_geom_id, geom2_id=geom_id) 101 | obs[self.mask_obs_key] = cone_mask 102 | return obs 103 | -------------------------------------------------------------------------------- /mae_envs/wrappers/multi_agent.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from scipy.linalg import circulant 4 | from gym.spaces import Tuple, Box, Dict 5 | from copy import deepcopy 6 | 7 | 8 | class SplitMultiAgentActions(gym.ActionWrapper): 9 | ''' 10 | Splits mujoco generated actions into a dict of tuple actions. 11 | ''' 12 | def __init__(self, env): 13 | super().__init__(env) 14 | self.n_agents = self.metadata['n_actors'] 15 | lows = np.split(self.action_space.low, self.n_agents) 16 | highs = np.split(self.action_space.high, self.n_agents) 17 | self.action_space = Dict({ 18 | 'action_movement': Tuple([Box(low=low, high=high, dtype=self.action_space.dtype) 19 | for low, high in zip(lows, highs)]) 20 | }) 21 | 22 | def action(self, action): 23 | return action['action_movement'].flatten() 24 | 25 | 26 | class JoinMultiAgentActions(gym.ActionWrapper): 27 | def __init__(self, env): 28 | super().__init__(env) 29 | self.n_agents = self.metadata['n_actors'] 30 | low = np.concatenate([space.low for space in self.action_space.spaces]) 31 | high = np.concatenate([space.high for space in self.action_space.spaces]) 32 | self.action_space = Box(low=low, high=high, dtype=self.action_space.spaces[0].dtype) 33 | 34 | def action(self, action): 35 | # action should be a tuple of different agent actions 36 | return np.split(action, self.n_agents) 37 | 38 | 39 | class SplitObservations(gym.ObservationWrapper): 40 | """ 41 | Split observations for each agent. 42 | Args: 43 | keys_self: list of observation names which are agent specific. E.g. this will 44 | permute qpos such that each agent sees its own qpos as the first numbers 45 | keys_copy: list of observation names that are just passed down as is 46 | keys_self_matrices: list of observation names that should be (n_agent, n_agent, dim) where 47 | each agent has a custom observation of another agent. This is different from self_keys 48 | in that self_keys we assume that observations are symmetric, whereas these can represent 49 | unique pairwise interactions/observations 50 | """ 51 | def __init__(self, env, keys_self, keys_copy=[], keys_self_matrices=[]): 52 | super().__init__(env) 53 | self.keys_self = sorted(keys_self) 54 | self.keys_copy = sorted(keys_copy) 55 | self.keys_self_matrices = sorted(keys_self_matrices) 56 | self.n_agents = self.metadata['n_agents'] 57 | new_spaces = {} 58 | for k, v in self.observation_space.spaces.items(): 59 | # If obs is a self obs, then we only want to include other agents obs, 60 | # as we will pass the self obs separately. 61 | assert len(v.shape) > 1, f'Obs {k} has shape {v.shape}' 62 | if 'mask' in k and k not in self.keys_self_matrices: 63 | new_spaces[k] = v 64 | elif k in self.keys_self_matrices: 65 | new_spaces[k] = Box(low=v.low[:, 1:], high=v.high[:, 1:], dtype=v.dtype) 66 | elif k in self.keys_self: 67 | assert v.shape[0] == self.n_agents, \ 68 | f"For self obs, obs dim 0 should equal number of agents. 
{k} has shape {v.shape}" 69 | obs_shape = (v.shape[0], self.n_agents - 1, v.shape[1]) 70 | lows = np.tile(v.low, self.n_agents - 1).reshape(obs_shape) 71 | highs = np.tile(v.high, self.n_agents - 1).reshape(obs_shape) 72 | new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype) 73 | elif k in self.keys_copy: 74 | new_spaces[k] = deepcopy(v) 75 | else: 76 | obs_shape = (v.shape[0], self.n_agents, v.shape[1]) 77 | lows = np.tile(v.low, self.n_agents).reshape(obs_shape).transpose((1, 0, 2)) 78 | highs = np.tile(v.high, self.n_agents).reshape(obs_shape).transpose((1, 0, 2)) 79 | new_spaces[k] = Box(low=lows, high=highs, dtype=v.dtype) 80 | 81 | for k in self.keys_self: 82 | new_spaces[k + '_self'] = self.observation_space.spaces[k] 83 | 84 | self.observation_space = Dict(new_spaces) 85 | 86 | def observation(self, obs): 87 | new_obs = {} 88 | for k, v in obs.items(): 89 | # Masks that aren't self matrices should just be copied 90 | if 'mask' in k and k not in self.keys_self_matrices: 91 | new_obs[k] = obs[k] 92 | # Circulant self matrices 93 | elif k in self.keys_self_matrices: 94 | new_obs[k] = self._process_self_matrix(obs[k]) 95 | # Circulant self keys 96 | elif k in self.keys_self: 97 | new_obs[k + '_self'] = obs[k] 98 | new_obs[k] = obs[k][circulant(np.arange(self.n_agents))] 99 | new_obs[k] = new_obs[k][:, 1:, :] # Remove self observation 100 | elif k in self.keys_copy: 101 | new_obs[k] = obs[k] 102 | # Everything else should just get copied for each agent (e.g. external obs) 103 | else: 104 | new_obs[k] = np.tile(v, self.n_agents).reshape([v.shape[0], self.n_agents, v.shape[1]]).transpose((1, 0, 2)) 105 | 106 | return new_obs 107 | 108 | def _process_self_matrix(self, self_matrix): 109 | ''' 110 | self_matrix will be a (n_agent, n_agent) boolean matrix. Permute each row such that the matrix is consistent with 111 | the circulant permutation used for self observations. E.g. this should be used for agent agent masks 112 | ''' 113 | assert np.all(self_matrix.shape[:2] == np.array((self.n_agents, self.n_agents))), \ 114 | f"The first two dimensions of {self_matrix} were not (n_agents, n_agents)" 115 | 116 | new_mat = self_matrix.copy() 117 | # Permute each row to the right by one more than the previous 118 | # E.g., [[1,2],[3,4]] -> [[1,2],[4,3]] 119 | idx = circulant(np.arange(self.n_agents)) 120 | new_mat = new_mat[np.arange(self.n_agents)[:, None], idx] 121 | new_mat = new_mat[:, 1:] # Remove self observation 122 | return new_mat 123 | 124 | 125 | class SelectKeysWrapper(gym.ObservationWrapper): 126 | """ 127 | Select keys for final observations. 128 | Expects that all observations come in shape (n_agents, n_objects, n_dims) 129 | Args: 130 | keys_self (list): observation names that are specific to an agent 131 | These will be concatenated into 'observation_self' observation 132 | keys_other (list): observation names that should be passed through 133 | flatten (bool): if true, internal and external observations 134 | """ 135 | 136 | def __init__(self, env, keys_self, keys_other, flatten=False): 137 | super().__init__(env) 138 | self.keys_self = sorted([k + '_self' for k in keys_self]) 139 | self.keys_other = sorted(keys_other) 140 | self.flatten = flatten 141 | 142 | # Change observation space to look like a single agent observation space. 
143 | # This makes constructing policies much easier 144 | if flatten: 145 | size_self = sum([np.prod(self.env.observation_space.spaces[k].shape[1:]) 146 | for k in self.keys_self + self.keys_other]) 147 | self.observation_space = Dict( 148 | {'observation_self': Box(-np.inf, np.inf, (size_self,), np.float32)}) 149 | else: 150 | size_self = sum([self.env.observation_space.spaces[k].shape[1] 151 | for k in self.keys_self]) 152 | obs_self = {'observation_self': Box(-np.inf, np.inf, (size_self,), np.float32)} 153 | obs_extern = {k: Box(-np.inf, np.inf, v.shape[1:], np.float32) 154 | for k, v in self.observation_space.spaces.items() 155 | if k in self.keys_other} 156 | obs_self.update(obs_extern) 157 | self.observation_space = Dict(obs_self) 158 | 159 | def observation(self, observation): 160 | if self.flatten: 161 | other_obs = [observation[k].reshape((observation[k].shape[0], -1)) 162 | for k in self.keys_other] 163 | obs = np.concatenate([observation[k] for k in self.keys_self] + other_obs, axis=-1) 164 | return {'observation_self': obs} 165 | else: 166 | obs = np.concatenate([observation[k] for k in self.keys_self], -1) 167 | obs = {'observation_self': obs} 168 | other_obs = {k: v for k, v in observation.items() if k in self.keys_other} 169 | obs.update(other_obs) 170 | return obs 171 | -------------------------------------------------------------------------------- /mae_envs/wrappers/prep_phase.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from copy import deepcopy 4 | from mae_envs.wrappers.util import update_obs_space 5 | 6 | 7 | class PreparationPhase(gym.Wrapper): 8 | ''' 9 | Rewards are switched off during preparation. 10 | 11 | Args: prep_fraction (float): Fraction of total time that is preparation time 12 | ''' 13 | def __init__(self, env, prep_fraction=.2): 14 | super().__init__(env) 15 | self.prep_fraction = prep_fraction 16 | self.prep_time = self.prep_fraction * self.unwrapped.horizon 17 | self.n_agents = self.metadata['n_agents'] 18 | self.step_counter = 0 19 | self.observation_space = update_obs_space(self, {'prep_obs': [self.n_agents, 1]}) 20 | 21 | def reset(self): 22 | self.step_counter = 0 23 | self.in_prep_phase = True 24 | return self.observation(self.env.reset()) 25 | 26 | def reward(self, reward): 27 | if self.in_prep_phase: 28 | reward = np.zeros_like(reward) 29 | 30 | return reward 31 | 32 | def observation(self, obs): 33 | obs['prep_obs'] = (np.ones((self.n_agents, 1)) * 34 | np.minimum(1.0, self.step_counter / (self.prep_time + 1e-5))) 35 | 36 | return obs 37 | 38 | def step(self, action): 39 | obs, rew, done, info = self.env.step(action) 40 | rew = self.reward(rew) 41 | self.step_counter += 1 42 | self.in_prep_phase = self.step_counter < self.prep_time 43 | info['in_prep_phase'] = self.in_prep_phase 44 | 45 | return self.observation(obs), rew, done, info 46 | 47 | 48 | class NoActionsInPrepPhase(gym.Wrapper): 49 | '''Agents have all actions turned off during preparation phase. 
50 | For MultiDiscrete and Discrete, assumes zero action is the rounded down middle action''' 51 | 52 | def __init__(self, env, agent_idxs): 53 | super().__init__(env) 54 | self.agent_idxs = np.array(agent_idxs) 55 | 56 | def reset(self): 57 | obs = self.env.reset() 58 | self.in_prep_phase = True 59 | return obs 60 | 61 | def step(self, action): 62 | obs, rew, done, info = self.env.step(self.action(action)) 63 | self.in_prep_phase = info['in_prep_phase'] 64 | return obs, rew, done, info 65 | 66 | def action(self, action): 67 | ac = deepcopy(action) 68 | if self.in_prep_phase: 69 | for k, space in self.action_space.spaces.items(): 70 | _space = space.spaces[0] 71 | if isinstance(_space, gym.spaces.MultiDiscrete): 72 | zero_ac = (_space.nvec - 1) // 2 73 | elif isinstance(_space, gym.spaces.Discrete): 74 | zero_ac = (_space.n - 1) // 2 75 | else: 76 | zero_ac = 0.0 77 | ac[k][self.agent_idxs] = zero_ac 78 | 79 | return ac 80 | 81 | 82 | class MaskPrepPhaseAction(gym.Wrapper): 83 | ''' 84 | Masks a (binary) action during preparation phase 85 | ''' 86 | def __init__(self, env, action_key): 87 | super().__init__(env) 88 | self.action_key = action_key 89 | 90 | def reset(self): 91 | obs = self.env.reset() 92 | self.in_prep_phase = True 93 | return obs 94 | 95 | def step(self, action): 96 | action[self.action_key] = (action[self.action_key] * (1 - self.in_prep_phase)).astype(bool) 97 | 98 | obs, rew, done, info = self.env.step(action) 99 | self.in_prep_phase = info['in_prep_phase'] 100 | 101 | return obs, rew, done, info 102 | -------------------------------------------------------------------------------- /mae_envs/wrappers/team.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from mae_envs.wrappers.util import update_obs_space 4 | 5 | 6 | class TeamMembership(gym.ObservationWrapper): 7 | ''' 8 | This wrapper just stores team membership information at initialization. 9 | The information is stored as a key in the self.metadata property, which ensures 10 | that it is available even if this wrapper is not on top of the wrapper 11 | hierarchy. 12 | 13 | Arguments: 14 | team_index: list/numpy vector of team membership index 15 | length must be equal to number of agents 16 | e.g. [0,0,0,1,1,1] means first 3 agents are in team 0, 17 | second 3 agents in team 1 18 | n_teams: if team_index is None, agents are split in n_teams number 19 | of teams, with as equal team sizes as possible. 20 | if team_index is set, this argument is ignored 21 | 22 | One planned use of this wrapper is to evaluate the "TrueSkill" score 23 | during training, which requires knowing which agent belongs to which team 24 | 25 | Note: This wrapper currently does not align the reward structure with the 26 | teams, but that could be easily implemented if desired. 
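For concreteness, the default splitting rule can be reproduced in a few lines (standalone sketch mirroring the np.array_split logic used in __init__ below):

    import numpy as np
    n_agents, n_teams = 5, 3
    parts = np.array_split(np.arange(n_agents), n_teams)
    team_index = np.concatenate([np.ones_like(p) * i for i, p in enumerate(parts)])
    # team_index -> array([0, 0, 1, 1, 2])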
27 | ''' 28 | def __init__(self, env, team_index=None, n_teams=2): 29 | super().__init__(env) 30 | self.n_agents = self.metadata['n_actors'] 31 | 32 | if team_index is None: 33 | assert n_teams >= 1, "Number of teams must be at least 1" 34 | # split teams: 5 agents and 3 teams will result in team_index = [0,0,1,1,2] 35 | team_index = np.array_split(np.arange(self.n_agents), n_teams) 36 | team_index = np.concatenate([np.ones_like(ar) * i for i, ar in enumerate(team_index)]) 37 | 38 | assert len(team_index) == self.n_agents, ( 39 | "team_index parameter length must be equal to number of agents") 40 | if isinstance(team_index, np.ndarray): 41 | assert team_index.ndim == 1, ( 42 | "team_index parameter must be numpy array of dimension 1") 43 | 44 | # store in metadata property that gets automatically inherited 45 | # make sure we copy value of team_index if it's a numpy array 46 | self.metadata['team_index'] = np.array(team_index) 47 | self.team_idx = np.array(team_index) 48 | self.observation_space = update_obs_space(env, {'team_size': (self.n_agents, 1)}) 49 | 50 | def observation(self, obs): 51 | obs['team_size'] = np.sum(self.team_idx[:, None] == self.team_idx[None, :], 52 | axis=1, keepdims=True) 53 | return obs 54 | -------------------------------------------------------------------------------- /mae_envs/wrappers/util.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from mujoco_py import MujocoException 3 | from gym.spaces import Dict, Box 4 | import numpy as np 5 | from copy import deepcopy 6 | import logging 7 | 8 | 9 | def update_obs_space(env, delta): 10 | spaces = env.observation_space.spaces.copy() 11 | for key, shape in delta.items(): 12 | spaces[key] = Box(-np.inf, np.inf, shape, np.float32) 13 | return Dict(spaces) 14 | 15 | 16 | class NumpyArrayRewardWrapper(gym.RewardWrapper): 17 | """ 18 | Convenience wrapper that casts rewards to the multiagent format 19 | (numpy array of shape (n_agents,)) 20 | """ 21 | def __init__(self, env): 22 | super().__init__(env) 23 | 24 | def reward(self, rew): 25 | return np.zeros((self.unwrapped.n_agents,)) + rew 26 | 27 | 28 | class DiscretizeActionWrapper(gym.ActionWrapper): 29 | ''' 30 | Take a Box action and convert it to a MultiDiscrete Action through quantization 31 | Args: 32 | action_key: (string) action to discretize 33 | nbuckets: (int) number of discrete actions per dimension. It should be odd such 34 | that actions centered around 0 will have the middle action be 0. 
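A small worked example of the quantization (toy Box bounds, not tied to a specific env): with nbuckets=11 on a [-1, 1] dimension, discrete action 5 maps back to exactly 0, which is why an odd bucket count preserves a true "do nothing" action.

    import numpy as np
    low, high, nbuckets = -1.0, 1.0, 11
    action_map = np.linspace(low, high, nbuckets)       # [-1.0, -0.8, ..., 0.8, 1.0]
    continuous_action = action_map[5]                   # 0.0, the centre bucket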
35 | ''' 36 | def __init__(self, env, action_key, nbuckets=11): 37 | super().__init__(env) 38 | self.action_key = action_key 39 | self.discrete_to_continuous_act_map = [] 40 | for i, ac_space in enumerate(self.action_space.spaces[action_key].spaces): 41 | assert isinstance(ac_space, Box) 42 | action_map = np.array([np.linspace(low, high, nbuckets) 43 | for low, high in zip(ac_space.low, ac_space.high)]) 44 | _nbuckets = np.ones((len(action_map))) * nbuckets 45 | self.action_space.spaces[action_key].spaces[i] = gym.spaces.MultiDiscrete(_nbuckets) 46 | self.discrete_to_continuous_act_map.append(action_map) 47 | self.discrete_to_continuous_act_map = np.array(self.discrete_to_continuous_act_map) 48 | 49 | def action(self, action): 50 | action = deepcopy(action) 51 | ac = action[self.action_key] 52 | 53 | # helper variables for indexing the discrete-to-continuous action map 54 | agent_idxs = np.tile(np.arange(ac.shape[0])[:, None], ac.shape[1]) 55 | ac_idxs = np.tile(np.arange(ac.shape[1]), ac.shape[0]).reshape(ac.shape) 56 | 57 | action[self.action_key] = self.discrete_to_continuous_act_map[agent_idxs, ac_idxs, ac] 58 | return action 59 | 60 | 61 | class DiscardMujocoExceptionEpisodes(gym.Wrapper): 62 | ''' 63 | Catches Mujoco Exceptions. Sends signal to discard Episode. 64 | ''' 65 | def __init__(self, env): 66 | super().__init__(env) 67 | self.episode_error = False 68 | 69 | def step(self, action): 70 | assert not self.episode_error, "Won't Continue Episode After Mujoco Exception -- \ 71 | Please discard episode and reset. If info['discard_episode'] is True the episode\ 72 | should be discarded" 73 | try: 74 | obs, rew, done, info = self.env.step(action) 75 | info['discard_episode'] = False 76 | except MujocoException as e: 77 | self.episode_error = True 78 | # Done is set to False such that rollout workers do not accidently send data in 79 | # the event that timelimit is up in the same step as an error occured. 80 | obs, rew, done, info = {}, 0.0, False, {'discard_episode': True} 81 | logging.info(str(e)) 82 | logging.info("Encountered Mujoco Exception During Environment Step.\ 83 | Reset Episode Required") 84 | 85 | return obs, rew, done, info 86 | 87 | def reset(self): 88 | try: 89 | obs = self.env.reset() 90 | except MujocoException: 91 | logging.info("Encountered Mujoco Exception During Environment Reset.\ 92 | Trying Reset Again") 93 | obs = self.reset() 94 | self.episode_error = False 95 | return obs 96 | 97 | 98 | class MaskActionWrapper(gym.Wrapper): 99 | ''' 100 | For a boolean action, sets it to zero given a mask from the previous step. 101 | For example you could mask the grab action based on whether you can see the box 102 | Args: 103 | action_key (string): key in action dictionary to be masked 104 | mask_keys (string): keys in observation dictionary with which to mask. 
The shape 105 | of the concatenation of the masks (along the 1st dimension) should exactly 106 | match that of action_key 107 | ''' 108 | def __init__(self, env, action_key, mask_keys): 109 | super().__init__(env) 110 | self.action_key = action_key 111 | self.mask_keys = mask_keys 112 | 113 | def reset(self): 114 | self.prev_obs = self.env.reset() 115 | return deepcopy(self.prev_obs) 116 | 117 | def step(self, action): 118 | mask = np.concatenate([self.prev_obs[k] for k in self.mask_keys], -1) 119 | action[self.action_key] = np.logical_and(action[self.action_key], mask) 120 | self.prev_obs, rew, done, info = self.env.step(action) 121 | return deepcopy(self.prev_obs), rew, done, info 122 | 123 | 124 | class AddConstantObservationsWrapper(gym.ObservationWrapper): 125 | ''' 126 | Adds new constant observations to the environment. 127 | Args: 128 | new_obs: Dictionary with the new observations. 129 | ''' 130 | def __init__(self, env, new_obs): 131 | super().__init__(env) 132 | self.new_obs = new_obs 133 | for obs_key in self.new_obs: 134 | assert obs_key not in self.observation_space.spaces, ( 135 | f'Observation key {obs_key} exists in original observation space') 136 | if type(self.new_obs[obs_key]) in [list, tuple]: 137 | self.new_obs[obs_key] = np.array(self.new_obs[obs_key]) 138 | shape = self.new_obs[obs_key].shape 139 | self.observation_space = update_obs_space(self, {obs_key: shape}) 140 | 141 | def observation(self, obs): 142 | for key, val in self.new_obs.items(): 143 | obs[key] = val 144 | return obs 145 | 146 | 147 | class SpoofEntityWrapper(gym.ObservationWrapper): 148 | ''' 149 | Add extra entities along entity dimension such that shapes can match between 150 | environments with differing number of entities. This is meant to be used 151 | after SplitObservations and SelectKeysWrapper. This will also add masks that are 152 | 1 except along the new columns (which could be used by fully observed value function) 153 | Args: 154 | total_n_entities (int): total number of entities after spoofing (including spoofed ones) 155 | keys (list): observation keys with which to add entities along the second dimension 156 | mask_keys (list): mask keys with which to add columns. 
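A shape-only sketch of the padding behaviour (hypothetical sizes, not the wrapper's own code): real entities keep their values and a mask of ones, while spoofed entities are zero-padded and masked out.

    import numpy as np
    n_agents, n_real, n_total, dim = 2, 3, 5, 4
    entity_obs = np.random.randn(n_agents, n_real, dim)
    mask = np.ones((n_agents, n_real))
    n_to_spoof = n_total - n_real
    entity_obs = np.concatenate([entity_obs, np.zeros((n_agents, n_to_spoof, dim))], 1)    # (2, 5, 4)
    mask_spoof = np.concatenate([np.ones((n_agents, n_real)), np.zeros((n_agents, n_to_spoof))], -1)
    mask = np.concatenate([mask, np.zeros((n_agents, n_to_spoof))], -1)                    # (2, 5)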
157 | ''' 158 | def __init__(self, env, total_n_entities, keys, mask_keys): 159 | super().__init__(env) 160 | self.total_n_entities = total_n_entities 161 | self.keys = keys 162 | self.mask_keys = mask_keys 163 | for key in self.keys + self.mask_keys: 164 | shape = list(self.observation_space.spaces[key].shape) 165 | shape[1] = total_n_entities 166 | self.observation_space = update_obs_space(self, {key: shape}) 167 | for key in self.mask_keys: 168 | shape = list(self.observation_space.spaces[key].shape) 169 | self.observation_space = update_obs_space(self, {key + '_spoof': shape}) 170 | 171 | def observation(self, obs): 172 | for key in self.keys: 173 | n_to_spoof = self.total_n_entities - obs[key].shape[1] 174 | if n_to_spoof > 0: 175 | obs[key] = np.concatenate([obs[key], np.zeros((obs[key].shape[0], n_to_spoof, obs[key].shape[-1]))], 1) 176 | for key in self.mask_keys: 177 | n_to_spoof = self.total_n_entities - obs[key].shape[1] 178 | obs[key + '_spoof'] = np.concatenate([np.ones_like(obs[key]), np.zeros((obs[key].shape[0], n_to_spoof))], -1) 179 | if n_to_spoof > 0: 180 | obs[key] = np.concatenate([obs[key], np.zeros((obs[key].shape[0], n_to_spoof))], -1) 181 | 182 | return obs 183 | 184 | 185 | class ConcatenateObsWrapper(gym.ObservationWrapper): 186 | ''' 187 | Group multiple observations under the same key in the observation dictionary. 188 | Args: 189 | obs_groups: dict of {key_to_save: [keys to concat]} 190 | ''' 191 | def __init__(self, env, obs_groups): 192 | super().__init__(env) 193 | self.obs_groups = obs_groups 194 | for key_to_save, keys_to_concat in obs_groups.items(): 195 | assert np.all([np.array(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) == 196 | np.array(self.observation_space.spaces[k].shape[:-1]) 197 | for k in keys_to_concat]), \ 198 | f"Spaces were {[(k, v) for k, v in self.observation_space.spaces.items() if k in keys_to_concat]}" 199 | new_last_dim = sum([self.observation_space.spaces[k].shape[-1] for k in keys_to_concat]) 200 | new_shape = list(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) + [new_last_dim] 201 | self.observation_space = update_obs_space(self, {key_to_save: new_shape}) 202 | 203 | def observation(self, obs): 204 | for key_to_save, keys_to_concat in self.obs_groups.items(): 205 | obs[key_to_save] = np.concatenate([obs[k] for k in keys_to_concat], -1) 206 | return obs 207 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/README.md: -------------------------------------------------------------------------------- 1 | # Randomized Uncertain Social Preferences 2 | We share the environment code used in the work *Emergent Reciprocity and Team Formation from Randomized Uncertain Social Preferences* (TODO: ADD LINK). 3 | 4 | The relevant code for randomized uncertain social preferences are in wrappers in *wrappers_rusp.py* --- Here we define a wrapper that defines a random reward sharing relationship graph per episode and transforms agents' reward accordingly. Each agent is given an independent uncertainty and noisy sample around this relationship graph. Tests for making sure observations get routed properly are in *test_wrapper_rusp.py*. 
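To make the idea concrete, here is a toy, self-contained sketch of the kind of transform the wrapper applies (illustrative only; the exact sampling scheme, observation keys and shapes are defined in *wrappers_rusp.py*): each episode samples a reward-sharing matrix `T`, agents are trained on the shared reward `T @ r`, and each agent only observes its own noisy, uncertainty-weighted view of `T`.

```python
import numpy as np

n_agents = 4
rng = np.random.default_rng(0)

# Per-episode reward-sharing relationship graph (toy row-normalised version).
T = rng.uniform(size=(n_agents, n_agents))
T /= T.sum(axis=1, keepdims=True)

# Each agent gets an independent uncertainty level and a noisy sample of T.
uncertainty = rng.uniform(size=(n_agents, 1, 1))
noisy_T_per_agent = T[None] + uncertainty * rng.normal(size=(n_agents, n_agents, n_agents))

raw_reward = rng.normal(size=n_agents)        # what the base environment returns
shared_reward = T @ raw_reward                # what agents are actually trained on
```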
5 | 6 | ## Environments 7 | * *env_ipd.py*: 2-player infinite-horizon prisoner's dilemma. 8 | * *env_indirect_reciprocity.py*: n-player infinite-horizon prisoner's dilemma where at each step 2 agents are randomly chosen to play. 9 | * *env_prisoners_buddy.py*: an abstract game where agents must mutually choose each other and resist temptation to defect and change teams. 10 | * *env_oasis.py*: MuJoCo-based survival game where the environment is resource-constrained such that only a subset of agents can survive. -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/multi-agent-emergence-environments/bafaf1e11e6398624116761f91ae7c93b136f395/randomized_uncertain_social_preferences/rusp/__init__.py -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/abstract_base_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym.spaces import Dict 4 | from mujoco_worldgen.util.types import store_args 5 | 6 | 7 | class AbstractBaseEnv(gym.Env): 8 | ''' 9 | Barebones Gym Env that allows the game to be constructed solely in wrappers. 10 | ''' 11 | @store_args 12 | def __init__(self, n_agents): 13 | self.metadata = {} 14 | self.metadata['n_agents'] = n_agents 15 | self.metadata['n_actors'] = n_agents 16 | self.observation_space = Dict({}) 17 | self.action_space = Dict({}) 18 | 19 | def step(self, action): 20 | return {}, np.zeros(self.n_agents), False, {} 21 | 22 | def reset(self, **kwargs): 23 | return {} 24 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/env_ipd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym 3 | from gym.spaces import Tuple, Discrete 4 | from mae_envs.wrappers.util import update_obs_space 5 | from mujoco_worldgen.util.types import store_args 6 | from rusp.abstract_base_env import AbstractBaseEnv 7 | from rusp.wrappers_util import RandomizedHorizonWrapper 8 | from rusp.wrappers_rusp import RUSPWrapper, add_rew_share_observation_keys 9 | from mae_envs.wrappers.util import ConcatenateObsWrapper 10 | from mae_envs.wrappers.multi_agent import (SplitObservations, SelectKeysWrapper) 11 | 12 | 13 | class IteratedMatrixGameWrapper(gym.Wrapper): 14 | ''' 15 | 2-player matrix game. Agents get a single binary action "action_defect". Agents 16 | get to observe the last action each agent took. Agents are rewarded according to 17 | payoff_matrix. 18 | Args: 19 | payoff_matrix (2x2x2 np.ndarray): the payoff matrix. We index into this payoff_matrix 20 | according to agent actions. 21 | Observations: 22 | prev_ac (n_agents, 1): previous action each agent took.
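Example of how the payoff matrix is indexed by the two agents' binary actions (the values here are the defaults used by make_env below):

    import numpy as np
    cc, cd, dc, dd = [2, 2], [-2, 4], [4, -2], [0, 0]
    payoff_matrix = np.array([[cc, cd],
                              [dc, dd]])                     # shape (2, 2, 2)
    action_defect = np.array([1, 0])                         # agent 0 defects, agent 1 cooperates
    rew = payoff_matrix[action_defect[0], action_defect[1]]  # array([ 4, -2])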
23 | ''' 24 | @store_args 25 | def __init__(self, env, payoff_matrix): 26 | super().__init__(env) 27 | self.n_agents = self.metadata['n_agents'] 28 | self.action_space.spaces['action_defect'] = Tuple([Discrete(n=2) for _ in range(self.n_agents)]) 29 | self.observation_space = update_obs_space(self, {'prev_ac': [self.n_agents, 1]}) 30 | 31 | def reset(self): 32 | self.previous_action = -1 * np.ones(self.n_agents) 33 | self.num_defects = np.zeros(self.n_agents) 34 | self.num_coops = np.zeros(self.n_agents) 35 | return self.observation(self.env.reset()) 36 | 37 | def step(self, action): 38 | self.previous_action = action['action_defect'].copy() 39 | obs, _, done, info = self.env.step(action) 40 | 41 | self.num_defects += action['action_defect'] 42 | self.num_coops += (1 - action['action_defect']) 43 | rew = self.payoff_matrix[action['action_defect'][0], action['action_defect'][1]] 44 | 45 | if done: 46 | info.update({f'actor{i}_n_defects': n_defects for i, n_defects in enumerate(self.num_defects)}) 47 | info.update({f'actor{i}_n_coops': n_coops for i, n_coops in enumerate(self.num_coops)}) 48 | return self.observation(obs), rew, done, info 49 | 50 | def observation(self, obs): 51 | obs['prev_ac'] = self.previous_action[:, None] 52 | return obs 53 | 54 | 55 | class LastAgentScripted(gym.Wrapper): 56 | ''' 57 | Replace the last agent with either a all-cooperate, all-defect, or tit-for-tat scripted policy. 58 | The last agent is considered part of the environment, so we remove them from the observation 59 | and action space. 60 | Args: 61 | policy_to_play (string): One of "allc", "alld", or "tft" 62 | ''' 63 | def __init__(self, env, policy_to_play): 64 | super().__init__(env) 65 | assert policy_to_play in ['allc', 'alld', 'tft'] 66 | self.policy_to_play = policy_to_play 67 | self.metadata['n_actors'] -= 1 68 | for k, v in self.action_space.spaces.items(): 69 | self.action_space.spaces[k] = Tuple(v.spaces[:-1]) 70 | 71 | def reset(self): 72 | self.previous_action = 0 73 | return self.observation(self.env.reset()) 74 | 75 | def step(self, action): 76 | if self.policy_to_play == 'allc': 77 | ac_to_play = 0 78 | elif self.policy_to_play == 'alld': 79 | ac_to_play = 1 80 | elif self.policy_to_play == 'tft': 81 | ac_to_play = self.previous_action 82 | 83 | self.previous_action = action['action_defect'][0] 84 | action['action_defect'] = np.concatenate([action['action_defect'], [ac_to_play]]) 85 | 86 | obs, rew, done, info = self.env.step(action) 87 | return self.observation(obs), rew[:-1], done, info 88 | 89 | def observation(self, obs): 90 | obs = {k: v[:-1] for k, v in obs.items()} 91 | return obs 92 | 93 | 94 | def make_env(horizon=10, horizon_lower=None, horizon_upper=None, 95 | prob_per_step_to_stop=0.1, # If set then we play the infinite game, 96 | mutual_cooperate=2, defected_against=-2, successful_defect=4, mutual_defect=0, 97 | # Evals 98 | against_all_c=False, against_all_d=False, against_tft=False, 99 | # Random Teams 100 | rusp_args={}): 101 | env = AbstractBaseEnv(2) 102 | 103 | env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon, upper_lim=horizon_upper or horizon, 104 | prob_per_step_to_stop=prob_per_step_to_stop) 105 | # Construct Payoff Matrix 106 | cc = [mutual_cooperate, mutual_cooperate] 107 | cd = [defected_against, successful_defect] 108 | dc = list(reversed(cd)) 109 | dd = [mutual_defect, mutual_defect] 110 | payoff_matrix = np.array([[cc, cd], 111 | [dc, dd]]) 112 | env = IteratedMatrixGameWrapper(env, payoff_matrix=payoff_matrix) 113 | 114 | env = 
RUSPWrapper(env, **rusp_args) 115 | 116 | keys_self = ['prev_ac', 'timestep'] 117 | keys_additional_self_vf = ['fraction_episode_done', 'horizon'] 118 | 119 | keys_other_agents = ['prev_ac'] 120 | keys_additional_other_agents_vf = [] 121 | keys_self_matrices = [] 122 | add_rew_share_observation_keys(keys_self=keys_self, 123 | keys_additional_self_vf=keys_additional_self_vf, 124 | keys_other_agents=keys_other_agents, 125 | keys_additional_other_agents_vf=keys_additional_other_agents_vf, 126 | keys_self_matrices=keys_self_matrices, 127 | **rusp_args) 128 | keys_external = ['other_agents', 129 | 'other_agents_vf', 130 | 'additional_self_vf_obs'] 131 | 132 | env = SplitObservations(env, keys_self + keys_additional_self_vf, 133 | keys_copy=[], keys_self_matrices=keys_self_matrices) 134 | env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents, 135 | 'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf, 136 | 'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]}) 137 | env = SelectKeysWrapper(env, keys_self=keys_self, 138 | keys_other=keys_external) 139 | 140 | if against_all_c or against_all_d or against_tft: 141 | if against_all_c: 142 | policy_to_play = 'allc' 143 | elif against_all_d: 144 | policy_to_play = 'alld' 145 | elif against_tft: 146 | policy_to_play = 'tft' 147 | env = LastAgentScripted(env, policy_to_play) 148 | return env 149 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/env_prisoners_buddy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from mujoco_worldgen.util.types import store_args 4 | from mae_envs.wrappers.util import update_obs_space 5 | from mae_envs.wrappers.util import ConcatenateObsWrapper 6 | from mae_envs.wrappers.multi_agent import (SplitObservations, SelectKeysWrapper) 7 | from rusp.wrappers_rusp import RUSPWrapper, add_rew_share_observation_keys 8 | from rusp.wrappers_util import RandomIdentityVector, RandomizedHorizonWrapper, OtherActorAttentionAction, ActionOptionsWrapper 9 | from rusp.abstract_base_env import AbstractBaseEnv 10 | 11 | 12 | class PrisonersBuddy(OtherActorAttentionAction): 13 | ''' 14 | Agents must mutually choose others to get reward (mutual_cooperate_rew). They can choose to sit out 15 | and receive zero reward. If they make an unreciprocated choice, the targeted agent will receive a defection 16 | reward (successful_defect_rew). We call it a defection reward since they could have reciprocated the choice. 17 | The agent making the unreciprocated choice receives a penalty (defected_against_rew). 18 | 19 | Agents get a chance to "communicate" in that their choices only elicit rewards every few timesteps (choosing_period). 20 | This gives them time to break symmetry.
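A toy sketch of the mutual-choice bookkeeping on a reward round (4 hypothetical agents; the wrapper's own update is _prisoners_buddy_reward_update below): mutual picks form a team, while a one-way pick rewards the target and penalises the chooser.

    import numpy as np
    n_agents = 4
    choices = np.array([1, 0, 3, -1])                 # agent i picked choices[i]; -1 = sit out
    chose_me = np.zeros((n_agents, n_agents), dtype=bool)
    for i, target in enumerate(choices):
        if target != -1:
            chose_me[target, i] = True
    both_chose = chose_me & chose_me.T                # agents 0 and 1 picked each other
    on_team = np.any(both_chose, axis=1)
    one_way = chose_me & ~both_chose                  # agent 2 picked agent 3, unreciprocated
    rew = np.zeros(n_agents)
    rew[on_team] += 2.0                               # mutual_cooperate_rew
    rew[np.any(one_way, axis=0)] += -1.0              # agent 2 pays defected_against_rew
    rew += one_way.sum(axis=1) * 1.0                  # agent 3 collects successful_defect_rew
    # rew -> array([ 2.,  2., -1.,  1.])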
21 | 22 | Observations: 23 | chose_me (n_agents, n_agents, 1): which other agents (column) chose me (row agent) last step 24 | i_chose (n_agents, n_agents, 1): which other agents (column) did I choose (row agent) last step 25 | chose_me_rew (n_agents, n_agents, 1): which other agents (column) chose me (row agent) last step reward was given 26 | i_chose_rew (n_agents, n_agents, 1): which other agents (column) did I choose (row agent) last step reward was given 27 | i_chose_any_rew (n_agents, 1): Did I choose to sitout or choose someone last timestep reward was given 28 | previous_choice_identity (n_agents, agent_identity_dim): ID of agent I previously chose 29 | next_choice_is_real (n_agents, 1): is the next timestep one in which reward will be given 30 | ''' 31 | @store_args 32 | def __init__(self, env, choosing_period, 33 | agent_identity_dim=4, 34 | mutual_cooperate_rew=2, 35 | defected_against_rew=-1, 36 | successful_defect_rew=1): 37 | super().__init__(env, 'action_choose_agent') 38 | self.observation_space = update_obs_space(self, { 39 | 'chose_me': [self.n_agents, self.n_agents, 1], 40 | 'i_chose': [self.n_agents, self.n_agents, 1], 41 | 'chose_me_rew': [self.n_agents, self.n_agents, 1], 42 | 'i_chose_rew': [self.n_agents, self.n_agents, 1], 43 | 'i_chose_any_rew': [self.n_agents, 1], 44 | 'previous_choice_identity': [self.n_agents, agent_identity_dim], 45 | 'next_choice_is_real': [self.n_agents, 1], 46 | }) 47 | 48 | def reset(self): 49 | self._t = 1 # Start t at 1 such that first round is not a reward round 50 | self._chose_me = np.zeros((self.n_agents, self.n_agents)) 51 | self._chose_me_rew = np.zeros((self.n_agents, self.n_agents)) 52 | self._n_times_not_chosen = np.zeros((self.n_agents)) 53 | self._n_times_team_changed = np.zeros((self.n_agents)) 54 | self._n_agents_on_team = [] 55 | self._previous_choice_identity = np.zeros((self.n_agents, self.agent_identity_dim)) 56 | self._i_chose_any_rew_obs = np.zeros((self.n_agents, 1)) 57 | self._team_lengths = [] 58 | self._n_successful_defections = 0 59 | self._current_team_lengths = defaultdict(lambda: 0) 60 | self._previous_teams = np.ones(self.n_agents, dtype=int) * -1 61 | self._both_chose = np.zeros((self.n_agents, self.n_agents), dtype=bool) 62 | self._perfect_game = True 63 | self._first_choice = True 64 | 65 | return self.observation(self.env.reset()) 66 | 67 | def step(self, action): 68 | obs, rew, done, info = self.env.step(action) 69 | self._chose_me = np.zeros((self.n_agents, self.n_agents), dtype=bool) 70 | targets = np.ones(self.n_agents, dtype=int) * -1 71 | for i in range(self.n_agents): 72 | target = self._get_target_actor(i, action) 73 | if len(target): 74 | targets[i] = target[0] 75 | self._chose_me[target[0], i] = 1 76 | 77 | self._previous_choice_identity = obs['agent_identity'][targets] 78 | self._previous_choice_identity[targets == -1] = 0 79 | 80 | # Reward rounds 81 | if self._t % self.choosing_period == 0: 82 | self._both_chose = self._chose_me * self._chose_me.T 83 | self._chose_me_rew = self._chose_me.copy() 84 | 85 | self._teams = np.argmax(self._both_chose, axis=1) # Indicies of teamate 86 | self._teams[np.all(self._both_chose == 0, axis=1)] = -1 # Make sure those without team are set to -1 instead of 0 87 | 88 | rew = self._prisoners_buddy_reward_update(rew) 89 | 90 | # Track stats 91 | self._n_times_not_chosen[np.sum(self._chose_me, 1) == 0] += 1 92 | # Since both_chose is symmetric, just get the index of nonzero entry in upper triangle 93 | current_team_indices = 
np.c_[np.nonzero(np.triu(self._both_chose))] 94 | current_team_tuples = list(map(tuple, current_team_indices)) 95 | teams_done = [k for k in self._current_team_lengths.keys() if k not in current_team_tuples] 96 | 97 | for team_done in teams_done: 98 | self._team_lengths.append(self._current_team_lengths[team_done]) 99 | del self._current_team_lengths[team_done] 100 | for current_team_tuple in current_team_tuples: 101 | self._current_team_lengths[current_team_tuple] += 1 102 | 103 | self._i_chose_any_rew_obs = np.any(self._chose_me_rew, 0)[:, None] 104 | 105 | if self._first_choice: 106 | self._first_choice = False 107 | else: 108 | all_teams_didnt_change = np.all(self._previous_teams == self._teams) 109 | max_number_of_teams_filled = np.sum(self._teams != -1) == ((self.n_agents // 2) * 2) 110 | self._perfect_game = self._perfect_game and all_teams_didnt_change and max_number_of_teams_filled 111 | 112 | self._previous_teams = self._teams 113 | 114 | self._t += 1 115 | 116 | if done: 117 | self._team_lengths += list(self._current_team_lengths.values()) 118 | info['average_team_length'] = np.mean(self._team_lengths) if len(self._team_lengths) else 0 119 | info['n_times_team_changed'] = np.sum(self._n_times_team_changed) 120 | info['n_agents_on_team_per_step'] = np.mean(self._n_agents_on_team) 121 | info['number_decisions'] = self._t / self.choosing_period 122 | info['n_unique_not_chosen'] = np.sum(self._n_times_not_chosen > 0) 123 | info['n_successful_defections'] = self._n_successful_defections 124 | info['perfect_game'] = self._perfect_game 125 | 126 | return self.observation(obs), rew, done, info 127 | 128 | def observation(self, obs): 129 | obs['chose_me'] = self._chose_me[:, :, None] 130 | obs['i_chose'] = self._chose_me.T[:, :, None] 131 | obs['chose_me_rew'] = self._chose_me_rew[:, :, None] 132 | obs['i_chose_rew'] = self._chose_me_rew.T[:, :, None] 133 | obs['i_chose_any_rew'] = self._i_chose_any_rew_obs 134 | obs['previous_choice_identity'] = self._previous_choice_identity 135 | # assumes this is called after t is increased 136 | obs['next_choice_is_real'] = np.ones((self.n_agents, 1)) if self._t % self.choosing_period == 0 else np.zeros((self.n_agents, 1)) 137 | return obs 138 | 139 | def _prisoners_buddy_reward_update(self, rew): 140 | on_team = np.any(self._both_chose, axis=1) 141 | chose_me_oneway = (self._chose_me & ~self._both_chose) 142 | num_chose_me_oneway = np.sum(chose_me_oneway, axis=1) 143 | i_chose_one_way = np.any(chose_me_oneway, axis=0) 144 | 145 | assert np.all(np.sum(chose_me_oneway, axis=0) <= 1) 146 | assert np.all((i_chose_one_way & on_team) == 0) 147 | 148 | previous_has_team = (self._previous_teams != -1) 149 | your_team_changed = (self._teams != self._previous_teams) 150 | 151 | rew[on_team] += self.mutual_cooperate_rew 152 | rew[i_chose_one_way] += self.defected_against_rew 153 | rew += num_chose_me_oneway * self.successful_defect_rew 154 | 155 | # Stats 156 | self._n_successful_defections += np.sum(i_chose_one_way) 157 | self._n_times_team_changed += (previous_has_team & your_team_changed) 158 | self._n_agents_on_team.append(np.sum(on_team)) 159 | 160 | return rew 161 | 162 | 163 | def make_env(n_agents=5, horizon=50, horizon_lower=None, horizon_upper=None, 164 | prob_per_step_to_stop=0.02, 165 | choosing_period=5, 166 | mutual_cooperate_rew=2, defected_against_rew=-2, successful_defect_rew=1, 167 | agent_identity_dim=16, 168 | rusp_args={}): 169 | env = AbstractBaseEnv(n_agents) 170 | env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon, 
upper_lim=horizon_upper or horizon, 171 | prob_per_step_to_stop=prob_per_step_to_stop) 172 | env = RandomIdentityVector(env, vector_dim=agent_identity_dim) 173 | 174 | env = PrisonersBuddy(env, choosing_period=choosing_period, 175 | agent_identity_dim=agent_identity_dim, 176 | mutual_cooperate_rew=mutual_cooperate_rew, defected_against_rew=defected_against_rew, 177 | successful_defect_rew=successful_defect_rew) 178 | 179 | env = ActionOptionsWrapper(env, ['action_choose_agent'], {'action_choose_agent': -1}) 180 | 181 | env = RUSPWrapper(env, **rusp_args) 182 | 183 | keys_self = ['previous_choice', 184 | 'next_choice_is_real', 185 | 'i_chose_any_rew', 186 | 'agent_identity', 187 | 'previous_choice_identity', 188 | 'timestep'] 189 | keys_additional_self_vf = ['fraction_episode_done', 'horizon'] 190 | 191 | keys_other_agents = [ 192 | 'previous_choice', 193 | 'chose_me', 194 | 'i_chose', 195 | 'chose_me_rew', 196 | 'i_chose_rew', 197 | 'i_chose_any_rew', 198 | 'agent_identity', 199 | 'previous_choice_identity' 200 | ] 201 | keys_additional_other_agents_vf = [] 202 | keys_self_matrices = ['chose_me', 203 | 'i_chose', 204 | 'chose_me_rew', 205 | 'i_chose_rew'] 206 | 207 | keys_external = ['other_agents', 208 | 'other_agents_vf', 209 | 'additional_self_vf_obs'] 210 | 211 | add_rew_share_observation_keys(keys_self=keys_self, 212 | keys_additional_self_vf=keys_additional_self_vf, 213 | keys_other_agents=keys_other_agents, 214 | keys_additional_other_agents_vf=keys_additional_other_agents_vf, 215 | keys_self_matrices=keys_self_matrices, 216 | **rusp_args) 217 | 218 | env = SplitObservations(env, keys_self + keys_additional_self_vf, 219 | keys_copy=[], keys_self_matrices=keys_self_matrices) 220 | env = ConcatenateObsWrapper(env, {'other_agents': keys_other_agents, 221 | 'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf, 222 | 'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]}) 223 | env = SelectKeysWrapper(env, keys_self=keys_self, 224 | keys_other=keys_external) 225 | 226 | return env 227 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/test_env_indirect_reciprocity.py: -------------------------------------------------------------------------------- 1 | from rusp.env_indirect_reciprocity import make_env 2 | import numpy as np 3 | from copy import deepcopy 4 | 5 | 6 | def _test_fixed_policy(against_all_d=False, against_all_c=False): 7 | env = make_env(against_all_d=against_all_d, against_all_c=against_all_c, 8 | last_agent_always_plays=True) 9 | prev_obs = env.reset() 10 | for i in range(1000): 11 | currently_playing = np.squeeze(prev_obs['youre_playing_self']) 12 | ac = {'action_defect': np.random.randint(0, 2, size=(env.metadata['n_actors']))} 13 | 14 | obs, rew, done, info = env.step(ac) 15 | 16 | if against_all_d: 17 | assert np.all(rew[currently_playing & (ac['action_defect'] == 0)] == -2) 18 | assert np.all(rew[currently_playing & (ac['action_defect'] == 1)] == 0) 19 | elif against_all_c: 20 | assert np.all(rew[currently_playing & (ac['action_defect'] == 0)] == 2) 21 | assert np.all(rew[currently_playing & (ac['action_defect'] == 1)] == 4) 22 | else: 23 | assert False 24 | assert np.all(rew[~currently_playing] == 0) 25 | 26 | prev_obs = obs 27 | 28 | if done: 29 | prev_obs = env.reset() 30 | 31 | 32 | def test_all_d(): 33 | _test_fixed_policy(against_all_d=True) 34 | 35 | 36 | def test_all_c(): 37 | _test_fixed_policy(against_all_c=True) 38 | 39 | 40 | # Tests for play 
orderings 41 | def test_last_always_plays(): 42 | env = make_env(last_agent_always_plays=True) 43 | obs = env.reset() 44 | assert obs['youre_playing_self'][-1, 0] 45 | ac = {'action_defect': np.random.randint(0, 2, size=(env.metadata['n_actors']))} 46 | for i in range(1000): 47 | obs, _, done, _ = env.step(ac) 48 | assert obs['youre_playing_self'][-1, 0] 49 | 50 | if done: 51 | obs = env.reset() 52 | assert obs['youre_playing_self'][-1, 0] 53 | 54 | 55 | def test_last_first_versus_last(): 56 | env = make_env(last_step_first_agent_vs_last_agent=True) 57 | prev_obs = env.reset() 58 | ac = {'action_defect': np.random.randint(0, 2, size=(env.metadata['n_actors']))} 59 | for i in range(1000): 60 | obs, _, done, _ = env.step(ac) 61 | 62 | if done: 63 | assert prev_obs['youre_playing_self'][-1, 0] 64 | assert prev_obs['youre_playing_self'][0, 0] 65 | obs = env.reset() 66 | 67 | prev_obs = deepcopy(obs) 68 | 69 | 70 | def test_last_doesnt_play_until(): 71 | env = make_env(last_doesnt_play_until_t=5) 72 | ac = {'action_defect': np.random.randint(0, 2, size=(env.metadata['n_actors']))} 73 | obs = env.reset() 74 | done = False 75 | t = 0 76 | for i in range(1000): 77 | if t < 5: 78 | assert not obs['youre_playing_self'][-1, 0] 79 | obs, rew, done, info = env.step(ac) 80 | t += 1 81 | 82 | if done: 83 | obs = env.reset() 84 | done = False 85 | t = 0 86 | 87 | 88 | def test_last_doesnt_play_until_and_last_must_play_at_t(): 89 | env = make_env(last_doesnt_play_until_t=5, last_must_play_at_t=True) 90 | ac = {'action_defect': np.random.randint(0, 2, size=(env.metadata['n_actors']))} 91 | obs = env.reset() 92 | done = False 93 | t = 0 94 | for i in range(1000): 95 | if t < 5: 96 | assert not obs['youre_playing_self'][-1, 0] 97 | if t == 5: 98 | assert obs['youre_playing_self'][-1, 0] 99 | obs, rew, done, info = env.step(ac) 100 | t += 1 101 | 102 | if done: 103 | obs = env.reset() 104 | done = False 105 | t = 0 106 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/test_env_ipd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rusp.env_ipd import make_env 3 | 4 | 5 | def test_env_runs(): 6 | env = make_env() 7 | env.reset() 8 | 9 | action = {'action_defect': np.array([0, 0])} 10 | obs, rew, done, info = env.step(action) 11 | assert np.all(rew == np.array([2, 2])) 12 | 13 | action = {'action_defect': np.array([1, 0])} 14 | obs, rew, done, info = env.step(action) 15 | assert np.all(rew == np.array([4, -2])) 16 | 17 | action = {'action_defect': np.array([0, 1])} 18 | obs, rew, done, info = env.step(action) 19 | assert np.all(rew == np.array([-2, 4])) 20 | 21 | action = {'action_defect': np.array([1, 1])} 22 | obs, rew, done, info = env.step(action) 23 | assert np.all(rew == np.array([0, 0])) 24 | 25 | 26 | def test_env_against_all_c(): 27 | env = make_env(against_all_c=True) 28 | env.reset() 29 | 30 | action = {'action_defect': np.array([0])} 31 | obs, rew, done, info = env.step(action) 32 | assert np.all(rew == np.array([2])) 33 | 34 | action = {'action_defect': np.array([1])} 35 | obs, rew, done, info = env.step(action) 36 | assert np.all(rew == np.array([4])) 37 | 38 | 39 | def test_env_against_all_d(): 40 | env = make_env(against_all_d=True) 41 | env.reset() 42 | 43 | action = {'action_defect': np.array([0])} 44 | obs, rew, done, info = env.step(action) 45 | assert np.all(rew == np.array([-2])) 46 | 47 | action = {'action_defect': np.array([1])} 48 | obs, rew, 
done, info = env.step(action) 49 | assert np.all(rew == np.array([0])) 50 | 51 | 52 | def test_env_against_tft(): 53 | env = make_env(against_tft=True) 54 | env.reset() 55 | 56 | action = {'action_defect': np.array([0])} 57 | obs, rew, done, info = env.step(action) 58 | assert np.all(rew == np.array([2])) 59 | 60 | action = {'action_defect': np.array([1])} 61 | obs, rew, done, info = env.step(action) 62 | assert np.all(rew == np.array([4])) 63 | 64 | action = {'action_defect': np.array([1])} 65 | obs, rew, done, info = env.step(action) 66 | assert np.all(rew == np.array([0])) 67 | 68 | action = {'action_defect': np.array([0])} 69 | obs, rew, done, info = env.step(action) 70 | assert np.all(rew == np.array([-2])) 71 | 72 | action = {'action_defect': np.array([0])} 73 | obs, rew, done, info = env.step(action) 74 | assert np.all(rew == np.array([2])) 75 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/test_env_oasis.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import unittest 3 | import os 4 | 5 | EXAMPLES_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | EXAMINE_FILE_PATH = os.path.join(EXAMPLES_DIR, "../../bin/examine.py") 7 | 8 | 9 | class ExamineTest(unittest.TestCase): 10 | def test_examine_env(self): 11 | envs = [ 12 | "env_oasis.py" 13 | ] 14 | for env in envs: 15 | with self.assertRaises(subprocess.TimeoutExpired): 16 | subprocess.check_call( 17 | ["/usr/bin/env", "python", EXAMINE_FILE_PATH, os.path.join(EXAMPLES_DIR, env)], 18 | timeout=10) 19 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/test_env_prisoners_buddy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rusp.env_prisoners_buddy import make_env 3 | 4 | 5 | def test_env_runs(): 6 | env = make_env() 7 | env.reset() 8 | 9 | action = {'action_choose_agent': [0, 0, 3, 0, 1], 'action_choose_option': [1, 0, 0, 0, 0]} 10 | for i in range(5): 11 | obs, rew, done, info = env.step(action) 12 | 13 | assert np.all(rew == [1, -2, 3, 2, -2]) 14 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/test_wrapper_rusp.py: -------------------------------------------------------------------------------- 1 | from rusp.wrappers_rusp import RUSPGenerator 2 | import _jsonnet 3 | import json 4 | import os 5 | import numpy as np 6 | 7 | 8 | def test_compute_observations(): 9 | N_AGENTS = 2 10 | FILE_PATH = os.path.dirname(os.path.abspath(__file__)) 11 | graph_generator = RUSPGenerator() 12 | 13 | graph_generator._generate_social_preferences(N_AGENTS) 14 | graph_generator._generate_uncertainty(N_AGENTS) 15 | 16 | graph_generator.noise_std = np.arange(1, N_AGENTS ** 3 + 1).reshape((N_AGENTS, N_AGENTS, N_AGENTS)) 17 | graph_generator.noise = np.arange(1, N_AGENTS ** 3 + 1).reshape((N_AGENTS, N_AGENTS, N_AGENTS)) * 10 18 | graph_generator.unnormalized_reward_xform_mat = graph_generator.reward_xform_mat = np.arange(1, N_AGENTS ** 2 + 1).reshape((N_AGENTS, N_AGENTS)) 19 | 20 | graph_generator._precompute_observations(N_AGENTS) 21 | 22 | assert np.all(graph_generator.precomputed_obs['self_rew_value'] == np.array([1, 4])) 23 | assert np.all(graph_generator.precomputed_obs['self_rew_value_noisy'] == np.array([1, 4]) + np.array([10, 80])) 24 | assert 
np.all(graph_generator.precomputed_obs['self_rew_value_noise_level'] == np.array([1, 8])) 25 | 26 | assert np.all(graph_generator.precomputed_obs['other_rew_value_s'] == np.array( 27 | [[1, 4], 28 | [1, 4]])) 29 | assert np.all(graph_generator.precomputed_obs['other_rew_value_s_noisy'] == np.array( 30 | [[1 + 10, 4 + 40], 31 | [1 + 50, 4 + 80]])) 32 | assert np.all(graph_generator.precomputed_obs['other_rew_value_s_noise_level'] == np.array( 33 | [[1, 4], 34 | [5, 8]])) 35 | 36 | assert np.all(graph_generator.precomputed_obs['rew_share_so_s'] == np.array( 37 | [[1, 2], 38 | [3, 4]])) 39 | assert np.all(graph_generator.precomputed_obs['rew_share_so_s_noisy'] == np.array( 40 | [[1 + 10, 2 + 20], 41 | [3 + 70, 4 + 80]])) 42 | assert np.all(graph_generator.precomputed_obs['rew_share_so_s_noise_level'] == np.array( 43 | [[1, 2], 44 | [7, 8]])) 45 | 46 | assert np.all(graph_generator.precomputed_obs['rew_share_os_o'] == np.array( 47 | [[1, 3], 48 | [2, 4]])) 49 | assert np.all(graph_generator.precomputed_obs['rew_share_os_o_noisy'] == np.array( 50 | [[1 + 10, 3 + 70], 51 | [2 + 20, 4 + 80]])) 52 | assert np.all(graph_generator.precomputed_obs['rew_share_os_o_noise_level'] == np.array( 53 | [[1, 7], 54 | [2, 8]])) 55 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/wrappers_rusp.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from typing import Tuple, List 4 | from mae_envs.wrappers.util import update_obs_space 5 | from mujoco_worldgen.util.types import store_args 6 | 7 | 8 | def get_all_integer_partitions(n, min_team_size=1, max_team_size=np.inf): 9 | ''' 10 | Yield all integer partitions of n (this is a generator, not a list). 11 | Args: 12 | n (int): number of entities. 13 | min_team_size (int): minimum number of entities in a partition 14 | max_team_size (int): maximum number of entities in a partition 15 | ''' 16 | if n <= max_team_size: 17 | yield (n,) 18 | for i in range(min_team_size, n // 2 + 1): 19 | for p in get_all_integer_partitions(n - i, i, max_team_size): 20 | yield (i,) + p 21 | 22 | 23 | class RUSPGenerator: 24 | ''' 25 | Helper class to generate the randomized uncertain relationship graph. Agents are first 26 | partitioned into groups. Within each group we randomize the amount each agent shares 27 | reward with everyone else in the group. We then sample independent noise such that each 28 | agent observes an independent noisy observation of the relationship graph. 29 | 30 | Reward sharing values are sampled from a beta distribution with parameters alpha and beta. For 31 | all results in the paper except where we experiment with team hardness, we set both 32 | alpha and beta to 1. 33 | 34 | To compute noise added to the relationship graphs, we first sample the noise level (standard deviation 35 | of a Gaussian) from a uniform distribution independently per relationship, per agent.
36 | We then sample a single value from this Gaussian (with the sampled standard deviation), centered around the true value. 37 | 38 | Args: 39 | min_team_size (int): minimum size of a group of agents with non-zero reward sharing amounts 40 | max_team_size (int): maximum size of a group of agents with non-zero reward sharing amounts 41 | alpha (float): reward sharing beta distribution parameter 42 | beta (float): reward sharing beta distribution parameter 43 | allow_diagonal_non_1 (bool): if True then diagonal elements of the reward sharing matrix (an agent's 44 | weight over its own reward) can be less than 1 (sampled from the same beta distribution as for other 45 | relationships) 46 | obs_noise_std_range (tuple of float): Range (minimum and maximum) that the noise standard deviation can be sampled 47 | from. 48 | ''' 49 | @store_args 50 | def __init__(self, *, 51 | # Prosociality Graph 52 | min_team_size: int = 1, 53 | max_team_size: int = 1, 54 | alpha: float = 1.0, 55 | beta: float = 1.0, 56 | allow_diagonal_non_1: bool = True, 57 | # Uncertainty 58 | obs_noise_std_range: Tuple[float] = [0.0, 1.0], 59 | **kwargs): 60 | assert min_team_size >= 1 61 | assert max_team_size >= 1 62 | assert max_team_size >= min_team_size 63 | assert alpha > 0 64 | assert beta > 0 65 | assert np.all(np.array(obs_noise_std_range) >= 0) 66 | self.cached_partitions = {} # Keys are (n_agents, min_team_size, max_team_size) 67 | 68 | def _partition_agents(self, n_agents, min_team_size, max_team_size): 69 | ''' 70 | Return a random partition from the set of all integer partitions 71 | ''' 72 | settings = (n_agents, min_team_size, max_team_size) 73 | if settings not in self.cached_partitions: 74 | self.cached_partitions[settings] = list(get_all_integer_partitions(n_agents, min_team_size, max_team_size)) 75 | all_partitions = self.cached_partitions[settings] 76 | random_partitions = all_partitions[np.random.randint(len(all_partitions))] 77 | 78 | return random_partitions 79 | 80 | def _generate_social_preferences(self, n_agents): 81 | ''' 82 | Generate the relationship graph (without uncertainty) 83 | ''' 84 | # Generate random partitions 85 | if self.max_team_size != self.min_team_size: 86 | random_partitions = self._partition_agents(n_agents, self.min_team_size, self.max_team_size) 87 | else: 88 | random_partitions = np.random.randint(self.min_team_size, self.max_team_size + 1, (n_agents)) 89 | random_partitions = np.cumsum(random_partitions) 90 | random_partitions = random_partitions[random_partitions <= n_agents] 91 | random_partitions = np.concatenate([[0], random_partitions, [n_agents]]) 92 | 93 | # Convert random partitions into a block diagonal matrix 94 | self.reward_xform_mat = np.zeros((n_agents, n_agents)) 95 | for i in range(len(random_partitions) - 1): 96 | block = slice(random_partitions[i], random_partitions[i + 1]) 97 | self.reward_xform_mat[block, block] = 1 98 | 99 | # Randomize reward sharing values in block diagonal matrix 100 | self.reward_xform_mat *= np.random.beta(a=self.alpha, b=self.beta, size=(n_agents, n_agents)) 101 | 102 | # Make sure off-diagonal is symmetric 103 | self.reward_xform_mat = np.tril(self.reward_xform_mat, -1) + np.tril(self.reward_xform_mat).T 104 | 105 | if not self.allow_diagonal_non_1: 106 | np.fill_diagonal(self.reward_xform_mat, 1.0) 107 | 108 | # Randomly shuffle agents so that agent indices do not matter 109 | random_shuffle_mat = np.eye(n_agents) 110 | np.random.shuffle(random_shuffle_mat) 111 | # Permute rows and columns consistently: rotate into the shuffled order, then unrotate 112 | self.reward_xform_mat =
np.matmul(np.matmul(random_shuffle_mat.T, self.reward_xform_mat), random_shuffle_mat) 113 | 114 | # Normalize rows 115 | self.unnormalized_reward_xform_mat = self.reward_xform_mat.copy() 116 | self.reward_xform_mat /= np.sum(self.reward_xform_mat, axis=1, keepdims=True) 117 | 118 | def _generate_uncertainty(self, n_agents): 119 | ''' 120 | Generate uncertainty levels and noise to be applied to the matrices 121 | ''' 122 | self.noise_std = np.random.uniform(low=self.obs_noise_std_range[0], 123 | high=self.obs_noise_std_range[1], 124 | size=(n_agents, n_agents, n_agents)) 125 | self.noise = np.random.normal(scale=self.noise_std) 126 | 127 | def _precompute_observations(self, n_agents): 128 | ''' 129 | Precompute observations since they are static per episode. 130 | ''' 131 | # We have independent noisy observations per agent, so we copy the reward matrix n_agents times and 132 | # then add the noise matrices 133 | rew_mats = np.repeat(np.expand_dims(self.unnormalized_reward_xform_mat, 0), n_agents, axis=0) 134 | noisy_rew_mats = rew_mats + self.noise 135 | self.precomputed_obs = {} 136 | 137 | def _index_into_mats(key, *indices): 138 | ''' 139 | Helper function to create 3 observation types with the same indices 140 | ''' 141 | self.precomputed_obs[key] = rew_mats[indices] # Non-noisy version of the reward matrix 142 | self.precomputed_obs[key + "_noisy"] = noisy_rew_mats[indices] # Noisy version of the reward matrix 143 | self.precomputed_obs[key + '_noise_level'] = self.noise_std[indices] # Noise level associated with each entry in the noisy reward matrices 144 | 145 | def _transpose_existing(new_key, existing_key): 146 | ''' 147 | Helper function to transpose all 3 observations for a key. This is useful if an agent policy 148 | or value function needs to observe what other agents observe about it. 149 | ''' 150 | self.precomputed_obs[new_key] = self.precomputed_obs[existing_key].T 151 | self.precomputed_obs[new_key + "_noisy"] = self.precomputed_obs[existing_key + "_noisy"].T 152 | self.precomputed_obs[new_key + '_noise_level'] = self.precomputed_obs[existing_key + '_noise_level'].T 153 | 154 | # Relationship variable of myself (what is the weight over my own reward) with my own noise variable. 155 | # This is in effect the 3D diagonal, so the output shape will be (n_agents,) 156 | _index_into_mats('self_rew_value', np.arange(n_agents), np.arange(n_agents), np.arange(n_agents)) 157 | 158 | # Relationship variable of other agents' weight over their own reward with my own noise variable (s) 159 | # Row i is the diagonal of the ith matrix 160 | _index_into_mats('other_rew_value_s', slice(None), np.arange(n_agents), np.arange(n_agents)) 161 | 162 | # My relationship variable with other agents (so) with my noise (s) 163 | # Row i is row i of the ith matrix 164 | _index_into_mats('rew_share_so_s', np.arange(n_agents), np.arange(n_agents), slice(None)) 165 | 166 | # Other agents' relationship variable with me (os) with their noise (o) 167 | # Should only be used in the value function 168 | _transpose_existing('rew_share_os_o', 'rew_share_so_s') 169 | 170 | 171 | class RUSPWrapper(RUSPGenerator, gym.Wrapper): 172 | ''' 173 | Gym wrapper for generating relationship graphs. Generates a new relationship graph and uncertainties on reset. 174 | Provides all observations necessary to agents and transforms reward according to the relationship graph.
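Example (a minimal usage sketch; base_env is an assumed multi-agent env that exposes metadata['n_agents'] and returns a per-agent reward vector, and the keyword arguments are the RUSPGenerator parameters documented above):
            env = RUSPWrapper(base_env, min_team_size=1, max_team_size=2, obs_noise_std_range=[0.0, 1.0])
            obs = env.reset()                     # samples a fresh relationship graph and per-agent noise
            obs, rew, done, info = env.step(act)  # rew is reward_xform_mat matmul'd with the underlying reward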
175 | 176 | Observations: 177 | Each observation has the true value, the noisy value "_noisy" and the uncertainty level "_noise_level" 178 | 179 | self_rew_value: Relationship variable of myself (what is the weight over my own reward) with my own noise variable. 180 | other_rew_value_s: Relationship variable of other agents' weight over their own reward with my own noise variable (s) 181 | rew_share_so_s: My relationship variable with other agents (so) with my noise (s) 182 | rew_share_os_o: Other agents' relationship variable with me (os) with their noise (o). Should only be used in the value function 183 | ''' 184 | @store_args 185 | def __init__(self, env, **graph_kwargs): 186 | RUSPGenerator.__init__(self, **graph_kwargs) 187 | gym.Wrapper.__init__(self, env) 188 | n_a = self.metadata['n_agents'] 189 | self.obs_keys_with_shapes = { 190 | 'self_rew_value': [n_a, 1], 191 | 'self_rew_value_noisy': [n_a, 1], 192 | 'self_rew_value_noise_level': [n_a, 1], 193 | 'other_rew_value_s': [n_a, n_a, 1], 194 | 'other_rew_value_s_noisy': [n_a, n_a, 1], 195 | 'other_rew_value_s_noise_level': [n_a, n_a, 1], 196 | 'rew_share_so_s': [n_a, n_a, 1], 197 | 'rew_share_so_s_noisy': [n_a, n_a, 1], 198 | 'rew_share_so_s_noise_level': [n_a, n_a, 1], 199 | 'rew_share_os_o': [n_a, n_a, 1], 200 | 'rew_share_os_o_noisy': [n_a, n_a, 1], 201 | 'rew_share_os_o_noise_level': [n_a, n_a, 1], 202 | } 203 | self.observation_space = update_obs_space(self, self.obs_keys_with_shapes) 204 | 205 | def reset(self): 206 | self._generate_social_preferences(self.metadata['n_agents']) 207 | self._generate_uncertainty(self.metadata['n_agents']) 208 | self._precompute_observations(self.metadata['n_agents']) 209 | return self.observation(self.env.reset()) 210 | 211 | def step(self, action): 212 | obs, rew, done, info = self.env.step(action) 213 | rew = np.matmul(self.reward_xform_mat, rew) 214 | return self.observation(obs), rew, done, info 215 | 216 | def observation(self, obs): 217 | for k in self.obs_keys_with_shapes: 218 | obs[k] = np.expand_dims(self.precomputed_obs[k], -1) 219 | return obs 220 | 221 | 222 | def add_rew_share_observation_keys(*, keys_self: List[str], 223 | keys_additional_self_vf: List[str], 224 | keys_other_agents: List[str], 225 | keys_additional_other_agents_vf: List[str], 226 | keys_self_matrices: List[str], 227 | **kwargs): 228 | ''' 229 | Determines how keys about the relationship graph should be observed. 230 | Args: 231 | keys_self: keys that the agent should observe about itself 232 | keys_additional_self_vf: keys about an agent that only the value function should observe 233 | keys_other_agents: keys about other agents 234 | keys_additional_other_agents_vf: keys about other agents that only the value function should observe 235 | keys_self_matrices: keys that are shaped (n_agents, n_agents, X).
These need to be dealt with differently 236 | ''' 237 | 238 | keys_self += [ 239 | 'self_rew_value_noisy', 240 | 'self_rew_value_noise_level', 241 | ] 242 | keys_additional_self_vf.append('self_rew_value') 243 | 244 | keys_other_agents += [ 245 | 'rew_share_so_s_noisy', 246 | 'rew_share_so_s_noise_level', 247 | 'other_rew_value_s_noisy', 248 | 'other_rew_value_s_noise_level' 249 | ] 250 | 251 | other_rew_value_keys = [ 252 | 'other_rew_value_s_noisy', 253 | 'other_rew_value_s_noise_level', 254 | ] 255 | 256 | keys_additional_other_agents_vf += [ 257 | 'rew_share_so_s', 258 | 'other_rew_value_s', 259 | 'rew_share_os_o_noisy', 260 | 'rew_share_os_o_noise_level', 261 | ] 262 | 263 | keys_self_matrices += [ 264 | 'other_rew_value_s', 265 | 'other_rew_value_s_noisy', 266 | 'other_rew_value_s_noise_level', 267 | 'rew_share_so_s', 268 | 'rew_share_so_s_noisy', 269 | 'rew_share_so_s_noise_level', 270 | 'rew_share_os_o', 271 | 'rew_share_os_o_noisy', 272 | 'rew_share_os_o_noise_level', 273 | ] 274 | 275 | return keys_self, keys_additional_self_vf, keys_other_agents, keys_additional_other_agents_vf, keys_self_matrices 276 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/rusp/wrappers_util.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym.spaces import Tuple, Discrete 4 | from scipy.linalg import circulant 5 | from mae_envs.wrappers.util import update_obs_space 6 | from mujoco_worldgen.util.types import store_args 7 | 8 | 9 | class RandomizedHorizonWrapper(gym.Wrapper): 10 | ''' 11 | Randomize the horizon for a game. 12 | A fixed horizon can be set by setting lower_lim = upper_lim. 13 | A randomized but finite horizon can be set by setting upper_lim > lower_lim (randomized uniformly between these bounds) 14 | A discounted infinite horizon can be set by setting prob_per_step_to_stop, which is the probability that 15 | the episode will end on any given timestep. This is implemented by sampling the horizon from a geometric distribution. 16 | Args: 17 | lower_lim (int): Lower limit of the horizon 18 | upper_lim (int): Upper limit of the horizon 19 | prob_per_step_to_stop (float): probability that the episode will end on any given timestep. 20 | 21 | Either lower_lim and upper_lim must both be set or prob_per_step_to_stop must be set. 22 | 23 | Observations: 24 | horizon (n_agents, 1): Episode horizon. Intended for value function 25 | fraction_episode_done (n_agents, 1): Fraction of the episode complete. Intended for value function 26 | timestep (n_agents, 1): raw timestep.
Intended for policy 27 | ''' 28 | @store_args 29 | def __init__(self, env, lower_lim=None, upper_lim=None, prob_per_step_to_stop=None): 30 | super().__init__(env) 31 | assert (lower_lim is not None and upper_lim is not None) or prob_per_step_to_stop is not None 32 | if prob_per_step_to_stop is not None: 33 | assert prob_per_step_to_stop > 0 and prob_per_step_to_stop < 1 34 | self.observation_space = update_obs_space(self, { 35 | 'fraction_episode_done': [self.metadata['n_agents'], 1], 36 | 'horizon': [self.metadata['n_agents'], 1], 37 | 'timestep': [self.metadata['n_agents'], 1] 38 | }) 39 | self.observation_space = update_obs_space(self, {}) 40 | 41 | def reset(self, **kwargs): 42 | self._t = 0 43 | if self.prob_per_step_to_stop is not None: 44 | self._horizon = np.random.geometric(p=self.prob_per_step_to_stop) 45 | else: 46 | self._horizon = np.random.randint(self.lower_lim, self.upper_lim + 1) if self.lower_lim < self.upper_lim else self.lower_lim 47 | return self.observation(self.env.reset()) 48 | 49 | def step(self, action): 50 | self._t += 1 51 | obs, rew, done, info = self.env.step(action) 52 | if self._t >= self._horizon: 53 | done = True 54 | return self.observation(obs), rew, done, info 55 | 56 | def observation(self, obs): 57 | obs['timestep'] = np.ones((self.metadata['n_agents'], 1), dtype=int) * self._t 58 | obs['fraction_episode_done'] = np.ones((self.metadata['n_agents'], 1)) * self._t / self._horizon 59 | obs['horizon'] = np.ones((self.metadata['n_agents'], 1)) * self._horizon 60 | return obs 61 | 62 | 63 | class RandomIdentityVector(gym.Wrapper): 64 | ''' 65 | Give agents a vector_dim-dimensional random identity sampled uniformly between 0 and 1. 66 | 67 | Args: 68 | vector_dim (int): Dimension of the identity vector 69 | 70 | Observations: 71 | agent_identity (n_agents, vector_dim): identity for each agent 72 | ''' 73 | @store_args 74 | def __init__(self, env, vector_dim=16): 75 | super().__init__(env) 76 | self.observation_space = update_obs_space(self, {'agent_identity': [self.metadata['n_agents'], self.vector_dim]}) 77 | 78 | def reset(self): 79 | self.agent_identities = np.random.uniform(0, 1, (self.metadata['n_agents'], self.vector_dim)) 80 | return self.observation(self.env.reset()) 81 | 82 | def observation(self, obs): 83 | obs['agent_identity'] = self.agent_identities 84 | return obs 85 | 86 | def step(self, action): 87 | obs, rew, done, info = self.env.step(action) 88 | return self.observation(obs), rew, done, info 89 | 90 | 91 | class OtherActorAttentionAction(gym.Wrapper): 92 | ''' 93 | Utility class to make actions that attend over other agents possible. Agents will likely receive an entity 94 | based observation of others. The order of these entities is defined by a circulant matrix (see 95 | mae_envs.wrappers.multi_agent:SplitObservations). If a policy constructs an attention action head 96 | based on these observations we need to properly process its choice as the ordering will be different 97 | for every agent. 98 | 99 | This class defines a Discrete action head with number of options n_agents - 1 (attention over all other agents) 100 | and it defines a function _get_target_actor that given the choice of a particular agent maps this back 101 | to the true ordering of agents.
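For example (an illustrative sketch of the mapping, derived from scipy.linalg.circulant as used below): with 4 agents, self.other_actors would be {0: [3, 2, 1], 1: [0, 3, 2], 2: [1, 0, 3], 3: [2, 1, 0]}, so agent 1 picking attention index 1 is mapped back to agent 3.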
102 | Args: 103 | action_name (string): name of the action to create 104 | ''' 105 | @store_args 106 | def __init__(self, env, action_name): 107 | super().__init__(env) 108 | self.action_name = action_name 109 | self.n_agents = self.metadata['n_agents'] 110 | self.action_space.spaces[action_name] = Tuple([Discrete(n=self.metadata['n_agents'] - 1) 111 | for _ in range(self.n_agents)]) 112 | 113 | # This matches the circulant ordering used for "Others" Observations (see mae_envs.wrappers.multi_agent:SplitObservations) 114 | self.other_actors = np.arange(self.n_agents)[circulant(np.arange(self.n_agents))[:, 1:]] 115 | self.other_actors = dict(zip(np.arange(self.n_agents), self.other_actors)) 116 | 117 | def _get_target_actor(self, actor, action): 118 | ''' 119 | Return the true index of the targeted agent. Indices given by the action will be in a rotated space defined 120 | based on how entities are presented to the policy, so we must map back to the underlying ordering. 121 | 122 | If the index is -1, this means no other agent was chosen. 123 | ''' 124 | if action[self.action_name][actor] == -1: 125 | return np.array([]) 126 | else: 127 | return np.array([self.other_actors[actor][action[self.action_name][actor]]]) 128 | 129 | 130 | class ActionOptionsWrapper(gym.Wrapper): 131 | ''' 132 | Allows one to define a hierarchical action space by defining a meta action that chooses which 133 | sub action head to execute, e.g. if you want agents to only be able to attack OR eat. 134 | 135 | Args: 136 | action_keys (list): list of action head names that will be options 137 | defaults (dict): mapping from action_key to the value that should be passed if that action is NOT chosen. 138 | Downstream wrappers for this action_key will handle cases when the default is passed. 139 | do_nothing_option (bool): If True, adds the option to pick none of the available actions and pass the default 140 | value for all of them. 141 | 142 | Observations: 143 | previous_choice (n_agents, number of options): One-hot observation of each agent's previous action choice.
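Example (taken from env_prisoners_buddy.make_env above): ActionOptionsWrapper(env, ['action_choose_agent'], {'action_choose_agent': -1}) adds an 'action_choose_option' head whose options are ['action_choose_agent', 'do_nothing']; whenever an agent picks 'do_nothing', its 'action_choose_agent' entry is overwritten with the default -1 before being passed to downstream wrappers.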
144 | ''' 145 | @store_args 146 | def __init__(self, env, action_keys, defaults, do_nothing_option=True): 147 | super().__init__(env) 148 | if self.do_nothing_option: 149 | self.action_keys.append('do_nothing') 150 | self.n_agents = self.metadata['n_agents'] 151 | self.action_space.spaces['action_choose_option'] = Tuple([Discrete(n=len(self.action_keys)) 152 | for _ in range(self.metadata['n_agents'])]) 153 | self.observation_space = update_obs_space(self, {'previous_choice': [self.metadata['n_agents'], len(self.action_keys)]}) 154 | 155 | def reset(self): 156 | self.previous_choice = np.zeros((self.metadata['n_agents'], len(self.action_keys))) 157 | return self.observation(self.env.reset()) 158 | 159 | def step(self, action): 160 | for i in range(self.n_agents): 161 | for ac_ind, ac_name in enumerate(self.action_keys): 162 | if ac_ind != action['action_choose_option'][i] and ac_name != 'do_nothing': 163 | action[ac_name][i] = self.defaults[ac_name] 164 | 165 | self.previous_choice = np.eye(len(self.action_keys))[action['action_choose_option']] 166 | obs, rew, done, info = self.env.step(action) 167 | return self.observation(obs), rew, done, info 168 | 169 | def observation(self, obs): 170 | obs['previous_choice'] = self.previous_choice 171 | return obs 172 | -------------------------------------------------------------------------------- /randomized_uncertain_social_preferences/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | setup( 5 | name='rusp', 6 | version='0.0.0', 7 | packages=find_packages(), 8 | package_data={ 9 | '': ['*.pyx', '*.pxd', '*.pxi', '*.h'], 10 | }) 11 | -------------------------------------------------------------------------------- /requirements_ma_policy.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.13.1 2 | cloudpickle==0.5.2 3 | baselines==0.1.5 4 | opencv-python>=3.4.3.18 # needed for baselines not to crash 5 | pytest==5.0.1 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from os.path import dirname, realpath 3 | from setuptools import find_packages, setup 4 | 5 | 6 | setup( 7 | name='mae_envs', 8 | version='0.0.0', 9 | packages=find_packages(), 10 | package_data={ 11 | '': ['*.pyx', '*.pxd', '*.pxi', '*.h'], 12 | }) 13 | --------------------------------------------------------------------------------
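For completeness, a minimal random-rollout sketch for the prisoner's-buddy environment defined above, mirroring the patterns used in the test files (make_env, the 'action_choose_agent' / 'action_choose_option' heads, reset on done). The file name, loop length, and use of uniformly random actions are illustrative assumptions, not part of the repository.

# random_rollout_example.py (illustrative sketch, not part of the original package)
import numpy as np
from rusp.env_prisoners_buddy import make_env

n_agents = 5
env = make_env(n_agents=n_agents)
obs = env.reset()
for _ in range(200):
    action = {
        # Attention index into each agent's ordering of the other agents (see OtherActorAttentionAction)
        'action_choose_agent': np.random.randint(0, n_agents - 1, size=n_agents),
        # 0 = execute action_choose_agent, 1 = do nothing (see ActionOptionsWrapper)
        'action_choose_option': np.random.randint(0, 2, size=n_agents),
    }
    obs, rew, done, info = env.step(action)  # rew is a length-n_agents vector
    if done:
        obs = env.reset()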