├── PyMARL ├── src │ ├── __init__.py │ ├── components │ │ ├── __init__.py │ │ ├── transforms.py │ │ ├── epsilon_schedules.py │ │ ├── standarize_stream.py │ │ └── action_selectors.py │ ├── modules │ │ ├── __init__.py │ │ ├── mixers │ │ │ ├── __init__.py │ │ │ ├── vdn.py │ │ │ ├── qmix.py │ │ │ └── qtran.py │ │ ├── agents │ │ │ ├── __init__.py │ │ │ ├── rnn_agent.py │ │ │ └── mlp_agent.py │ │ └── critics │ │ │ ├── mlp.py │ │ │ ├── __init__.py │ │ │ ├── maddpg.py │ │ │ ├── ac.py │ │ │ ├── maddpg_ns.py │ │ │ ├── ac_ns.py │ │ │ ├── centralV.py │ │ │ ├── coma.py │ │ │ ├── centralV_ns.py │ │ │ └── coma_ns.py │ ├── controllers │ │ ├── __init__.py │ │ └── basic_controller.py │ ├── utils │ │ ├── dict2namedtuple.py │ │ ├── rl_utils.py │ │ ├── timehelper.py │ │ └── logging.py │ ├── runners │ │ ├── __init__.py │ │ └── episode_runner.py │ ├── learners │ │ ├── __init__.py │ │ └── q_learner.py │ ├── config │ │ ├── algs │ │ │ ├── vdn.yaml │ │ │ ├── iql.yaml │ │ │ ├── vdn_beta.yaml │ │ │ ├── iql_beta.yaml │ │ │ ├── qmix_beta.yaml │ │ │ ├── qmix.yaml │ │ │ ├── qtran.yaml │ │ │ ├── coma.yaml │ │ │ └── mappo.yaml │ │ ├── envs │ │ │ ├── sc2_beta.yaml │ │ │ ├── sc2.yaml │ │ │ ├── multicar.yaml │ │ │ ├── free.yaml │ │ │ └── flag.yaml │ │ └── default.yaml │ ├── envs │ │ ├── __init__.py │ │ ├── multiagentenv.py │ │ ├── flagenv.py │ │ ├── freeenv.py │ │ └── multicar_env.py │ ├── main.py │ └── painter.py ├── README.local └── README.md.pymarl ├── CraftEnv ├── src │ ├── __init__.py │ ├── craft │ │ ├── planner │ │ │ ├── __init__.py │ │ │ ├── smartcar_planner.py │ │ │ └── breadth_first_search.py │ │ ├── data │ │ │ └── urdf │ │ │ │ ├── block │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── flag │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── goal │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── slope │ │ │ │ ├── meshes │ │ │ │ │ ├── slope_end.STL │ │ │ │ │ ├── slope_base.STL │ │ │ │ │ ├── slope_end_collision.STL │ │ │ │ │ └── slope_base_collision.STL │ │ │ │ └── slope.urdf.xacro │ │ │ │ ├── smartcar │ │ │ │ ├── meshes │ │ │ │ │ ├── board.STL │ │ │ │ │ ├── wheel.STL │ │ │ │ │ └── body_link.STL │ │ │ │ └── smartcar.urdf.xacro │ │ │ │ └── wall │ │ │ │ ├── meshes │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ ├── blueprint │ │ │ ├── block_shaped_goal.yaml │ │ │ ├── strip_shaped_goal.yaml │ │ │ ├── simple_twolayer_goal.yaml │ │ │ ├── free_building_goal.yaml │ │ │ ├── breaking_barrier_goal.yaml │ │ │ ├── complex_twolayer_goal.yaml │ │ │ ├── simple_twolayer_init.yaml │ │ │ ├── block_shaped_init.yaml │ │ │ ├── strip_shaped_init.yaml │ │ │ ├── breaking_barrier_init.yaml │ │ │ ├── free_building_init.yaml │ │ │ └── complex_twolayer_init.yaml │ │ ├── __init__.py │ │ ├── goal.py │ │ ├── bullet_goal.py │ │ ├── bullet_flag.py │ │ ├── bullet_smartcar.py │ │ ├── bullet_slope.py │ │ ├── bullet_wall.py │ │ ├── action_enum.py │ │ ├── bullet_block.py │ │ ├── bread_first_search.py │ │ ├── grid_objs.py │ │ ├── blackboard.py │ │ ├── flag_env.py │ │ ├── free_env.py │ │ ├── matrix_env.py │ │ ├── matrix_to_bullet.py │ │ ├── utils.py │ │ └── scene.py │ └── create_pybullet_envs.py └── setup.py ├── .code.yml ├── LICENSE.md ├── README.md └── environment.yaml /PyMARL/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/components/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CraftEnv/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .create_pybullet_envs import * 2 | -------------------------------------------------------------------------------- /.code.yml: -------------------------------------------------------------------------------- 1 | source: 2 | third_party_source: 3 | filepath_regex: 4 | - /PyMARL/.* 5 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/__init__.py: -------------------------------------------------------------------------------- 1 | from .a_star import AStarPlanner 2 | from .breadth_first_search import BreadthFirstSearch 3 | -------------------------------------------------------------------------------- /PyMARL/src/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .basic_controller import BasicMAC 4 | 5 | REGISTRY["basic_mac"] = BasicMAC -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/block/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/block/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/flag/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/flag/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/goal/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/goal/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_end.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_end.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/board.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/board.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/wheel.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/wheel.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/wall/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/wall/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_base.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_base.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/body_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/body_link.STL -------------------------------------------------------------------------------- /PyMARL/src/utils/dict2namedtuple.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | 4 | def convert(dictionary): 5 | return namedtuple('GenericDict', dictionary.keys())(**dictionary) 6 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_end_collision.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_end_collision.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/block_shaped_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 2, y: 1, z: 1} 3 | - {id: 1, x: 2, y: 2, z: 1} 4 | - {id: 2, x: 3, y: 1, z: 1} 5 | - {id: 3, x: 3, y: 2, z: 1} -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/strip_shaped_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 0, z: 1} 3 | - {id: 1, x: 3, y: 1, z: 1} 4 | - {id: 2, x: 3, y: 2, z: 1} 5 | - {id: 3, x: 3, y: 3, z: 1} -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_base_collision.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_base_collision.STL -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .rnn_agent import RNNAgent 4 | REGISTRY["rnn"] = RNNAgent 5 | 6 | from .mlp_agent import MLPAgent 7 | REGISTRY["mlp"] = MLPAgent -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/simple_twolayer_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 2, y: 
3, z: 1} 3 | - {id: 1, x: 2, y: 3, z: 2} 4 | unfolded_body: 5 | - {id: 0, x: 2, y: 2, z: 1} 6 | unfolded_foot: 7 | - {id: 0, x: 2, y: 1, z: 1} -------------------------------------------------------------------------------- /PyMARL/src/runners/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .episode_runner import EpisodeRunner 4 | REGISTRY["episode"] = EpisodeRunner 5 | 6 | from .parallel_runner import ParallelRunner 7 | REGISTRY["parallel"] = ParallelRunner 8 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_data_path(): 5 | return os.path.join(os.path.dirname(__file__), "data") 6 | 7 | 8 | def get_urdf_path(): 9 | return os.path.join(get_data_path(), "urdf") 10 | -------------------------------------------------------------------------------- /PyMARL/README.local: -------------------------------------------------------------------------------- 1 | PyMARL git commit: c971afdceb34635d31b778021b0ef90d7af51e86 2 | EPyMARL git commit: f355a55262ac9afecdb53368fec6337c549cc160 3 | We merged the MAPPO implementation of EPyMARL into PyMARL. 4 | We also made modifications to connect CraftEnv environment to the PyMARL framework. 5 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/vdn.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | 4 | 5 | class VDNMixer(nn.Module): 6 | def __init__(self): 7 | super(VDNMixer, self).__init__() 8 | 9 | def forward(self, agent_qs, batch): 10 | return th.sum(agent_qs, dim=2, keepdim=True) -------------------------------------------------------------------------------- /CraftEnv/src/craft/goal.py: -------------------------------------------------------------------------------- 1 | class Goal: 2 | def __init__(self, blackboard): 3 | self._blackboard = blackboard 4 | self.reset() 5 | 6 | def reset(self): 7 | self.template = self._blackboard.template 8 | self.x = self.template["goal"][0]["x"] 9 | self.y = self.template["goal"][0]["y"] 10 | self.z = self.template["goal"][0]["z"] 11 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/free_building_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | unfolded_body: 7 | - {id: 0, x: 2, y: 1, z: 1} 8 | - {id: 1, x: 2, y: 2, z: 1} 9 | unfolded_foot: 10 | - {id: 0, x: 1, y: 1, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | # no use -------------------------------------------------------------------------------- /PyMARL/src/learners/__init__.py: -------------------------------------------------------------------------------- 1 | from .q_learner import QLearner 2 | from .coma_learner import COMALearner 3 | from .qtran_learner import QLearner as QTranLearner 4 | from .ppo_learner import PPOLearner 5 | 6 | REGISTRY = {} 7 | 8 | REGISTRY["q_learner"] = QLearner 9 | REGISTRY["coma_learner"] = COMALearner 10 | REGISTRY["qtran_learner"] = QTranLearner 11 | REGISTRY["ppo_learner"] = PPOLearner -------------------------------------------------------------------------------- 
/CraftEnv/src/craft/blueprint/breaking_barrier_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | - {id: 4, x: 3, y: 1, z: 2} 7 | unfolded_body: 8 | - {id: 0, x: 2, y: 1, z: 1} 9 | - {id: 1, x: 2, y: 2, z: 1} 10 | unfolded_foot: 11 | - {id: 0, x: 1, y: 1, z: 1} 12 | - {id: 1, x: 1, y: 2, z: 1} 13 | # no use -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/complex_twolayer_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | - {id: 4, x: 3, y: 1, z: 2} 7 | - {id: 5, x: 4, y: 2, z: 2} 8 | unfolded_body: 9 | - {id: 0, x: 2, y: 1, z: 1} 10 | - {id: 1, x: 2, y: 2, z: 1} 11 | unfolded_foot: 12 | - {id: 0, x: 1, y: 1, z: 1} 13 | - {id: 1, x: 1, y: 2, z: 1} 14 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/vdn.yaml: -------------------------------------------------------------------------------- 1 | # --- VDN specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1.0 6 | epsilon_finish: 0.2 # 0.05 7 | epsilon_anneal_time: 1 # 50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: "vdn" 21 | 22 | name: "vdn" 23 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/iql.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1.0 6 | epsilon_finish: 0.2 # 0.05 7 | epsilon_anneal_time: 1 # 50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: # Mixer becomes None 21 | 22 | name: "iql" 23 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/vdn_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- VDN specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: "vdn" 22 | 23 | name: "vdn_smac_parallel" 24 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/iql_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- IQL specific parameters --- 2 | 3 | # use epsilon greedy action 
selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: # Mixer becomes None 22 | 23 | name: "iql_smac_parallel" -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/mlp.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MLP(nn.Module): 7 | def __init__(self, input_shape, hidden_dim, output_dim): 8 | super(MLP, self).__init__() 9 | self.fc1 = nn.Linear(input_shape, hidden_dim) 10 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 11 | self.fc3 = nn.Linear(hidden_dim, output_dim) 12 | 13 | def forward(self, inputs): 14 | x = F.relu(self.fc1(inputs)) 15 | x = F.relu(self.fc2(x)) 16 | q = self.fc3(x) 17 | return q -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qmix_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: "qmix" 22 | mixing_embed_dim: 32 23 | 24 | name: "qmix_smac_parallel" 25 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qmix.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1 6 | epsilon_finish: 0.2 # 0/05 7 | epsilon_anneal_time: 1 # 500000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: "qmix" 21 | mixing_embed_dim: 32 22 | hypernet_layers: 2 23 | hypernet_embed: 64 24 | 25 | name: "qmix" 26 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/simple_twolayer_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 2 5 | slope_num: 1 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 0, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | smartcar: 13 | - {id: 0, x: 0, y: 2, z: 1, yaw: 0} 14 | - {id: 1, x: 2, y: 0, z: 1, yaw: 0} 15 | fold_slope: 16 | - {id: 0, x: 2, y: 2, z: 1, yaw: 3} 17 | flag: 18 | - {id: 0, x: -1, y: -2, z: 1} 19 | goal: 20 | - {id: 0, x: -1, y: -2, z: 1} 21 | legged_robot: 22 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 23 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 24 | 
-------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/block_shaped_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 4 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 1, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | - {id: 2, x: 3, y: 0, z: 1} 13 | - {id: 3, x: 3, y: 3, z: 1} 14 | smartcar: 15 | - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 16 | - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 17 | 18 | flag: 19 | - {id: 0, x: -1, y: -2, z: 1} 20 | goal: 21 | - {id: 0, x: -1, y: -2, z: 1} 22 | legged_robot: 23 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 24 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 25 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/strip_shaped_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 4 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 0, z: 1} 11 | - {id: 1, x: 2, y: 1, z: 1} 12 | - {id: 2, x: 1, y: 2, z: 1} 13 | - {id: 3, x: 2, y: 3, z: 1} 14 | smartcar: 15 | - {id: 0, x: 1, y: 1, z: 1, yaw: 0} 16 | - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 17 | 18 | flag: 19 | - {id: 0, x: -1, y: -2, z: 1} 20 | goal: 21 | - {id: 0, x: -1, y: -2, z: 1} 22 | legged_robot: 23 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 24 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 25 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qtran.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 #1.0 6 | epsilon_finish: 0.2 #0.05 7 | epsilon_anneal_time: 1 #50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "qtran_learner" 19 | double_q: True 20 | mixer: "qtran_base" 21 | mixing_embed_dim: 64 22 | qtran_arch: "qtran_paper" 23 | 24 | opt_loss: 1 25 | nopt_min_loss: 0.1 26 | 27 | network_size: small 28 | 29 | name: "qtran" 30 | -------------------------------------------------------------------------------- /PyMARL/src/components/transforms.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | 4 | class Transform: 5 | def transform(self, tensor): 6 | raise NotImplementedError 7 | 8 | def infer_output_info(self, vshape_in, dtype_in): 9 | raise NotImplementedError 10 | 11 | 12 | class OneHot(Transform): 13 | def __init__(self, out_dim): 14 | self.out_dim = out_dim 15 | 16 | def transform(self, tensor): 17 | y_onehot = tensor.new(*tensor.shape[:-1], self.out_dim).zero_() 18 | y_onehot.scatter_(-1, tensor.long(), 1) 19 | return y_onehot.float() 20 | 21 | def infer_output_info(self, vshape_in, dtype_in): 22 | return (self.out_dim,), th.float32 -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/__init__.py: -------------------------------------------------------------------------------- 1 | from .coma import COMACritic 2 | from .centralV import CentralVCritic 3 | from 
.coma_ns import COMACriticNS 4 | from .centralV_ns import CentralVCriticNS 5 | from .maddpg import MADDPGCritic 6 | from .maddpg_ns import MADDPGCriticNS 7 | from .ac import ACCritic 8 | from .ac_ns import ACCriticNS 9 | REGISTRY = {} 10 | 11 | REGISTRY["coma_critic"] = COMACritic 12 | REGISTRY["cv_critic"] = CentralVCritic 13 | REGISTRY["coma_critic_ns"] = COMACriticNS 14 | REGISTRY["cv_critic_ns"] = CentralVCriticNS 15 | REGISTRY["maddpg_critic"] = MADDPGCritic 16 | REGISTRY["maddpg_critic_ns"] = MADDPGCriticNS 17 | REGISTRY["ac_critic"] = ACCritic 18 | REGISTRY["ac_critic_ns"] = ACCriticNS -------------------------------------------------------------------------------- /PyMARL/src/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | # from smac.env import MultiAgentEnv, StarCraft2Env 3 | from .multiagentenv import MultiAgentEnv 4 | from .multicar_env import MultiCarEnv 5 | from .flagenv import FlagEnv 6 | from .freeenv import FreeEnv 7 | import sys 8 | import os 9 | 10 | def env_fn(env, **kwargs) -> MultiAgentEnv: 11 | return env(**kwargs) 12 | 13 | REGISTRY = {} 14 | # REGISTRY["sc2"] = partial(env_fn, env=StarCraft2Env) 15 | REGISTRY["multicar"] = partial(env_fn, env=MultiCarEnv) 16 | REGISTRY["flag"] = partial(env_fn, env=FlagEnv) 17 | REGISTRY["free"] = partial(env_fn, env=FreeEnv) 18 | 19 | # if sys.platform == "linux": 20 | # os.environ.setdefault("SC2PATH", 21 | # os.path.join(os.getcwd(), "3rdparty", "StarCraftII")) 22 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/breaking_barrier_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 5 3 | area_height: 2 4 | block_num: 8 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 0, y: 1, z: 1} 11 | - {id: 1, x: 0, y: 3, z: 1} 12 | - {id: 2, x: 1, y: 2, z: 1} 13 | - {id: 3, x: 3, y: 0, z: 1} 14 | - {id: 4, x: 3, y: 1, z: 1} 15 | - {id: 5, x: 3, y: 2, z: 1} 16 | - {id: 6, x: 3, y: 3, z: 1} 17 | - {id: 7, x: 3, y: 4, z: 1} 18 | smartcar: 19 | - {id: 0, x: 0, y: 2, z: 1, yaw: 0} 20 | - {id: 1, x: 4, y: 3, z: 1, yaw: 0} 21 | flag: 22 | - {id: 0, x: 4, y: 1, z: 1} 23 | goal: 24 | - {id: 0, x: 0, y: 2, z: 1} 25 | legged_robot: 26 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 27 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 28 | -------------------------------------------------------------------------------- /CraftEnv/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from setuptools import setup, find_packages 6 | 7 | setup( 8 | name="craftenv", 9 | version="0.1", 10 | description="The CraftEnv MARL environment for CRC", 11 | keywords="Robotics, Reinforcement Learning", 12 | package_dir={"": "src"}, 13 | packages=find_packages(where="src"), 14 | python_requires=">=3.5, <4", 15 | install_requires=[ 16 | "gym", 17 | "numpy", 18 | "scipy", 19 | "wheel", 20 | "pybullet", 21 | "absl-py", 22 | "mpi4py", 23 | "torch", 24 | "scipy", 25 | "cloudpickle", 26 | "pandas", 27 | "matplotlib" 28 | ], 29 | ) 30 | 31 | print(find_packages(where="src")) 32 | -------------------------------------------------------------------------------- /PyMARL/src/utils/rl_utils.py: 
-------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | 4 | def build_td_lambda_targets(rewards, terminated, mask, target_qs, n_agents, gamma, td_lambda): 5 | # Assumes target_qs in B*T*A and rewards, terminated, mask in (at least) B*T-1*1 6 | # Initialise last lambda-return for not terminated episodes 7 | ret = target_qs.new_zeros(*target_qs.shape) 8 | ret[:, -1] = target_qs[:, -1] * (1 - th.sum(terminated, dim=1)) 9 | # Backwards recursive update of the "forward view" 10 | for t in range(ret.shape[1] - 2, -1, -1): 11 | ret[:, t] = td_lambda * gamma * ret[:, t + 1] + mask[:, t] \ 12 | * (rewards[:, t] + (1 - td_lambda) * gamma * target_qs[:, t + 1] * (1 - terminated[:, t])) 13 | # Returns lambda-return from t=0 to t=T-1, i.e. in B*T-1*A 14 | return ret[:, 0:-1] 15 | 16 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/free_building_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 5 3 | area_height: 2 4 | block_num: 10 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 0, y: 1, z: 1} 11 | - {id: 1, x: 0, y: 3, z: 1} 12 | - {id: 2, x: 1, y: 0, z: 1} 13 | - {id: 3, x: 1, y: 2, z: 1} 14 | - {id: 4, x: 1, y: 4, z: 1} 15 | - {id: 5, x: 3, y: 0, z: 1} 16 | - {id: 6, x: 3, y: 2, z: 1} 17 | - {id: 7, x: 3, y: 4, z: 1} 18 | - {id: 8, x: 4, y: 1, z: 1} 19 | - {id: 9, x: 4, y: 3, z: 1} 20 | smartcar: 21 | - {id: 0, x: 2, y: 1, z: 1, yaw: 0} 22 | - {id: 1, x: 2, y: 3, z: 1, yaw: 0} 23 | flag: 24 | - {id: 0, x: -1, y: -2, z: 1} 25 | goal: 26 | - {id: 0, x: -1, y: -2, z: 1} 27 | legged_robot: 28 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 29 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 30 | -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/rnn_agent.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class RNNAgent(nn.Module): 6 | def __init__(self, input_shape, args): 7 | super(RNNAgent, self).__init__() 8 | self.args = args 9 | 10 | self.fc1 = nn.Linear(input_shape, args.rnn_hidden_dim) 11 | self.rnn = nn.GRUCell(args.rnn_hidden_dim, args.rnn_hidden_dim) 12 | self.fc2 = nn.Linear(args.rnn_hidden_dim, args.n_actions) 13 | 14 | def init_hidden(self): 15 | # make hidden states on same device as model 16 | return self.fc1.weight.new(1, self.args.rnn_hidden_dim).zero_() 17 | 18 | def forward(self, inputs, hidden_state): 19 | x = F.relu(self.fc1(inputs)) 20 | h_in = hidden_state.reshape(-1, self.args.rnn_hidden_dim) 21 | h = self.rnn(x, h_in) 22 | q = self.fc2(h) 23 | return q, h 24 | -------------------------------------------------------------------------------- /PyMARL/src/components/epsilon_schedules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DecayThenFlatSchedule(): 5 | 6 | def __init__(self, 7 | start, 8 | finish, 9 | time_length, 10 | decay="exp"): 11 | 12 | self.start = start 13 | self.finish = finish 14 | self.time_length = time_length 15 | self.delta = (self.start - self.finish) / self.time_length 16 | self.decay = decay 17 | 18 | if self.decay in ["exp"]: 19 | self.exp_scaling = (-1) * self.time_length / np.log(self.finish) if self.finish > 0 else 1 20 | 21 | def eval(self, T): 22 | if self.decay in ["linear"]: 23 | return max(self.finish, self.start
- self.delta * T) 24 | elif self.decay in ["exp"]: 25 | return min(self.start, max(self.finish, np.exp(- T / self.exp_scaling))) 26 | pass 27 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/coma.yaml: -------------------------------------------------------------------------------- 1 | # --- COMA specific parameters --- 2 | 3 | action_selector: "multinomial" 4 | epsilon_start: 0.21 #.5 5 | epsilon_finish: 0.2 # .01 6 | epsilon_anneal_time: 1 # 100000 7 | mask_before_softmax: False 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 # 8 # size of the replay buffer 12 | # batch_size_run: 1 # number of environments to run in parallel 13 | # batch_size: 32 # 8 # batch size 14 | 15 | env_args: 16 | state_last_action: False # critic adds last action internally 17 | 18 | # update the target network every {} training steps 19 | target_update_interval: 200 20 | 21 | # lr: 0.0005 22 | # critic_lr: 0.0005 23 | td_lambda: 0.8 24 | 25 | # use COMA 26 | agent_output_type: "pi_logits" 27 | learner: "coma_learner" 28 | critic_q_fn: "coma" 29 | critic_baseline_fn: "coma" 30 | critic_train_mode: "seq" 31 | critic_train_reps: 1 32 | q_nstep: 0 # 0 corresponds to default Q, 1 is r + gamma*Q, etc 33 | 34 | name: "coma" 35 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/sc2_beta.yaml: -------------------------------------------------------------------------------- 1 | env: sc2 2 | 3 | env_args: 4 | continuing_episode: False 5 | difficulty: "7" 6 | game_version: null 7 | map_name: "3m" 8 | move_amount: 2 9 | obs_all_health: True 10 | obs_instead_of_state: False 11 | obs_last_action: False 12 | obs_own_health: True 13 | obs_pathing_grid: False 14 | obs_terrain_height: False 15 | obs_timestep_number: False 16 | reward_death_value: 10 17 | reward_defeat: 0 18 | reward_negative_scale: 0.5 19 | reward_only_positive: True 20 | reward_scale: True 21 | reward_scale_rate: 20 22 | reward_sparse: False 23 | reward_win: 200 24 | replay_dir: "" 25 | replay_prefix: "" 26 | state_last_action: True 27 | state_timestep_number: False 28 | step_mul: 8 29 | seed: null 30 | heuristic_ai: False 31 | debug: False 32 | 33 | learner_log_interval: 20000 34 | log_interval: 20000 35 | runner_log_interval: 20000 36 | t_max: 10050000 37 | test_interval: 20000 38 | test_nepisode: 24 39 | test_greedy: True 40 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/sc2.yaml: -------------------------------------------------------------------------------- 1 | env: sc2 2 | 3 | env_args: 4 | continuing_episode: False 5 | difficulty: "7" 6 | game_version: null 7 | map_name: "3m" 8 | move_amount: 2 9 | obs_all_health: True 10 | obs_instead_of_state: False 11 | obs_last_action: False 12 | obs_own_health: True 13 | obs_pathing_grid: False 14 | obs_terrain_height: False 15 | obs_timestep_number: False 16 | reward_death_value: 10 17 | reward_defeat: 0 18 | reward_negative_scale: 0.5 19 | reward_only_positive: True 20 | reward_scale: True 21 | reward_scale_rate: 20 22 | reward_sparse: False 23 | reward_win: 200 24 | replay_dir: "" 25 | replay_prefix: "" 26 | state_last_action: True 27 | state_timestep_number: False 28 | step_mul: 8 29 | seed: null 30 | heuristic_ai: False 31 | heuristic_rest: False 32 | debug: False 33 | 34 | test_greedy: True 35 | test_nepisode: 32 36 | test_interval: 10000 37 | log_interval: 10000 38 | runner_log_interval: 10000 39 | learner_log_interval: 10000 40 | 
t_max: 2050000 41 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/mappo.yaml: -------------------------------------------------------------------------------- 1 | # --- MAPPO specific parameters --- 2 | 3 | # action_selector: "soft_policies" 4 | # mask_before_softmax: True 5 | action_selector: "epsilon_greedy" 6 | epsilon_start: 0.21 # 1 7 | epsilon_finish: 0.2 # 0/05 8 | epsilon_anneal_time: 1 # 500000 9 | 10 | runner: "episode" 11 | 12 | buffer_size: 5000 # 10 13 | # batch_size_run: 1 # 10 14 | # batch_size: 32 # 8 # 10 15 | 16 | env_args: 17 | state_last_action: False # critic adds last action internally 18 | 19 | # update the target network every {} training steps 20 | target_update_interval_or_tau: 200 21 | 22 | lr: 0.0005 23 | 24 | obs_agent_id: True 25 | obs_last_action: False 26 | obs_individual_obs: False 27 | 28 | agent_output_type: "pi_logits" 29 | learner: "ppo_learner" 30 | entropy_coef: 0.01 31 | use_rnn: False 32 | standardise_returns: True 33 | standardise_rewards: False 34 | q_nstep: 5 # 1 corresponds to normal r + gammaV 35 | critic_type: "cv_critic" 36 | epochs: 4 37 | eps_clip: 0.2 38 | name: "mappo" 39 | 40 | # t_max: 20050000 41 | hidden_dim: 64 42 | add_value_last_step: True -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_goal.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletGoal: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "goal/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 22 | self._bullet_client.setCollisionFilterGroupMask( 23 | self.robot_id, i, collisionFilterGroup=0, collisionFilterMask=0 24 | ) 25 | self._bullet_client.changeVisualShape(self.robot_id, -1, rgbaColor=[0, 1, 0, 1]) 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (C) 2023 THL A29 Limited 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/multicar.yaml: -------------------------------------------------------------------------------- 1 | env: multicar 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/xliu/craft/CraftEnv/src/craft/blueprint/strip_shaped_init.yaml' 6 | design_path: '/home/xliu/craft/CraftEnv/src/craft/blueprint/strip_shaped_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_flag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletFlag: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "flag/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | 22 | for i in range(self._bullet_client.getNumJoints(self.robot_id)): 23 | self._bullet_client.changeVisualShape( 24 | self.robot_id, i, rgbaColor=[1, 0, 0, 1] 25 | ) 26 | 27 | self._bullet_client.setCollisionFilterGroupMask( 28 | self.robot_id, -1, collisionFilterGroup=3, collisionFilterMask=3 29 | ) 30 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_smartcar.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletSmartcar: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | self.robot_id = self._bullet_client.loadURDF( 14 | os.path.join(get_urdf_path(), "smartcar/smartcar.urdf"), 15 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 16 | globalScaling=1, 17 | ) 18 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 19 | self._bullet_client.setCollisionFilterGroupMask( 20 | self.robot_id, i, 
collisionFilterGroup=3, collisionFilterMask=3 21 | ) 22 | self._bullet_client.changeVisualShape(self.robot_id, 0, rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1]) 23 | self._bullet_client.changeVisualShape(self.robot_id, 1, rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1]) 24 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/free.yaml: -------------------------------------------------------------------------------- 1 | env: free 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/free_building_init.yaml' 6 | design_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/free_building_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /PyMARL/src/config/envs/flag.yaml: -------------------------------------------------------------------------------- 1 | env: flag 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/breaking_barrier_init.yaml' 6 | design_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/breaking_barrier_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/mlp_agent.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class MLPAgent(nn.Module): 6 | def __init__(self, input_shape, args): 7 | super(MLPAgent, self).__init__() 8 | self.args = args 9 | # self.fc1 = nn.Linear(input_shape, args.rnn_hidden_dim) 10 | # self.rnn = nn.GRUCell(args.rnn_hidden_dim, args.rnn_hidden_dim) 11 | # self.fc2 = nn.Linear(args.rnn_hidden_dim, args.n_actions) 12 | 
self.fc1 = nn.Linear(input_shape, 256) 13 | self.relu1 = nn.ReLU() 14 | self.fc2 = nn.Linear(256, 256) 15 | self.relu2 = nn.ReLU() 16 | self.fc3 = nn.Linear(256, 64) 17 | self.relu3 = nn.ReLU() 18 | self.fc4 = nn.Linear(64, 64) 19 | self.relu4 = nn.ReLU() 20 | self.fc5 = nn.Linear(64, args.n_actions) 21 | 22 | def init_hidden(self): 23 | # make hidden states on same device as model 24 | return self.fc1.weight.new(1, self.args.rnn_hidden_dim).zero_() 25 | 26 | def forward(self, inputs, hidden_state): 27 | x = self.relu1(self.fc1(inputs)) 28 | x = self.relu2(self.fc2(x)) 29 | x = self.relu3(self.fc3(x)) 30 | x = self.relu4(self.fc4(x)) 31 | x = self.fc5(x) 32 | h = hidden_state.reshape(-1, self.args.rnn_hidden_dim) 33 | return x, h -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_slope.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletSlope: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "slope/slope.urdf.xacro") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 22 | self._bullet_client.setCollisionFilterGroupMask( 23 | self.robot_id, i, collisionFilterGroup=12, collisionFilterMask=1 24 | ) 25 | self.fold() 26 | 27 | def fold(self): 28 | self._bullet_client.resetJointState( 29 | bodyUniqueId=self.robot_id, 30 | jointIndex=1, 31 | targetValue=-0.02, 32 | targetVelocity=0, 33 | ) 34 | 35 | def unfold(self): 36 | self._bullet_client.resetJointState( 37 | bodyUniqueId=self.robot_id, 38 | jointIndex=1, 39 | targetValue=-3.12, 40 | targetVelocity=0, 41 | ) 42 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/goal/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/maddpg.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MADDPGCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(MADDPGCritic, self).__init__() 9 | self.args = args 10 | self.n_actions = args.n_actions 11 | self.n_agents = args.n_agents 12 | self.input_shape = self._get_input_shape(scheme) + self.n_actions * self.n_agents 13 | if self.args.obs_last_action: 14 | self.input_shape += self.n_actions 15 | self.output_type = "q" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(self.input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, inputs, actions): 23 | inputs = th.cat((inputs, actions), dim=-1) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = 
self.fc3(x) 27 | return q 28 | 29 | def _get_input_shape(self, scheme): 30 | # state 31 | input_shape = scheme["state"]["vshape"] 32 | # print(scheme["state"]["vshape"], scheme["obs"]["vshape"], self.n_agents, scheme["actions_one"]) 33 | # whether to add the individual observation 34 | if self.args.obs_individual_obs: 35 | input_shape += scheme["obs"]["vshape"] 36 | # agent id 37 | if self.args.obs_agent_id: 38 | input_shape += self.n_agents 39 | return input_shape -------------------------------------------------------------------------------- /CraftEnv/src/create_pybullet_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.spaces import Tuple as GymTuple 3 | 4 | from craftenv.sim_envs.pybullet_envs.craft.craft_env import CraftEnv 5 | 6 | 7 | class SingleAgentWrapper(gym.Wrapper): 8 | def __init__(self, env): 9 | super(SingleAgentWrapper, self).__init__(env) 10 | 11 | self.observation_space = GymTuple([env.observation_space]) 12 | self.action_space = GymTuple([env.action_space]) 13 | 14 | def reset(self, **kwargs): 15 | obs = self.env.reset() 16 | return (obs,) 17 | 18 | def step(self, action): 19 | obs, rwd, done, info = super(SingleAgentWrapper, self).step(action[0]) 20 | if "post_process_data" in info: 21 | info["post_process_data"] = (info["post_process_data"],) 22 | return (obs,), (rwd,), done, info 23 | 24 | 25 | def create_pybullet_env(**env_config): 26 | arena_id = env_config["arena_id"] 27 | assert arena_id in [ 28 | "Craft-v0" 29 | ] 30 | enable_render = env_config["render"] if "render" in env_config else False 31 | 32 | enable_render = env_config["render"] if "render" in env_config else False 33 | 34 | def create_single_env(): 35 | if arena_id in ["Craft-v0"]: 36 | init_blueprint_path = list( 37 | env_config["init_blueprint_path"].split(",")) 38 | env0 = CraftEnv(enable_render, init_blueprint_path, env_config) 39 | else: 40 | raise NotImplementedError 41 | env0 = SingleAgentWrapper(env0) 42 | return env0 43 | env = create_single_env() 44 | return env 45 | -------------------------------------------------------------------------------- /PyMARL/src/components/standarize_stream.py: -------------------------------------------------------------------------------- 1 | """ 2 | Taken from: https://github.com/semitable/fast-marl 3 | """ 4 | 5 | import torch 6 | from typing import Tuple 7 | 8 | 9 | class RunningMeanStd(object): 10 | def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] 
= (), device="cpu"): 11 | """ 12 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 13 | """ 14 | self.mean = torch.zeros(shape, dtype=torch.float32, device=device) 15 | self.var = torch.ones(shape, dtype=torch.float32, device=device) 16 | self.count = epsilon 17 | 18 | def update(self, arr): 19 | arr = arr.reshape(-1, arr.size(-1)) 20 | batch_mean = torch.mean(arr, dim=0) 21 | batch_var = torch.var(arr, dim=0) 22 | batch_count = arr.shape[0] 23 | self.update_from_moments(batch_mean, batch_var, batch_count) 24 | 25 | def update_from_moments(self, batch_mean, batch_var, batch_count: int): 26 | delta = batch_mean - self.mean 27 | tot_count = self.count + batch_count 28 | 29 | new_mean = self.mean + delta * batch_count / tot_count 30 | m_a = self.var * self.count 31 | m_b = batch_var * batch_count 32 | m_2 = ( 33 | m_a 34 | + m_b 35 | + torch.square(delta) 36 | * self.count 37 | * batch_count 38 | / (self.count + batch_count) 39 | ) 40 | new_var = m_2 / (self.count + batch_count) 41 | 42 | new_count = batch_count + self.count 43 | 44 | self.mean = new_mean 45 | self.var = new_var 46 | self.count = new_count -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/ac.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ACCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(ACCritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | self.output_type = "v" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, batch, t=None): 23 | inputs, bs, max_t = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # observations 35 | inputs.append(batch["obs"][:, ts]) 36 | 37 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 38 | 39 | inputs = th.cat(inputs, dim=-1) 40 | return inputs, bs, max_t 41 | 42 | def _get_input_shape(self, scheme): 43 | # observations 44 | input_shape = scheme["obs"]["vshape"] 45 | # agent id 46 | input_shape += self.n_agents 47 | return input_shape 48 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_wall.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletWalls: 6 | """ 7 | pybullet API createMultiBody 8 | """ 9 | 10 | def __init__(self, bullet_client, blackboard): 11 | self._bullet_client = bullet_client 12 | self._blackboard = blackboard 13 | self.init_pose = [0.0, 0.0, 1.0] 14 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 15 | self.num = self._blackboard.wall_num 16 | 17 | self._init_model(self.num) 18 | 19 | def _init_model(self, num): 20 | visual_file_name = os.path.join(get_urdf_path(), "wall/meshes/base_link.STL") 21 | visual_shape = 
self._bullet_client.createVisualShape( 22 | shapeType=self._bullet_client.GEOM_MESH, 23 | fileName=visual_file_name, 24 | rgbaColor=[211 / 255, 211 / 255, 211 / 255, 0.1], 25 | ) 26 | collision_shape = self._bullet_client.createCollisionShape( 27 | shapeType=self._bullet_client.GEOM_BOX, 28 | halfExtents=[ 29 | self._blackboard.BLOCK_LENGTH / 2, 30 | self._blackboard.BLOCK_LENGTH / 2, 31 | self._blackboard.BLOCK_HEIGHT / 2, 32 | ], 33 | ) 34 | 35 | position = [[0, 0, 0] for _ in range(num)] 36 | self.ids = self._bullet_client.createMultiBody( 37 | baseCollisionShapeIndex=collision_shape, 38 | baseVisualShapeIndex=visual_shape, 39 | batchPositions=position, 40 | ) 41 | 42 | for id_ in self.ids: 43 | self._bullet_client.setCollisionFilterGroupMask( 44 | id_, -1, collisionFilterGroup=3, collisionFilterMask=3 45 | ) 46 | -------------------------------------------------------------------------------- /PyMARL/src/utils/timehelper.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | 5 | def print_time(start_time, T, t_max, episode, episode_rewards): 6 | time_elapsed = time.time() - start_time 7 | T = max(1, T) 8 | time_left = time_elapsed * (t_max - T) / T 9 | # Just in case its over 100 days 10 | time_left = min(time_left, 60 * 60 * 24 * 100) 11 | last_reward = "N\A" 12 | if len(episode_rewards) > 5: 13 | last_reward = "{:.2f}".format(np.mean(episode_rewards[-50:])) 14 | print("\033[F\033[F\x1b[KEp: {:,}, T: {:,}/{:,}, Reward: {}, \n\x1b[KElapsed: {}, Left: {}\n".format(episode, T, t_max, last_reward, time_str(time_elapsed), time_str(time_left)), " " * 10, end="\r") 15 | 16 | 17 | def time_left(start_time, t_start, t_current, t_max): 18 | if t_current >= t_max: 19 | return "-" 20 | time_elapsed = time.time() - start_time 21 | t_current = max(1, t_current) 22 | time_left = time_elapsed * (t_max - t_current) / (t_current - t_start) 23 | # Just in case its over 100 days 24 | time_left = min(time_left, 60 * 60 * 24 * 100) 25 | return time_str(time_left) 26 | 27 | 28 | def time_str(s): 29 | """ 30 | Convert seconds to a nicer string showing days, hours, minutes and seconds 31 | """ 32 | days, remainder = divmod(s, 60 * 60 * 24) 33 | hours, remainder = divmod(remainder, 60 * 60) 34 | minutes, seconds = divmod(remainder, 60) 35 | string = "" 36 | if days > 0: 37 | string += "{:d} days, ".format(int(days)) 38 | if hours > 0: 39 | string += "{:d} hours, ".format(int(hours)) 40 | if minutes > 0: 41 | string += "{:d} minutes, ".format(int(minutes)) 42 | string += "{:d} seconds".format(int(seconds)) 43 | return string 44 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/action_enum.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, auto, unique 2 | import numpy as np 3 | 4 | 5 | ACTION_ARG = [ 6 | None, 7 | None, 8 | None, 9 | None, 10 | 1, # TURN_LEFT 11 | -1, # TURN_RIGHT 12 | # (x, y) 13 | np.array((0, 1)), # MOVE_FORWARD 14 | np.array((0, -1)), # MOVE_BACK 15 | np.array((-1, 0)), # MOVE_LEFT 16 | np.array((1, 0)), # MOVE_RIGHT 17 | None, # STOP 18 | np.array((-1, 1)), # MOVE_FORWARD_LEFT 19 | np.array((1, 1)), # MOVE_FORWARD_RIGHT 20 | np.array((-1, -1)), # MOVE_BACK_LEFT 21 | np.array((1, -1)), # MOVE_BACK_RIGHT 22 | ] 23 | 24 | 25 | @unique 26 | class ActionEnum(IntEnum): 27 | """ 28 | ^ y 29 | | 30 | | 31 | | 32 | o---------> x 33 | world coordinate 34 | 35 | smartcar action enum 36 | 37 | MOVE_FORWARD: ^ 
38 | | 39 | 40 | MOVE_BACK: | 41 | v 42 | 43 | MOVE_LEFT: <-- 44 | 45 | MOVE_RIGHT: --> 46 | 47 | TURN_LEFT <-- 48 | | 49 | 50 | TURN_RIGHT --> 51 | | 52 | 53 | LIFT 54 | 55 | DROP 56 | 57 | FOLD 58 | 59 | UNFOLD 60 | 61 | STOP 62 | """ 63 | 64 | LIFT = 0 # 0 65 | DROP = auto() # 1 66 | FOLD = auto() # 2 67 | UNFOLD = auto() # 3 68 | ROTATE_LEFT = auto() # 4 69 | ROTATE_RIGHT = auto() # 5 70 | MOVE_FORWARD = auto() # 6 71 | MOVE_BACK = auto() # 7 72 | MOVE_LEFT = auto() # 8 73 | MOVE_RIGHT = auto() # 9 74 | STOP = auto() # 10 75 | MOVE_FORWARD_LEFT = auto() # 11 76 | MOVE_FORWARD_RIGHT = auto() # 12 77 | MOVE_BACK_LEFT = auto() # 13 78 | MOVE_BACK_RIGHT = auto() # 14 79 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/maddpg_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class MADDPGCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(MADDPGCriticNS, self).__init__() 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | self.input_shape = self._get_input_shape(scheme) + self.n_actions * self.n_agents 14 | if self.args.obs_last_action: 15 | self.input_shape += self.n_actions 16 | self.output_type = "q" 17 | self.critics = [MLP(self.input_shape, self.args.hidden_dim, 1) for _ in range(self.n_agents)] 18 | 19 | def forward(self, inputs, actions): 20 | inputs = th.cat((inputs, actions), dim=-1) 21 | qs = [] 22 | for i in range(self.n_agents): 23 | q = self.critics[i](inputs[:, :, i]).unsqueeze(2) 24 | qs.append(q) 25 | return th.cat(qs, dim=2) 26 | 27 | def _get_input_shape(self, scheme): 28 | # state 29 | input_shape = scheme["state"]["vshape"] 30 | # observation 31 | if self.args.obs_individual_obs: 32 | input_shape += scheme["obs"]["vshape"] 33 | return input_shape 34 | 35 | def parameters(self): 36 | params = list(self.critics[0].parameters()) 37 | for i in range(1, self.n_agents): 38 | params += list(self.critics[i].parameters()) 39 | return params 40 | 41 | def state_dict(self): 42 | return [a.state_dict() for a in self.critics] 43 | 44 | def load_state_dict(self, state_dict): 45 | for i, c in enumerate(self.critics): 46 | c.load_state_dict(state_dict[i]) 47 | 48 | def cuda(self): 49 | for c in self.critics: 50 | c.cuda() 51 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/ac_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class ACCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(ACCriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = self._get_input_shape(scheme) 16 | self.output_type = "v" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, 1) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs, bs, max_t = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs[:, :, i]) 26 | qs.append(q.view(bs, max_t, 1, -1)) 27 | q = th.cat(qs, dim=2) 28 | return q 29 | 30 | def _build_inputs(self, batch, t=None): 31 | bs = 
batch.batch_size 32 | max_t = batch.max_seq_length if t is None else 1 33 | ts = slice(None) if t is None else slice(t, t+1) 34 | inputs = batch["obs"][:, ts] 35 | return inputs, bs, max_t 36 | 37 | def _get_input_shape(self, scheme): 38 | # observations 39 | input_shape = scheme["obs"]["vshape"] 40 | return input_shape 41 | 42 | def parameters(self): 43 | params = list(self.critics[0].parameters()) 44 | for i in range(1, self.n_agents): 45 | params += list(self.critics[i].parameters()) 46 | return params 47 | 48 | def state_dict(self): 49 | return [a.state_dict() for a in self.critics] 50 | 51 | def load_state_dict(self, state_dict): 52 | for i, a in enumerate(self.critics): 53 | a.load_state_dict(state_dict[i]) 54 | 55 | def cuda(self): 56 | for c in self.critics: 57 | c.cuda() -------------------------------------------------------------------------------- /PyMARL/src/envs/multiagentenv.py: -------------------------------------------------------------------------------- 1 | class MultiAgentEnv(object): 2 | 3 | def step(self, actions): 4 | """ Returns reward, terminated, info """ 5 | raise NotImplementedError 6 | 7 | def get_obs(self): 8 | """ Returns all agent observations in a list """ 9 | raise NotImplementedError 10 | 11 | def get_obs_agent(self, agent_id): 12 | """ Returns observation for agent_id """ 13 | raise NotImplementedError 14 | 15 | def get_obs_size(self): 16 | """ Returns the shape of the observation """ 17 | raise NotImplementedError 18 | 19 | def get_state(self): 20 | raise NotImplementedError 21 | 22 | def get_state_size(self): 23 | """ Returns the shape of the state""" 24 | raise NotImplementedError 25 | 26 | def get_avail_actions(self): 27 | raise NotImplementedError 28 | 29 | def get_avail_agent_actions(self, agent_id): 30 | """ Returns the available actions for agent_id """ 31 | raise NotImplementedError 32 | 33 | def get_total_actions(self): 34 | """ Returns the total number of actions an agent could ever take """ 35 | # TODO: This is only suitable for a discrete 1 dimensional action space for each agent 36 | raise NotImplementedError 37 | 38 | def reset(self): 39 | """ Returns initial observations and states""" 40 | raise NotImplementedError 41 | 42 | def render(self): 43 | raise NotImplementedError 44 | 45 | def close(self): 46 | raise NotImplementedError 47 | 48 | def seed(self): 49 | raise NotImplementedError 50 | 51 | def save_replay(self): 52 | raise NotImplementedError 53 | 54 | def get_env_info(self): 55 | env_info = {"state_shape": self.get_state_size(), 56 | "obs_shape": self.get_obs_size(), 57 | "n_actions": self.get_total_actions(), 58 | "n_agents": self.n_agents, 59 | "episode_limit": self.episode_limit} 60 | return env_info 61 | -------------------------------------------------------------------------------- /PyMARL/src/config/default.yaml: -------------------------------------------------------------------------------- 1 | # --- Defaults --- 2 | 3 | # --- pymarl options --- 4 | runner: "episode" # Runs 1 env for an episode 5 | mac: "basic_mac" # Basic controller 6 | env: "sc2" # Environment name 7 | env_args: {} # Arguments for the environment 8 | batch_size_run: 1 # Number of environments to run in parallel 9 | test_nepisode: 20 # Number of episodes to test for 10 | test_interval: 10000 # 2000 # Test after {} timesteps have passed 11 | test_greedy: True # Use greedy evaluation (if False, will set epsilon floor to 0 12 | log_interval: 2000 # Log summary of stats after every {} timesteps 13 | runner_log_interval: 2000 # Log runner stats (not test 
stats) every {} timesteps 14 | learner_log_interval: 2000 # Log training stats every {} timesteps 15 | t_max: 1000500 # 1000500 # 2000000 # Stop running after this many timesteps 16 | use_cuda: True # Use gpu by default unless it isn't available 17 | buffer_cpu_only: True # If true we won't keep all of the replay buffer in vram 18 | 19 | # --- Logging options --- 20 | use_tensorboard: False # Log results to tensorboard 21 | save_model: True # Save the models to disk 22 | save_model_interval: 1000000 # Save models after this many timesteps 23 | checkpoint_path: "" # Load a checkpoint from this path 24 | evaluate: False # Evaluate model for test_nepisode episodes and quit (no training) 25 | load_step: 0 # Load model trained on this many timesteps (0 if choose max possible) 26 | save_replay: False # Saving the replay of the model loaded from checkpoint_path 27 | local_results_path: "results" # Path for local results 28 | 29 | # --- RL hyperparameters --- 30 | gamma: 0.99 31 | batch_size: 32 # Number of episodes to train on 32 | buffer_size: 32 # Size of the replay buffer 33 | lr: 0.0005 # Learning rate for agents 34 | critic_lr: 0.0005 # Learning rate for critics 35 | optim_alpha: 0.99 # RMSProp alpha 36 | optim_eps: 0.00001 # RMSProp epsilon 37 | grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm 38 | 39 | # --- Agent parameters --- 40 | agent: "rnn" # Default rnn agent 41 | rnn_hidden_dim: 64 # Size of hidden state for default rnn agent 42 | obs_agent_id: True # Include the agent's one_hot id in the observation 43 | obs_last_action: True # Include the agent's last action (one_hot) in the observation 44 | 45 | # --- Experiment running params --- 46 | repeat_id: 1 47 | label: "default_label" 48 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_block.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletBlock: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "block/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | 22 | 23 | class BulletBlocks: 24 | """ 25 | pybullet API createMultiBody 26 | """ 27 | 28 | def __init__(self, bullet_client, blackboard): 29 | self._bullet_client = bullet_client 30 | self._blackboard = blackboard 31 | self.init_pose = [0.0, 0.0, 1.0] 32 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 33 | self.num = self._blackboard.block_num 34 | 35 | self._init_model(self.num) 36 | 37 | def _init_model(self, num): 38 | visual_file_name = os.path.join(get_urdf_path(), "block/meshes/base_link.STL") 39 | visual_shape = self._bullet_client.createVisualShape( 40 | shapeType=self._bullet_client.GEOM_MESH, 41 | fileName=visual_file_name, 42 | rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1], 43 | ) 44 | collision_shape = self._bullet_client.createCollisionShape( 45 | shapeType=self._bullet_client.GEOM_BOX, 46 | halfExtents=[ 47 | self._blackboard.BLOCK_LENGTH / 2, 48 | self._blackboard.BLOCK_LENGTH / 2, 49 | self._blackboard.BLOCK_HEIGHT / 2, 50 | ], 51 | ) 52 | 53 | position = [[0, 0, 0] 
for _ in range(num)] 54 | self.ids = self._bullet_client.createMultiBody( 55 | baseCollisionShapeIndex=collision_shape, 56 | baseVisualShapeIndex=visual_shape, 57 | batchPositions=position, 58 | ) 59 | 60 | for id_ in self.ids: 61 | self._bullet_client.setCollisionFilterGroupMask( 62 | id_, -1, collisionFilterGroup=3, collisionFilterMask=3 63 | ) 64 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/complex_twolayer_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 4 3 | area_height: 3 4 | block_num: 6 5 | slope_num: 2 6 | smartcar_num: 4 7 | legged_robot_num: 2 8 | 9 | # block: 10 | # - {id: 0, x: 3, y: 0, z: 1} 11 | # - {id: 1, x: 3, y: 3, z: 1} 12 | # - {id: 2, x: 4, y: 0, z: 1} 13 | # - {id: 3, x: 4, y: 3, z: 1} 14 | # - {id: 4, x: 0, y: 1, z: 1} 15 | # - {id: 5, x: 0, y: 2, z: 1} 16 | # smartcar: 17 | # - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 18 | # - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 19 | # - {id: 2, x: 3, y: 1, z: 1, yaw: 0} 20 | # - {id: 3, x: 3, y: 2, z: 1, yaw: 0} 21 | # fold_slope: 22 | # - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 23 | # - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 24 | # flag: 25 | # - {id: 0, x: -1, y: -2, z: 1} 26 | # goal: 27 | # - {id: 0, x: -1, y: -2, z: 1} 28 | # legged_robot: 29 | # - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 30 | # - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 31 | 32 | 33 | # block: 34 | # - {id: 0, x: 3, y: 1, z: 1} 35 | # - {id: 1, x: 3, y: 2, z: 1} 36 | # - {id: 2, x: 4, y: 1, z: 1} 37 | # - {id: 3, x: 4, y: 2, z: 1} 38 | # - {id: 4, x: 3, y: 1, z: 2} 39 | # - {id: 5, x: 4, y: 2, z: 2} 40 | # smartcar: 41 | # - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 42 | # - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 43 | # - {id: 2, x: 3, y: 1, z: 1, yaw: 0} 44 | # - {id: 3, x: 3, y: 2, z: 1, yaw: 0} 45 | # unfold_slope: 46 | # - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 47 | # - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 48 | # flag: 49 | # - {id: 0, x: -1, y: -2, z: 1} 50 | # goal: 51 | # - {id: 0, x: -1, y: -2, z: 1} 52 | # legged_robot: 53 | # - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 54 | # - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 55 | 56 | block: 57 | - {id: 0, x: 3, y: 0, z: 1} 58 | - {id: 1, x: 3, y: 3, z: 1} 59 | - {id: 2, x: 4, y: 0, z: 1} 60 | - {id: 3, x: 4, y: 3, z: 1} 61 | - {id: 4, x: 0, y: 1, z: 1} 62 | - {id: 5, x: 0, y: 2, z: 1} 63 | smartcar: 64 | - {id: 0, x: 2, y: 0, z: 1, yaw: 0} 65 | - {id: 1, x: 2, y: 3, z: 1, yaw: 0} 66 | - {id: 2, x: 0, y: 1, z: 1, yaw: 0} 67 | - {id: 3, x: 0, y: 2, z: 1, yaw: 0} 68 | fold_slope: 69 | - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 70 | - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 71 | flag: 72 | - {id: 0, x: -1, y: -2, z: 1} 73 | goal: 74 | - {id: 0, x: -1, y: -2, z: 1} 75 | legged_robot: 76 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 77 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} -------------------------------------------------------------------------------- /PyMARL/src/utils/logging.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import logging 3 | import numpy as np 4 | import torch 5 | 6 | class Logger: 7 | def __init__(self, console_logger): 8 | self.console_logger = console_logger 9 | 10 | self.use_tb = False 11 | self.use_sacred = False 12 | self.use_hdf = False 13 | 14 | self.stats = defaultdict(lambda: []) 15 | 16 | def setup_tb(self, directory_name): 17 | # Import here so it doesn't have to be installed if you don't use it 18 | from 
tensorboard_logger import configure, log_value 19 | configure(directory_name) 20 | self.tb_logger = log_value 21 | self.use_tb = True 22 | 23 | def setup_sacred(self, sacred_run_dict): 24 | self.sacred_info = sacred_run_dict.info 25 | self.use_sacred = True 26 | 27 | def log_stat(self, key, value, t, to_sacred=True): 28 | self.stats[key].append((t, value)) 29 | 30 | if self.use_tb: 31 | self.tb_logger(key, value, t) 32 | 33 | if self.use_sacred and to_sacred: 34 | if key in self.sacred_info: 35 | self.sacred_info["{}_T".format(key)].append(t) 36 | self.sacred_info[key].append(value) 37 | else: 38 | self.sacred_info["{}_T".format(key)] = [t] 39 | self.sacred_info[key] = [value] 40 | 41 | def print_recent_stats(self): 42 | log_str = "Recent Stats | t_env: {:>10} | Episode: {:>8}\n".format(*self.stats["episode"][-1]) 43 | i = 0 44 | for (k, v) in sorted(self.stats.items()): 45 | if k == "episode": 46 | continue 47 | i += 1 48 | window = 5 if k != "epsilon" else 1 49 | item = "{:.4f}".format(np.mean([ 50 | x[1].item() if torch.is_tensor(x[1]) is True else x[1] 51 | for x in self.stats[k][-window:]])) 52 | log_str += "{:<25}{:>8}".format(k + ":", item) 53 | log_str += "\n" if i % 4 == 0 else "\t" 54 | self.console_logger.info(log_str) 55 | 56 | 57 | # set up a custom logger 58 | def get_logger(): 59 | logger = logging.getLogger() 60 | logger.handlers = [] 61 | ch = logging.StreamHandler() 62 | formatter = logging.Formatter('[%(levelname)s %(asctime)s] %(name)s %(message)s', '%H:%M:%S') 63 | ch.setFormatter(formatter) 64 | logger.addHandler(ch) 65 | logger.setLevel('DEBUG') 66 | 67 | return logger 68 | 69 | -------------------------------------------------------------------------------- /PyMARL/src/envs/flagenv.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.flag_env import FlagEnv as Env 3 | 4 | 5 | class FlagEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 
| 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/envs/freeenv.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.free_env import FreeEnv as Env 3 | 4 | 5 | class FreeEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 | 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/envs/multicar_env.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.goal_env import GoalEnv as Env 3 | 4 | 5 | class MultiCarEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = 
env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 | 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/qmix.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class QMixer(nn.Module): 8 | def __init__(self, args): 9 | super(QMixer, self).__init__() 10 | 11 | self.args = args 12 | self.n_agents = args.n_agents 13 | self.state_dim = int(np.prod(args.state_shape)) 14 | 15 | self.embed_dim = args.mixing_embed_dim 16 | 17 | if getattr(args, "hypernet_layers", 1) == 1: 18 | self.hyper_w_1 = nn.Linear(self.state_dim, self.embed_dim * self.n_agents) 19 | self.hyper_w_final = nn.Linear(self.state_dim, self.embed_dim) 20 | elif getattr(args, "hypernet_layers", 1) == 2: 21 | hypernet_embed = self.args.hypernet_embed 22 | self.hyper_w_1 = nn.Sequential(nn.Linear(self.state_dim, hypernet_embed), 23 | nn.ReLU(), 24 | nn.Linear(hypernet_embed, self.embed_dim * self.n_agents)) 25 | self.hyper_w_final = nn.Sequential(nn.Linear(self.state_dim, hypernet_embed), 26 | nn.ReLU(), 27 | nn.Linear(hypernet_embed, self.embed_dim)) 28 | elif getattr(args, "hypernet_layers", 1) > 2: 29 | raise Exception("Sorry >2 hypernet layers is not implemented!") 30 | else: 31 | raise Exception("Error setting number of hypernet layers.") 32 | 33 | # State dependent bias for hidden layer 34 | self.hyper_b_1 = nn.Linear(self.state_dim, self.embed_dim) 35 | 36 | # V(s) instead of a bias for the last layers 37 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 38 | nn.ReLU(), 39 | nn.Linear(self.embed_dim, 1)) 40 | 41 | def forward(self, agent_qs, states): 42 | bs = agent_qs.size(0) 43 | states = states.reshape(-1, 
self.state_dim) 44 | agent_qs = agent_qs.view(-1, 1, self.n_agents) 45 | # First layer 46 | w1 = th.abs(self.hyper_w_1(states)) 47 | b1 = self.hyper_b_1(states) 48 | w1 = w1.view(-1, self.n_agents, self.embed_dim) 49 | b1 = b1.view(-1, 1, self.embed_dim) 50 | hidden = F.elu(th.bmm(agent_qs, w1) + b1) 51 | # Second layer 52 | w_final = th.abs(self.hyper_w_final(states)) 53 | w_final = w_final.view(-1, self.embed_dim, 1) 54 | # State-dependent bias 55 | v = self.V(states).view(-1, 1, 1) 56 | # Compute final output 57 | y = th.bmm(hidden, w_final) + v 58 | # Reshape and return 59 | q_tot = y.view(bs, -1, 1) 60 | return q_tot 61 | -------------------------------------------------------------------------------- /PyMARL/src/components/action_selectors.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch.distributions import Categorical 3 | from .epsilon_schedules import DecayThenFlatSchedule 4 | 5 | REGISTRY = {} 6 | 7 | 8 | class MultinomialActionSelector(): 9 | 10 | def __init__(self, args): 11 | self.args = args 12 | 13 | self.schedule = DecayThenFlatSchedule(args.epsilon_start, args.epsilon_finish, args.epsilon_anneal_time, 14 | decay="linear") 15 | self.epsilon = self.schedule.eval(0) 16 | self.test_greedy = getattr(args, "test_greedy", True) 17 | 18 | def select_action(self, agent_inputs, avail_actions, t_env, test_mode=False): 19 | masked_policies = agent_inputs.clone() 20 | masked_policies[avail_actions == 0.0] = 0.0 21 | 22 | self.epsilon = self.schedule.eval(t_env) 23 | 24 | if test_mode and self.test_greedy: 25 | picked_actions = masked_policies.max(dim=2)[1] 26 | else: 27 | picked_actions = Categorical(masked_policies).sample().long() 28 | 29 | return picked_actions 30 | 31 | 32 | REGISTRY["multinomial"] = MultinomialActionSelector 33 | 34 | 35 | class EpsilonGreedyActionSelector(): 36 | 37 | def __init__(self, args): 38 | self.args = args 39 | 40 | self.schedule = DecayThenFlatSchedule(args.epsilon_start, args.epsilon_finish, args.epsilon_anneal_time, 41 | decay="linear") 42 | self.epsilon = self.schedule.eval(0) 43 | 44 | def select_action(self, agent_inputs, avail_actions, t_env, test_mode=False): 45 | 46 | # fix for special bug 20220822 47 | for i in range(0, len(avail_actions[0])): 48 | # print(avail_actions[0][i]) 49 | if th.sum(avail_actions[0][i]) == 0: 50 | avail_actions[0][i][10] = 1 51 | 52 | # Assuming agent_inputs is a batch of Q-Values for each agent bav 53 | self.epsilon = self.schedule.eval(t_env) 54 | 55 | if test_mode: 56 | # Greedy action selection only 57 | self.epsilon = 0.0 58 | 59 | # mask actions that are excluded from selection 60 | masked_q_values = agent_inputs.clone() 61 | masked_q_values[avail_actions == 0.0] = -float("inf") # should never be selected! 
62 | 63 | random_numbers = th.rand_like(agent_inputs[:, :, 0]) 64 | pick_random = (random_numbers < self.epsilon).long() 65 | random_actions = Categorical(avail_actions.float()).sample().long() 66 | 67 | picked_actions = pick_random * random_actions + (1 - pick_random) * masked_q_values.max(dim=2)[1] 68 | return picked_actions 69 | 70 | 71 | REGISTRY["epsilon_greedy"] = EpsilonGreedyActionSelector 72 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/centralV.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class CentralVCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(CentralVCritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | self.output_type = "v" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, batch, t=None): 23 | inputs, bs, max_t = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observations 38 | if self.args.obs_individual_obs: 39 | inputs.append(batch["obs"][:, ts].view(bs, max_t, -1).unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 40 | 41 | # last actions 42 | if self.args.obs_last_action: 43 | if t == 0: 44 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1)) 45 | elif isinstance(t, int): 46 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1)) 47 | else: 48 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 49 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 50 | inputs.append(last_actions) 51 | 52 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 53 | 54 | inputs = th.cat(inputs, dim=-1) 55 | return inputs, bs, max_t 56 | 57 | def _get_input_shape(self, scheme): 58 | # state 59 | input_shape = scheme["state"]["vshape"] 60 | # observations 61 | if self.args.obs_individual_obs: 62 | input_shape += scheme["obs"]["vshape"] * self.n_agents 63 | # last actions 64 | if self.args.obs_last_action: 65 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 66 | input_shape += self.n_agents 67 | return input_shape -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/coma.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class COMACritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(COMACritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | 
self.output_type = "q" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, 128) 19 | self.fc2 = nn.Linear(128, 128) 20 | self.fc3 = nn.Linear(128, self.n_actions) 21 | 22 | def forward(self, batch, t=None): 23 | inputs = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observation 38 | inputs.append(batch["obs"][:, ts]) 39 | 40 | # actions (masked out by agent) 41 | actions = batch["actions_onehot"][:, ts].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 42 | agent_mask = (1 - th.eye(self.n_agents, device=batch.device)) 43 | agent_mask = agent_mask.view(-1, 1).repeat(1, self.n_actions).view(self.n_agents, -1) 44 | inputs.append(actions * agent_mask.unsqueeze(0).unsqueeze(0)) 45 | 46 | # last actions 47 | if t == 0: 48 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 49 | elif isinstance(t, int): 50 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 51 | else: 52 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 53 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 54 | inputs.append(last_actions) 55 | 56 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 57 | 58 | inputs = th.cat([x.reshape(bs, max_t, self.n_agents, -1) for x in inputs], dim=-1) 59 | return inputs 60 | 61 | def _get_input_shape(self, scheme): 62 | # state 63 | input_shape = scheme["state"]["vshape"] 64 | # observation 65 | input_shape += scheme["obs"]["vshape"] 66 | # actions and last actions 67 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents * 2 68 | # agent id 69 | input_shape += self.n_agents 70 | return input_shape -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/slope.urdf.xacro: -------------------------------------------------------------------------------- 1 | 2 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | transmission_interface/SimpleTransmission 61 | 62 | hardware_interface/EffortJointInterface 63 | 64 | 65 | hardware_interface/EffortJointInterface 66 | 1 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/centralV_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class CentralVCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(CentralVCriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = 
self._get_input_shape(scheme) 16 | self.output_type = "v" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, 1) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs, bs, max_t = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs) 26 | qs.append(q.view(bs, max_t, 1, -1)) 27 | q = th.cat(qs, dim=2) 28 | return q 29 | 30 | def _build_inputs(self, batch, t=None): 31 | bs = batch.batch_size 32 | max_t = batch.max_seq_length if t is None else 1 33 | ts = slice(None) if t is None else slice(t, t+1) 34 | inputs = [] 35 | # state 36 | inputs.append(batch["state"][:, ts]) 37 | 38 | # observations 39 | if self.args.obs_individual_obs: 40 | inputs.append(batch["obs"][:, ts].view(bs, max_t, -1)) 41 | 42 | if self.args.obs_last_action: 43 | # last actions 44 | if t == 0: 45 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1)) 46 | elif isinstance(t, int): 47 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1)) 48 | else: 49 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 50 | last_actions = last_actions.view(bs, max_t, 1, -1) 51 | inputs.append(last_actions) 52 | 53 | inputs = th.cat([x.reshape(bs * max_t, -1) for x in inputs], dim=1) 54 | return inputs, bs, max_t 55 | 56 | def _get_input_shape(self, scheme): 57 | # state 58 | input_shape = scheme["state"]["vshape"] 59 | # observations 60 | if self.args.obs_individual_obs: 61 | input_shape += scheme["obs"]["vshape"] 62 | # last actions 63 | if self.args.obs_last_action: 64 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 65 | 66 | return input_shape 67 | 68 | def parameters(self): 69 | params = list(self.critics[0].parameters()) 70 | for i in range(1, self.n_agents): 71 | params += list(self.critics[i].parameters()) 72 | return params 73 | 74 | def state_dict(self): 75 | return [a.state_dict() for a in self.critics] 76 | 77 | def load_state_dict(self, state_dict): 78 | for i, a in enumerate(self.critics): 79 | a.load_state_dict(state_dict[i]) 80 | 81 | def cuda(self): 82 | for c in self.critics: 83 | c.cuda() 84 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bread_first_search.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import numpy as np 3 | from craftenv.sim_envs.pybullet_envs.craft.grid_objs import ( 4 | Air, Block, Flag, FoldedSlope, FoldedSlopeGear, UnfoldedSlopeBody, UnfoldedSlopeFoot 5 | ) 6 | from craftenv.sim_envs.pybullet_envs.craft.utils import next_step 7 | 8 | 9 | class BreadthFirstSearch: 10 | 11 | def __init__(self, blackboard): 12 | """ 13 | Initialize grid map for bfs search 14 | """ 15 | self._blackboard = blackboard 16 | self.motion = self.get_motion_model() 17 | self.area_size = blackboard.area_size 18 | 19 | def calc_reachable_space(self, x, y, z): 20 | grid = self._blackboard.grid 21 | maxx = self._blackboard.area_size[0] + 1 22 | minx = 0 23 | maxy = self._blackboard.area_size[1] + 1 24 | miny = 0 25 | maxz = self._blackboard.area_size[2] 26 | minz = 1 27 | visited = np.zeros((maxx, maxy, maxz + 1)) 28 | 29 | q = deque() 30 | q.append((x, y, z)) 31 | visited[x][y][z] = 1 32 | cnt = 1 33 | 34 | while q: 35 | x, y, z = q.popleft() 36 | 37 | # expand_grid search grid based on motion model 38 | for i, _ in 
enumerate(self.motion): 39 | n_x, n_y, n_z = x + self.motion[i][0], y + self.motion[i][1], z 40 | 41 | if minx <= n_x < maxx and \ 42 | miny <= n_y < maxy and \ 43 | minz <= n_z <= maxz: 44 | n_obj = grid[n_x][n_y][n_z] 45 | blow_n_obj = grid[n_x][n_y][n_z - 1] 46 | 47 | if visited[n_x][n_y][n_z] == 0: 48 | if blow_n_obj.can_stand and isinstance(n_obj, (Air, Block, Flag, FoldedSlopeGear)): 49 | q.append((n_x, n_y, n_z)) 50 | visited[n_x][n_y][n_z] = 1 51 | cnt += 1 52 | 53 | elif isinstance(n_obj, UnfoldedSlopeFoot): 54 | yaw = (n_obj.yaw + 2) % 4 55 | nn_x, nn_y = next_step(n_x, n_y, yaw * np.pi / 2) 56 | q.append((n_x, n_y, n_z)) 57 | visited[n_x][n_y][n_z] = 1 58 | cnt += 1 59 | if minx <= nn_x < maxx and \ 60 | miny <= nn_y < maxy and \ 61 | minz <= n_z + 1 <= maxz: 62 | q.append((nn_x, nn_y, n_z + 1)) 63 | visited[nn_x][nn_y][n_z + 1] = 1 64 | cnt += 1 65 | 66 | elif isinstance(n_obj, (FoldedSlope, UnfoldedSlopeBody)): 67 | pre_yaw = (n_obj.yaw + 2) % 4 68 | pre_x, pre_y = next_step(n_x, n_y, pre_yaw * np.pi / 2) 69 | if pre_x == x and pre_y == y: 70 | visited[n_x][n_y][n_z] = 1 71 | cnt += 1 72 | 73 | return visited 74 | 75 | @staticmethod 76 | def get_motion_model(): 77 | return np.array([[1, 0], [0, 1], [-1, 0], [0, -1]]) 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The CraftEnv Environment 2 | 3 | CraftEnv is a flexible Multi-Agent Reinforcement Learning (MARL) environment for Collective Robotic Construction (CRC) systems, written in Python. 4 | 5 | The CraftEnv paper is accepted by the 22nd International Conference on Autonomous Agents and Multiagent Systems (AAMAS) 2023. 6 | 7 | ## Installation instructions 8 | 9 | To install the codebase, please clone this repo and install the `CraftEnv/setup.py` via `pip install -e .`. The file can be used to install the necessary packages into a virtual environment. 10 | We use the [PyMARL](https://github.com/oxwhirl/pymarl) and the [EPyMARL](https://github.com/uoe-agents/epymarl) framework for the deep multi-agent reinforcement learning algorithms. 11 | 12 | ## Run an experiment 13 | 14 | ```shell 15 | cd PyMARL 16 | python src/main.py --config=qmix --env-config=multicar 17 | ``` 18 | 19 | The config files act as defaults for an algorithm or environment. 20 | 21 | They are all located in `src/config`. 22 | `--config` refers to the config files in `src/config/algs` 23 | `--env-config` refers to the config files in `src/config/envs` 24 | 25 | Note that the `multicar` environment corresponds to the goal-conditioned tasks, the `multicar2` environment corresponds to the free building tasks, and the `flag` environment corresponds to the breaking barrier tasks. 26 | 27 | All results will be stored in the `Results` folder. 28 | 29 | Currently, supported algos and environments are: 30 | 31 | - IQL, MAPPO, QMIX, QTRAN, COMA, VDN 32 | - multicar, multicar2, goal 33 | 34 | ## Saving and loading learnt models 35 | 36 | ### Saving models 37 | 38 | You can save the learnt models to disk by setting `save_model = True`, which is set to `False` by default. The frequency of saving models can be adjusted using `save_model_interval` configuration. Models will be saved in the result directory, under the folder called *models*. The directory corresponding each run will contain models saved throughout the experiment, each within a folder corresponding to the number of timesteps passed since starting the learning process. 
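As a minimal sketch (assuming the sacred-style `with key=value` override syntax that `src/main.py` forwards to `ex.run_commandline`; the exact values shown are illustrative, not defaults), these saving options can also be set directly from the command line instead of editing the config files:

```shell
# Enable model saving and save a checkpoint every 500k timesteps
# (parameter names taken from src/config/default.yaml; values here are examples)
python src/main.py --config=qmix --env-config=multicar with save_model=True save_model_interval=500000
```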
39 | 40 | ### Loading models 41 | 42 | Learnt models can be loaded using the `checkpoint_path` parameter, after which the learning will proceed from the corresponding timestep. 43 | 44 | ## Citation 45 | ``` 46 | @inproceedings{zhao2023craftenv, 47 | title={CraftEnv: A Flexible Collective Robotic Construction Environment for Multi-Agent Reinforcement Learning}, 48 | author={Zhao, Rui and Liu, Xu and Zhang, Yizheng and Li, Minghao and Zhou, Cheng and Li, Shuai and Han, Lei}, 49 | booktitle={2023 International Joint Conference on Autonomous Agents and Multi-agent Systems (AAMAS)}, 50 | year={2023}, 51 | } 52 | ``` 53 | 54 | ## License 55 | 56 | Use MIT license (see LICENSE.md) except for third-party softwares. They are all open-source softwares and have their own license types. 57 | 58 | ## Disclaimer 59 | 60 | This is not an officially supported Tencent product. The code and data in this repository are for research purpose only. No representation or warranty whatsoever, expressed or implied, is made as to its accuracy, reliability or completeness. We assume no liability and are not responsible for any misuse or damage caused by the code and data. Your use of the code and data are subject to applicable laws and your use of them is at your own risk. 61 | 62 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/grid_objs.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import IntEnum, unique, auto 3 | 4 | 5 | @unique 6 | class ObjType(IntEnum): 7 | Undefined = -1 8 | Air = auto() 9 | Ground = auto() 10 | Wall = auto() 11 | Block = auto() 12 | Flag = auto() 13 | FoldedSlope = auto() 14 | FoldedSlopeGear = auto() 15 | UnfoldedSlopeBody = auto() 16 | UnfoldedSlopeFoot = auto() 17 | 18 | 19 | class WorldObj(ABC): 20 | """ 21 | Base class for grid world objects 22 | """ 23 | 24 | @abstractmethod 25 | def __init__(self): 26 | self.can_lift = False 27 | self.can_fold = False 28 | self.can_unfold = False 29 | self.can_stand = False 30 | self.near_unfold_slope_body = False 31 | self.near_blow_unfold_slope_foot = False 32 | """ 33 | the obj id on the WorldObj, -1 means there is nothing, -2 means there is something, 34 | 0, 1, 2... 
means there is a smartcar on it and the number represent the smartcar's id 35 | """ 36 | self.obj_on_it = -1 37 | self.type = ObjType.Undefined 38 | 39 | 40 | class Air(WorldObj): 41 | def __init__(self): 42 | super().__init__() 43 | self.can_lift = False 44 | self.can_stand = False 45 | self.type = ObjType.Air 46 | 47 | 48 | class Ground(WorldObj): 49 | def __init__(self): 50 | super().__init__() 51 | self.can_stand = True 52 | self.type = ObjType.Ground 53 | 54 | 55 | class Wall(WorldObj): 56 | def __init__(self): 57 | super().__init__() 58 | self.type = ObjType.Wall 59 | self.can_lift = False 60 | self.can_stand = False 61 | 62 | 63 | class Block(WorldObj): 64 | def __init__(self): 65 | super().__init__() 66 | self.can_lift = True 67 | self.can_stand = True 68 | self.type = ObjType.Block 69 | 70 | 71 | class Flag(WorldObj): 72 | def __init__(self): 73 | super().__init__() 74 | self.can_lift = True 75 | self.type = ObjType.Flag 76 | 77 | 78 | class FoldedSlope(WorldObj): 79 | """ 80 | ↑ 1 81 | ← → ↔ 2 0 82 | ↓ 3 83 | 84 | body-foot 85 | yaw = 0 86 | 87 | foot 88 | | 89 | body 90 | yaw = np.pi * 0.5 91 | 92 | foot-body 93 | yaw = np.pi * 1 94 | 95 | body 96 | | 97 | foot 98 | yaw = np.pi * 1.5 99 | """ 100 | 101 | def __init__(self, yaw): 102 | super().__init__() 103 | self.can_lift = True 104 | self.yaw = yaw 105 | self.type = ObjType.FoldedSlope 106 | 107 | 108 | class FoldedSlopeGear(WorldObj): 109 | def __init__(self, yaw): 110 | super().__init__() 111 | self.yaw = yaw 112 | self.type = ObjType.FoldedSlopeGear 113 | 114 | 115 | class UnfoldedSlopeBody(WorldObj): 116 | """ 117 | ↑ 1 118 | ← → ↔ 2 0 119 | ↓ 3 120 | """ 121 | 122 | def __init__(self, yaw): 123 | super().__init__() 124 | self.yaw = yaw 125 | self.type = ObjType.UnfoldedSlopeBody 126 | 127 | 128 | class UnfoldedSlopeFoot(WorldObj): 129 | """ 130 | ↑ 1 131 | ← → ↔ 2 0 132 | ↓ 3 133 | """ 134 | 135 | def __init__(self, yaw): 136 | super().__init__() 137 | self.yaw = yaw 138 | self.type = ObjType.UnfoldedSlopeFoot 139 | -------------------------------------------------------------------------------- /PyMARL/src/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import collections 4 | from os.path import dirname, abspath 5 | from copy import deepcopy 6 | from sacred import Experiment, SETTINGS 7 | from sacred.observers import FileStorageObserver 8 | from sacred.utils import apply_backspaces_and_linefeeds 9 | import sys 10 | import torch as th 11 | from utils.logging import get_logger 12 | import yaml 13 | 14 | from run import run 15 | 16 | SETTINGS['CAPTURE_MODE'] = "fd" # set to "no" if you want to see stdout/stderr in console 17 | logger = get_logger() 18 | 19 | ex = Experiment("pymarl") 20 | ex.logger = logger 21 | ex.captured_out_filter = apply_backspaces_and_linefeeds 22 | 23 | results_path = os.path.join(dirname(dirname(abspath(__file__))), "results") 24 | 25 | 26 | @ex.main 27 | def my_main(_run, _config, _log): 28 | # Setting the random seed throughout the modules 29 | config = config_copy(_config) 30 | np.random.seed(config["seed"]) 31 | th.manual_seed(config["seed"]) 32 | config['env_args']['seed'] = config["seed"] 33 | 34 | # run the framework 35 | run(_run, config, _log) 36 | 37 | 38 | def _get_config(params, arg_name, subfolder): 39 | config_name = None 40 | for _i, _v in enumerate(params): 41 | if _v.split("=")[0] == arg_name: 42 | config_name = _v.split("=")[1] 43 | del params[_i] 44 | break 45 | 46 | if config_name is not None: 47 | 
with open(os.path.join(os.path.dirname(__file__), "config", subfolder, "{}.yaml".format(config_name)), "r") as f: 48 | try: 49 | config_dict = yaml.load(f) 50 | except yaml.YAMLError as exc: 51 | assert False, "{}.yaml error: {}".format(config_name, exc) 52 | return config_dict 53 | 54 | 55 | def recursive_dict_update(d, u): 56 | for k, v in u.items(): 57 | if isinstance(v, collections.Mapping): 58 | d[k] = recursive_dict_update(d.get(k, {}), v) 59 | else: 60 | d[k] = v 61 | return d 62 | 63 | 64 | def config_copy(config): 65 | if isinstance(config, dict): 66 | return {k: config_copy(v) for k, v in config.items()} 67 | elif isinstance(config, list): 68 | return [config_copy(v) for v in config] 69 | else: 70 | return deepcopy(config) 71 | 72 | 73 | if __name__ == '__main__': 74 | params = deepcopy(sys.argv) 75 | 76 | # Get the defaults from default.yaml 77 | with open(os.path.join(os.path.dirname(__file__), "config", "default.yaml"), "r") as f: 78 | try: 79 | config_dict = yaml.load(f) 80 | except yaml.YAMLError as exc: 81 | assert False, "default.yaml error: {}".format(exc) 82 | 83 | # Load algorithm and env base configs 84 | env_config = _get_config(params, "--env-config", "envs") 85 | alg_config = _get_config(params, "--config", "algs") 86 | # config_dict = {**config_dict, **env_config, **alg_config} 87 | config_dict = recursive_dict_update(config_dict, env_config) 88 | config_dict = recursive_dict_update(config_dict, alg_config) 89 | 90 | # now add all the config to sacred 91 | ex.add_config(config_dict) 92 | 93 | # Save to disk by default for sacred 94 | logger.info("Saving to FileStorageObserver in results/sacred.") 95 | file_obs_path = os.path.join(results_path, "sacred") 96 | ex.observers.append(FileStorageObserver.create(file_obs_path)) 97 | 98 | ex.run_commandline(params) 99 | 100 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: craft 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - blas=1.0=mkl 9 | - bzip2=1.0.8=h7b6447c_0 10 | - ca-certificates=2022.10.11=h06a4308_0 11 | - certifi=2021.5.30=py36h06a4308_0 12 | - cpuonly=2.0=0 13 | - dataclasses=0.8=pyh4f3eec9_6 14 | - ffmpeg=4.3=hf484d3e_0 15 | - freetype=2.12.1=h4a9f257_0 16 | - gmp=6.2.1=h295c915_3 17 | - gnutls=3.6.15=he1e5248_0 18 | - intel-openmp=2022.1.0=h9e868ea_3769 19 | - jpeg=9e=h7f8727e_0 20 | - lame=3.100=h7b6447c_0 21 | - lcms2=2.12=h3be6417_0 22 | - ld_impl_linux-64=2.38=h1181459_1 23 | - lerc=3.0=h295c915_0 24 | - libdeflate=1.8=h7f8727e_5 25 | - libffi=3.3=he6710b0_2 26 | - libgcc-ng=11.2.0=h1234567_1 27 | - libgomp=11.2.0=h1234567_1 28 | - libiconv=1.16=h7f8727e_2 29 | - libidn2=2.3.2=h7f8727e_0 30 | - libpng=1.6.37=hbc83047_0 31 | - libstdcxx-ng=11.2.0=h1234567_1 32 | - libtasn1=4.16.0=h27cfd23_0 33 | - libtiff=4.4.0=hecacb30_0 34 | - libunistring=0.9.10=h27cfd23_0 35 | - libuv=1.40.0=h7b6447c_0 36 | - libwebp-base=1.2.4=h5eee18b_0 37 | - lz4-c=1.9.3=h295c915_1 38 | - mkl=2020.2=256 39 | - mkl-service=2.3.0=py36he8ac12f_0 40 | - mkl_fft=1.3.0=py36h54f3939_0 41 | - mkl_random=1.1.1=py36h0573a6f_0 42 | - ncurses=6.3=h5eee18b_3 43 | - nettle=3.7.3=hbbd107a_1 44 | - numpy=1.19.2=py36h54aff64_0 45 | - numpy-base=1.19.2=py36hfa32c7d_0 46 | - olefile=0.46=py36_0 47 | - openh264=2.1.1=h4ff587b_0 48 | - openjpeg=2.4.0=h3ad879b_0 49 | - openssl=1.1.1q=h7f8727e_0 50 | - pillow=8.3.1=py36h2c7a002_0 51 | - 
pip=21.2.2=py36h06a4308_0 52 | - python=3.6.13=h12debd9_1 53 | - pytorch=1.10.2=py3.6_cpu_0 54 | - pytorch-mutex=1.0=cpu 55 | - readline=8.2=h5eee18b_0 56 | - setuptools=58.0.4=py36h06a4308_0 57 | - six=1.16.0=pyhd3eb1b0_1 58 | - sqlite=3.39.3=h5082296_0 59 | - tk=8.6.12=h1ccaba5_0 60 | - torchaudio=0.10.2=py36_cpu 61 | - torchvision=0.11.3=py36_cpu 62 | - typing_extensions=4.1.1=pyh06a4308_0 63 | - wheel=0.37.1=pyhd3eb1b0_0 64 | - xz=5.2.6=h5eee18b_0 65 | - zlib=1.2.13=h5eee18b_0 66 | - zstd=1.5.2=ha4553b6_0 67 | - pip: 68 | - absl-py==1.3.0 69 | - charset-normalizer==2.0.12 70 | - cloudpickle==2.2.0 71 | - cycler==0.11.0 72 | - deepdiff==5.7.0 73 | - dm-env==1.5 74 | - dm-env-rpc==1.1.0 75 | - dm-tree==0.1.7 76 | - docopt==0.6.2 77 | - enum34==1.1.10 78 | - googleapis-common-protos==1.56.3 79 | - grpcio==1.48.2 80 | - gym==0.21.0 81 | - idna==3.4 82 | - immutabledict==2.2.1 83 | - importlib-metadata==4.8.3 84 | - jsonpickle==0.9.6 85 | - kiwisolver==1.3.1 86 | - matplotlib==3.3.4 87 | - mock==4.0.3 88 | - mpi4py==3.0.3 89 | - mpyq==0.2.5 90 | - munch==2.5.0 91 | - ordered-set==4.0.2 92 | - pandas==1.1.5 93 | - portpicker==1.5.2 94 | - protobuf==3.19.5 95 | - psutil==5.9.3 96 | - pybullet==3.2.5 97 | - pygame==2.1.2 98 | - pyparsing==3.0.9 99 | - pysc2==4.0.0 100 | - python-dateutil==2.8.2 101 | - pytz==2022.6 102 | - pyyaml==3.13 103 | - requests==2.27.1 104 | - s2clientprotocol==5.0.10.88500.0 105 | - s2protocol==5.0.10.88500.0 106 | - sacred==0.7.2 107 | - scipy==1.5.4 108 | - sk-video==1.1.10 109 | - smac==1.0.0 110 | - urllib3==1.26.12 111 | - websocket-client==1.3.1 112 | - wrapt==1.14.1 113 | - zipp==3.6.0 114 | prefix: /home/droid/anaconda3/envs/craft2 115 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/coma_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class COMACriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(COMACriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = self._get_input_shape(scheme) 16 | self.output_type = "q" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, self.n_actions) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs[:, :, i]).unsqueeze(2) 26 | qs.append(q) 27 | return th.cat(qs, dim=2) 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observation 38 | if self.args.obs_individual_obs: 39 | inputs.append(batch["obs"][:, ts]) 40 | 41 | # actions (masked out by agent) 42 | actions = batch["actions_onehot"][:, ts].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 43 | agent_mask = (1 - th.eye(self.n_agents, device=batch.device)) 44 | agent_mask = agent_mask.view(-1, 1).repeat(1, self.n_actions).view(self.n_agents, -1) 45 | inputs.append(actions * agent_mask.unsqueeze(0).unsqueeze(0)) 46 | 47 | # last actions 48 | if self.args.obs_last_action: 49 | if t == 0: 50 | 
inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 51 | elif isinstance(t, int): 52 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 53 | else: 54 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 55 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 56 | inputs.append(last_actions) 57 | 58 | inputs = th.cat([x.reshape(bs, max_t, self.n_agents, -1) for x in inputs], dim=-1) 59 | return inputs 60 | 61 | def _get_input_shape(self, scheme): 62 | # state 63 | input_shape = scheme["state"]["vshape"] 64 | # observation 65 | if self.args.obs_individual_obs: 66 | input_shape += scheme["obs"]["vshape"] 67 | 68 | # actions 69 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 70 | 71 | # last action 72 | if self.args.obs_last_action: 73 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 74 | # agent id 75 | # input_shape += self.n_agents 76 | return input_shape 77 | 78 | def parameters(self): 79 | params = list(self.critics[0].parameters()) 80 | for i in range(1, self.n_agents): 81 | params += list(self.critics[i].parameters()) 82 | return params 83 | 84 | def state_dict(self): 85 | return [a.state_dict() for a in self.critics] 86 | 87 | def load_state_dict(self, state_dict): 88 | for i, a in enumerate(self.critics): 89 | a.load_state_dict(state_dict[i]) 90 | 91 | def cuda(self): 92 | for c in self.critics: 93 | c.cuda() -------------------------------------------------------------------------------- /CraftEnv/src/craft/blackboard.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .grid_objs import Air, Ground, ObjType, Wall 7 | from .utils import Direction 8 | 9 | 10 | class Point: 11 | def __init__(self, x, y, z=1): 12 | self.x = x 13 | self.y = y 14 | self.z = z 15 | 16 | def __add__(self, other): 17 | return Point(self.x + other, self.y + other) 18 | 19 | def __mul__(self, other): 20 | return Point(self.x * other, self.y * other) 21 | 22 | def __hash__(self): 23 | return 1 24 | 25 | def __eq__(self, other): 26 | return self.x == other.x and self.y == other.y and self.z == other.z 27 | 28 | def __repr__(self): 29 | return f"Point({self.x}, {self.y}, {self.z})" 30 | 31 | 32 | class Blackboard: 33 | BLOCK_LENGTH = 0.3225 34 | BLOCK_HEIGHT = 0.155 + 0.03 35 | SMARTCAR_LENGTH = 0.155 36 | SMARTCAR_WIDTH = 0.155 37 | SMARTCAR_HEIGHT = 0.155 38 | 39 | def __init__(self, blueprint_path_list): 40 | """ 41 | ^ y 42 | | 43 | | 44 | | 45 | o ----------> x 46 | world coordinate 47 | """ 48 | self.spawn_point_set = set() 49 | self.blueprint_path_list = blueprint_path_list 50 | self.area_size = [0, 0, 0] # length, width, height 51 | self.load_blueprint(blueprint_path_list[0]) 52 | 53 | def load_blueprint(self, blueprint_path): 54 | f = open(blueprint_path, "r", encoding="utf-8") 55 | self.blueprint_path = blueprint_path 56 | template_generator = yaml.safe_load_all(f) 57 | self.template = {} 58 | for t in template_generator: 59 | if t is not None: 60 | self.template.update(t) 61 | 62 | try: 63 | self.wall_num = self.template["wall_num"] 64 | except KeyError as e: 65 | self.wall_num = 0 66 | print("KeyError, ", e) 67 | self.block_num = self.template["block_num"] 68 | self.slope_num = self.template["slope_num"] 69 | self.smartcar_num = 
self.template["smartcar_num"] 70 | self.legged_robot_num = self.template["legged_robot_num"] 71 | self.area_size[0] = self.template["area_length"] 72 | self.area_size[1] = self.template["area_width"] 73 | self.area_size[2] = self.template["area_height"] 74 | 75 | def reset(self, blueprint_path=None): 76 | if blueprint_path is not None: 77 | self.load_blueprint(blueprint_path) 78 | 79 | self.spawn_point_set.clear() 80 | 81 | length = self.area_size[0] + 1 82 | width = self.area_size[1] + 1 83 | height = self.area_size[2] + 1 84 | 85 | self.grid = [ 86 | [[Air() for _ in range(height)] for _ in range(width)] 87 | for _ in range(length) 88 | ] 89 | 90 | for i, j in itertools.product(range(length), range(width)): 91 | self.grid[i][j][0] = Ground() 92 | for k in range(height): 93 | for i in range(width): 94 | self.grid[-1][i][k] = Wall() 95 | for j in range(length): 96 | self.grid[j][-1][k] = Wall() 97 | 98 | def random_spawn_obj(self, obj_type): 99 | direction = np.random.randint(Direction.DIR_0, Direction.DIR_3) 100 | random_cnt = 0 101 | while True: 102 | random_cnt += 1 103 | assert random_cnt < 1000, f"please reduce obj {obj_type} num" 104 | p = Point( 105 | np.random.randint(0, self.area_size[0]), 106 | np.random.randint(0, self.area_size[1]), 107 | ) 108 | obj = self.grid[p.x][p.y][p.z] 109 | blow_obj = self.grid[p.x][p.y][p.z - 1] 110 | if ( 111 | p not in self.spawn_point_set 112 | and obj.type is ObjType.Air 113 | and blow_obj.can_stand 114 | ): 115 | break 116 | return p, direction 117 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/flag/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/wall/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/block/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 
| 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/flag_env.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .flag_craft_env import CraftEnv 7 | from .grid_objs import ObjType 8 | 9 | 10 | class FlagEnv(CraftEnv): 11 | 12 | def __init__(self, enable_render, init_blueprint_path, env_config): 13 | super().__init__(enable_render, init_blueprint_path, env_config) 14 | self.key_mapping = { 15 | "block": 1, 16 | "folded_slope": 2, 17 | "unfolded_body": 3, 18 | "unfolded_foot": 4 19 | } 20 | ########## 21 | self.hit = False 22 | self.current_step = -1 23 | self.last_distance = None 24 | ########## 25 | self.last_pos_dict = None 26 | 27 | def read_design(self, design_path): 28 | with open(design_path) as f: 29 | source = yaml.load(f, Loader=yaml.loader.SafeLoader) 30 | design_list = np.zeros(self.area_size) 31 | design_dict = { 32 | "block": [], 33 | "folded_slope": [], 34 | "unfolded_body": [], 35 | "unfolded_foot": [] 36 | } 37 | for key in self.key_mapping.keys(): 38 | if key not in source.keys(): 39 | continue 40 | for obj in source[key]: 41 | x, y, z = int(obj['x']), int(obj['y']), int(obj['z']) 42 | design_list[x][y][z] = self.key_mapping[key] 43 | design_dict[key].append((x, y, z)) 44 | return design_list, design_dict 45 | 46 | def get_pos_list(self): 47 | raise NotImplementedError 48 | 49 | def get_pos_dict(self): 50 | result = { 51 | "block": [], 52 | "folded_slope": [], 53 | "unfolded_body": [], 54 | "unfolded_foot": [] 55 | } 56 | grid = self._blackboard.grid 57 | for i, j, k in itertools.product(range(self.area_size[0]), 58 | range(self.area_size[1]), 59 | range(self.area_size[2])): 60 | obj = grid[i][j][k] 61 | if obj.type is ObjType.Block or obj.type is \ 62 | ObjType.FoldedSlopeGear: 63 | result["block"].append((i, j, k)) 64 | elif obj.type is ObjType.FoldedSlope: 65 | result["folded_slope"].append((i, j, k)) 66 | elif obj.type is ObjType.UnfoldedSlopeBody: 67 | result["unfolded_body"].append((i, j, k)) 68 | elif obj.type is ObjType.UnfoldedSlopeFoot: 69 | result["unfolded_foot"].append((i, j, k)) 70 | else: 71 | pass 72 | return result 73 | 74 | def reset(self): 75 | self.hit = False 76 | self.current_step = -1 77 | self.last_distance = None 78 | obs = super().reset() 79 | return obs 80 | 81 | def _calculate_dist(self): 82 | flag = (self.flag_pos_x, self.flag_pos_y, self.flag_pos_z) 83 | goal = (self._blackboard.goal.x, self._blackboard.goal.y, 84 | self._blackboard.goal.z) 85 | dist = abs(flag[0] - goal[0]) + abs(flag[1] - goal[1]) + abs(flag[2] - 86 | goal[2]) 87 | return dist 88 | 89 | def _compute_reward(self, blackboard=None): 90 | reward = None 91 | self.current_step += 1 92 | if self.last_distance is None: 93 | dist = self._calculate_dist() 94 | reward = 0 95 | self.last_distance = dist 96 | else: 97 | dist = self._calculate_dist() 98 | reward = self.last_distance - dist 99 | self.last_distance = dist 100 | if dist == 0 and (self.hit is False): 101 | self.hit = True 102 | reward += (20 - self.current_step) 103 | return reward 104 | 105 | def step(self, action): 106 | enable_local_obs = self.env_config.get('enable_local_obs', False) 107 | if not 
enable_local_obs: 108 | obs, reward, done, info = super().step(action) 109 | return obs, reward, done, info 110 | else: 111 | return NotImplementedError 112 | -------------------------------------------------------------------------------- /PyMARL/src/runners/episode_runner.py: -------------------------------------------------------------------------------- 1 | from envs import REGISTRY as env_REGISTRY 2 | from functools import partial 3 | from components.episode_buffer import EpisodeBatch 4 | import numpy as np 5 | 6 | 7 | class EpisodeRunner: 8 | 9 | def __init__(self, args, logger): 10 | self.args = args 11 | self.logger = logger 12 | self.batch_size = self.args.batch_size_run 13 | assert self.batch_size == 1 14 | 15 | self.env = env_REGISTRY[self.args.env](**self.args.env_args) 16 | self.episode_limit = self.env.episode_limit 17 | self.t = 0 18 | 19 | self.t_env = 0 20 | 21 | self.train_returns = [] 22 | self.test_returns = [] 23 | self.train_stats = {} 24 | self.test_stats = {} 25 | 26 | # Log the first run 27 | self.log_train_stats_t = -1000000 28 | 29 | def setup(self, scheme, groups, preprocess, mac): 30 | self.new_batch = partial(EpisodeBatch, scheme, groups, self.batch_size, self.episode_limit + 1, 31 | preprocess=preprocess, device=self.args.device) 32 | self.mac = mac 33 | 34 | def get_env_info(self): 35 | return self.env.get_env_info() 36 | 37 | def save_replay(self): 38 | self.env.save_replay() 39 | 40 | def close_env(self): 41 | self.env.close() 42 | 43 | def reset(self): 44 | self.batch = self.new_batch() 45 | self.env.reset() 46 | self.t = 0 47 | 48 | def run(self, test_mode=False): 49 | self.reset() 50 | 51 | terminated = False 52 | episode_return = 0 53 | self.mac.init_hidden(batch_size=self.batch_size) 54 | 55 | while not terminated: 56 | 57 | pre_transition_data = { 58 | "state": [self.env.get_state()], 59 | "avail_actions": [self.env.get_avail_actions()], 60 | "obs": [self.env.get_obs()] 61 | } 62 | 63 | self.batch.update(pre_transition_data, ts=self.t) 64 | 65 | # Pass the entire batch of experiences up till now to the agents 66 | # Receive the actions for each agent at this timestep in a batch of size 1 67 | actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode) 68 | 69 | reward, terminated, env_info = self.env.step(actions[0]) 70 | episode_return += reward 71 | 72 | post_transition_data = { 73 | "actions": actions, 74 | "reward": [(reward,)], 75 | "terminated": [(terminated != env_info.get("episode_limit", False),)], 76 | } 77 | 78 | self.batch.update(post_transition_data, ts=self.t) 79 | 80 | self.t += 1 81 | 82 | last_data = { 83 | "state": [self.env.get_state()], 84 | "avail_actions": [self.env.get_avail_actions()], 85 | "obs": [self.env.get_obs()] 86 | } 87 | self.batch.update(last_data, ts=self.t) 88 | 89 | # Select actions in the last stored state 90 | actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode) 91 | self.batch.update({"actions": actions}, ts=self.t) 92 | 93 | cur_stats = self.test_stats if test_mode else self.train_stats 94 | cur_returns = self.test_returns if test_mode else self.train_returns 95 | log_prefix = "test_" if test_mode else "" 96 | cur_stats.update({k: cur_stats.get(k, 0) + env_info.get(k, 0) for k in set(cur_stats) | set(env_info)}) 97 | cur_stats["n_episodes"] = 1 + cur_stats.get("n_episodes", 0) 98 | cur_stats["ep_length"] = self.t + cur_stats.get("ep_length", 0) 99 | 100 | if not test_mode: 101 | self.t_env += self.t 102 | 103 | 
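# Record this episode's return; _log flushes returns and stats after test_nepisode test episodes, or once runner_log_interval environment steps have passed since the last training log.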
cur_returns.append(episode_return) 104 | 105 | if test_mode and (len(self.test_returns) == self.args.test_nepisode): 106 | self._log(cur_returns, cur_stats, log_prefix) 107 | elif self.t_env - self.log_train_stats_t >= self.args.runner_log_interval: 108 | self._log(cur_returns, cur_stats, log_prefix) 109 | if hasattr(self.mac.action_selector, "epsilon"): 110 | self.logger.log_stat("epsilon", self.mac.action_selector.epsilon, self.t_env) 111 | self.log_train_stats_t = self.t_env 112 | 113 | return self.batch 114 | 115 | def _log(self, returns, stats, prefix): 116 | self.logger.log_stat(prefix + "return_mean", np.mean(returns), self.t_env) 117 | self.logger.log_stat(prefix + "return_std", np.std(returns), self.t_env) 118 | returns.clear() 119 | 120 | for k, v in stats.items(): 121 | if k != "n_episodes": 122 | self.logger.log_stat(prefix + k + "_mean" , v/stats["n_episodes"], self.t_env) 123 | stats.clear() 124 | -------------------------------------------------------------------------------- /PyMARL/README.md.pymarl: -------------------------------------------------------------------------------- 1 | ```diff 2 | - Please pay attention to the version of SC2 you are using for your experiments. 3 | - Performance is *not* always comparable between versions. 4 | - The results in SMAC (https://arxiv.org/abs/1902.04043) use SC2.4.6.2.69232 not SC2.4.10. 5 | ``` 6 | 7 | # Python MARL framework 8 | 9 | PyMARL is [WhiRL](http://whirl.cs.ox.ac.uk)'s framework for deep multi-agent reinforcement learning and includes implementations of the following algorithms: 10 | - [**QMIX**: QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485) 11 | - [**COMA**: Counterfactual Multi-Agent Policy Gradients](https://arxiv.org/abs/1705.08926) 12 | - [**VDN**: Value-Decomposition Networks For Cooperative Multi-Agent Learning](https://arxiv.org/abs/1706.05296) 13 | - [**IQL**: Independent Q-Learning](https://arxiv.org/abs/1511.08779) 14 | - [**QTRAN**: QTRAN: Learning to Factorize with Transformation for Cooperative Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1905.05408) 15 | 16 | PyMARL is written in PyTorch and uses [SMAC](https://github.com/oxwhirl/smac) as its environment. 17 | 18 | ## Installation instructions 19 | 20 | Build the Dockerfile using 21 | ```shell 22 | cd docker 23 | bash build.sh 24 | ``` 25 | 26 | Set up StarCraft II and SMAC: 27 | ```shell 28 | bash install_sc2.sh 29 | ``` 30 | 31 | This will download SC2 into the 3rdparty folder and copy the maps necessary to run over. 32 | 33 | The requirements.txt file can be used to install the necessary packages into a virtual environment (not recomended). 34 | 35 | ## Run an experiment 36 | 37 | ```shell 38 | python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z 39 | ``` 40 | 41 | The config files act as defaults for an algorithm or environment. 42 | 43 | They are all located in `src/config`. 44 | `--config` refers to the config files in `src/config/algs` 45 | `--env-config` refers to the config files in `src/config/envs` 46 | 47 | To run experiments using the Docker container: 48 | ```shell 49 | bash run.sh $GPU python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z 50 | ``` 51 | 52 | All results will be stored in the `Results` folder. 53 | 54 | The previous config files used for the SMAC Beta have the suffix `_beta`. 
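Any entry in these config files can also be overridden from the command line with the same Sacred-style `with` syntax used above. As a rough sketch (the exact key names and defaults live in `src/config/default.yaml` and the algorithm/environment YAMLs, so check those files before relying on the values shown here):

```shell
python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z save_model=True save_model_interval=200000
```

Here `save_model` and `save_model_interval` are the saving options described below, and `200000` is only an illustrative interval.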
55 | 56 | ## Saving and loading learnt models 57 | 58 | ### Saving models 59 | 60 | You can save the learnt models to disk by setting `save_model = True`, which is set to `False` by default. The frequency of saving models can be adjusted using the `save_model_interval` configuration option. Models will be saved in the result directory, under the folder called *models*. The directory corresponding to each run will contain models saved throughout the experiment, each within a folder named after the number of timesteps passed since starting the learning process. 61 | 62 | ### Loading models 63 | 64 | Learnt models can be loaded using the `checkpoint_path` parameter, after which the learning will proceed from the corresponding timestep. 65 | 66 | ## Watching StarCraft II replays 67 | 68 | The `save_replay` option allows saving replays of models which are loaded using `checkpoint_path`. Once the model is successfully loaded, `test_nepisode` episodes are run in test mode and a .SC2Replay file is saved in the Replay directory of StarCraft II. Please make sure to use the episode runner if you wish to save a replay, i.e., `runner=episode`. The name of the saved replay file starts with the given `env_args.save_replay_prefix` (map_name if empty), followed by the current timestamp. 69 | 70 | The saved replays can be watched by double-clicking on them or using the following command: 71 | 72 | ```shell 73 | python -m pysc2.bin.play --norender --rgb_minimap_size 0 --replay NAME.SC2Replay 74 | ``` 75 | 76 | **Note:** Replays cannot be watched using the Linux version of StarCraft II. Please use either the Mac or Windows version of the StarCraft II client. 77 | 78 | ## Documentation/Support 79 | 80 | Documentation is a little sparse at the moment (but will improve!). Please raise an issue in this repo, or email [Tabish](mailto:tabish.rashid@cs.ox.ac.uk) 81 | 82 | ## Citing PyMARL 83 | 84 | If you use PyMARL in your research, please cite the [SMAC paper](https://arxiv.org/abs/1902.04043). 85 | 86 | *M. Samvelyan, T. Rashid, C. Schroeder de Witt, G. Farquhar, N. Nardelli, T.G.J. Rudner, C.-M. Hung, P.H.S. Torr, J. Foerster, S. Whiteson. The StarCraft Multi-Agent Challenge, CoRR abs/1902.04043, 2019.* 87 | 88 | In BibTeX format: 89 | 90 | ```tex 91 | @article{samvelyan19smac, 92 | title = {{The} {StarCraft} {Multi}-{Agent} {Challenge}}, 93 | author = {Mikayel Samvelyan and Tabish Rashid and Christian Schroeder de Witt and Gregory Farquhar and Nantas Nardelli and Tim G. J. Rudner and Chia-Man Hung and Philip H. S.
Torr and Jakob Foerster and Shimon Whiteson}, 94 | journal = {CoRR}, 95 | volume = {abs/1902.04043}, 96 | year = {2019}, 97 | } 98 | ``` 99 | 100 | ## License 101 | 102 | Code licensed under the Apache License v2.0 103 | -------------------------------------------------------------------------------- /PyMARL/src/controllers/basic_controller.py: -------------------------------------------------------------------------------- 1 | from modules.agents import REGISTRY as agent_REGISTRY 2 | from components.action_selectors import REGISTRY as action_REGISTRY 3 | import torch as th 4 | 5 | 6 | # This multi-agent controller shares parameters between agents 7 | class BasicMAC: 8 | def __init__(self, scheme, groups, args): 9 | self.n_agents = args.n_agents 10 | self.args = args 11 | input_shape = self._get_input_shape(scheme) 12 | self._build_agents(input_shape) 13 | self.agent_output_type = args.agent_output_type 14 | 15 | self.action_selector = action_REGISTRY[args.action_selector](args) 16 | 17 | self.hidden_states = None 18 | 19 | def select_actions(self, ep_batch, t_ep, t_env, bs=slice(None), test_mode=False): 20 | # print("State", ep_batch["state"][:, t_ep]) 21 | # print("State") 22 | # for i in range(0, 15): 23 | # print(ep_batch["state"][:, t_ep][0][13* i: 13*(i+1)]) 24 | # print() 25 | # Only select actions for the selected batch elements in bs 26 | avail_actions = ep_batch["avail_actions"][:, t_ep] 27 | agent_outputs = self.forward(ep_batch, t_ep, test_mode=test_mode) 28 | chosen_actions = self.action_selector.select_action(agent_outputs[bs], avail_actions[bs], t_env, test_mode=test_mode) 29 | return chosen_actions 30 | 31 | def forward(self, ep_batch, t, test_mode=False): 32 | agent_inputs = self._build_inputs(ep_batch, t) 33 | avail_actions = ep_batch["avail_actions"][:, t] 34 | agent_outs, self.hidden_states = self.agent(agent_inputs, self.hidden_states) 35 | 36 | # Softmax the agent outputs if they're policy logits 37 | if self.agent_output_type == "pi_logits": 38 | 39 | if getattr(self.args, "mask_before_softmax", True): 40 | # Make the logits for unavailable actions very negative to minimise their affect on the softmax 41 | reshaped_avail_actions = avail_actions.reshape(ep_batch.batch_size * self.n_agents, -1) 42 | agent_outs[reshaped_avail_actions == 0] = -1e10 43 | 44 | agent_outs = th.nn.functional.softmax(agent_outs, dim=-1) 45 | if not test_mode: 46 | # Epsilon floor 47 | epsilon_action_num = agent_outs.size(-1) 48 | if getattr(self.args, "mask_before_softmax", True): 49 | # With probability epsilon, we will pick an available action uniformly 50 | epsilon_action_num = reshaped_avail_actions.sum(dim=1, keepdim=True).float() 51 | 52 | agent_outs = ((1 - self.action_selector.epsilon) * agent_outs 53 | + th.ones_like(agent_outs) * self.action_selector.epsilon/epsilon_action_num) 54 | 55 | if getattr(self.args, "mask_before_softmax", True): 56 | # Zero out the unavailable actions 57 | agent_outs[reshaped_avail_actions == 0] = 0.0 58 | 59 | return agent_outs.view(ep_batch.batch_size, self.n_agents, -1) 60 | 61 | def init_hidden(self, batch_size): 62 | self.hidden_states = self.agent.init_hidden().unsqueeze(0).expand(batch_size, self.n_agents, -1) # bav 63 | 64 | def parameters(self): 65 | return self.agent.parameters() 66 | 67 | def load_state(self, other_mac): 68 | self.agent.load_state_dict(other_mac.agent.state_dict()) 69 | 70 | def cuda(self): 71 | self.agent.cuda() 72 | 73 | def save_models(self, path): 74 | th.save(self.agent.state_dict(), "{}/agent.th".format(path)) 75 | 76 | 
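# load_models below passes a map_location so that checkpoints written on a GPU can also be restored on CPU-only machines.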
def load_models(self, path): 77 | self.agent.load_state_dict(th.load("{}/agent.th".format(path), map_location=lambda storage, loc: storage)) 78 | 79 | def _build_agents(self, input_shape): 80 | self.agent = agent_REGISTRY[self.args.agent](input_shape, self.args) 81 | 82 | def _build_inputs(self, batch, t): 83 | # Assumes homogenous agents with flat observations. 84 | # Other MACs might want to e.g. delegate building inputs to each agent 85 | bs = batch.batch_size 86 | inputs = [] 87 | inputs.append(batch["obs"][:, t]) # b1av 88 | if self.args.obs_last_action: 89 | if t == 0: 90 | inputs.append(th.zeros_like(batch["actions_onehot"][:, t])) 91 | else: 92 | inputs.append(batch["actions_onehot"][:, t-1]) 93 | if self.args.obs_agent_id: 94 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).expand(bs, -1, -1)) 95 | 96 | inputs = th.cat([x.reshape(bs*self.n_agents, -1) for x in inputs], dim=1) 97 | return inputs 98 | 99 | def _get_input_shape(self, scheme): 100 | input_shape = scheme["obs"]["vshape"] 101 | if self.args.obs_last_action: 102 | input_shape += scheme["actions_onehot"]["vshape"][0] 103 | if self.args.obs_agent_id: 104 | input_shape += self.n_agents 105 | 106 | return input_shape 107 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/qtran.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class QTranBase(nn.Module): 8 | def __init__(self, args): 9 | super(QTranBase, self).__init__() 10 | 11 | self.args = args 12 | 13 | self.n_agents = args.n_agents 14 | self.n_actions = args.n_actions 15 | self.state_dim = int(np.prod(args.state_shape)) 16 | self.arch = self.args.qtran_arch # QTran architecture 17 | 18 | self.embed_dim = args.mixing_embed_dim 19 | 20 | # Q(s,u) 21 | if self.arch == "coma_critic": 22 | # Q takes [state, u] as input 23 | q_input_size = self.state_dim + (self.n_agents * self.n_actions) 24 | elif self.arch == "qtran_paper": 25 | # Q takes [state, agent_action_observation_encodings] 26 | q_input_size = self.state_dim + self.args.rnn_hidden_dim + self.n_actions 27 | else: 28 | raise Exception("{} is not a valid QTran architecture".format(self.arch)) 29 | 30 | if self.args.network_size == "small": 31 | self.Q = nn.Sequential(nn.Linear(q_input_size, self.embed_dim), 32 | nn.ReLU(), 33 | nn.Linear(self.embed_dim, self.embed_dim), 34 | nn.ReLU(), 35 | nn.Linear(self.embed_dim, 1)) 36 | 37 | # V(s) 38 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 39 | nn.ReLU(), 40 | nn.Linear(self.embed_dim, self.embed_dim), 41 | nn.ReLU(), 42 | nn.Linear(self.embed_dim, 1)) 43 | ae_input = self.args.rnn_hidden_dim + self.n_actions 44 | self.action_encoding = nn.Sequential(nn.Linear(ae_input, ae_input), 45 | nn.ReLU(), 46 | nn.Linear(ae_input, ae_input)) 47 | elif self.args.network_size == "big": 48 | self.Q = nn.Sequential(nn.Linear(q_input_size, self.embed_dim), 49 | nn.ReLU(), 50 | nn.Linear(self.embed_dim, self.embed_dim), 51 | nn.ReLU(), 52 | nn.Linear(self.embed_dim, self.embed_dim), 53 | nn.ReLU(), 54 | nn.Linear(self.embed_dim, 1)) 55 | # V(s) 56 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 57 | nn.ReLU(), 58 | nn.Linear(self.embed_dim, self.embed_dim), 59 | nn.ReLU(), 60 | nn.Linear(self.embed_dim, self.embed_dim), 61 | nn.ReLU(), 62 | nn.Linear(self.embed_dim, 1)) 63 | ae_input = self.args.rnn_hidden_dim + 
self.n_actions 64 | self.action_encoding = nn.Sequential(nn.Linear(ae_input, ae_input), 65 | nn.ReLU(), 66 | nn.Linear(ae_input, ae_input)) 67 | else: 68 | assert False 69 | 70 | def forward(self, batch, hidden_states, actions=None): 71 | bs = batch.batch_size 72 | ts = batch.max_seq_length 73 | 74 | states = batch["state"].reshape(bs * ts, self.state_dim) 75 | 76 | if self.arch == "coma_critic": 77 | if actions is None: 78 | # Use the actions taken by the agents 79 | actions = batch["actions_onehot"].reshape(bs * ts, self.n_agents * self.n_actions) 80 | else: 81 | # It will arrive as (bs, ts, agents, actions), we need to reshape it 82 | actions = actions.reshape(bs * ts, self.n_agents * self.n_actions) 83 | inputs = th.cat([states, actions], dim=1) 84 | elif self.arch == "qtran_paper": 85 | if actions is None: 86 | # Use the actions taken by the agents 87 | actions = batch["actions_onehot"].reshape(bs * ts, self.n_agents, self.n_actions) 88 | else: 89 | # It will arrive as (bs, ts, agents, actions), we need to reshape it 90 | actions = actions.reshape(bs * ts, self.n_agents, self.n_actions) 91 | 92 | hidden_states = hidden_states.reshape(bs * ts, self.n_agents, -1) 93 | agent_state_action_input = th.cat([hidden_states, actions], dim=2) 94 | agent_state_action_encoding = self.action_encoding(agent_state_action_input.reshape(bs * ts * self.n_agents, -1)).reshape(bs * ts, self.n_agents, -1) 95 | agent_state_action_encoding = agent_state_action_encoding.sum(dim=1) # Sum across agents 96 | 97 | inputs = th.cat([states, agent_state_action_encoding], dim=1) 98 | 99 | q_outputs = self.Q(inputs) 100 | 101 | states = batch["state"].reshape(bs * ts, self.state_dim) 102 | v_outputs = self.V(states) 103 | 104 | return q_outputs, v_outputs 105 | 106 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/free_env.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .craft_env import CraftEnv 7 | from .grid_objs import ObjType 8 | 9 | 10 | class FreeEnv(CraftEnv): 11 | 12 | def __init__(self, enable_render, init_blueprint_path, env_config): 13 | super().__init__(enable_render, init_blueprint_path, env_config) 14 | self.key_mapping = { 15 | "block": 1, 16 | "folded_slope": 2, 17 | "unfolded_body": 3, 18 | "unfolded_foot": 4 19 | } 20 | self.design_list, self.design_dict = self.read_design( 21 | env_config["design_path"]) 22 | ########## 23 | temp = 0 24 | for k in self.design_dict.keys(): 25 | temp += len(self.design_dict[k]) 26 | self.design_length = temp 27 | print("Direct load from pymarl succeed") 28 | print("design_length", self.design_length) 29 | ########## 30 | self.last_pos_dict = None 31 | 32 | def read_design(self, design_path): 33 | with open(design_path) as f: 34 | source = yaml.load(f, Loader=yaml.loader.SafeLoader) 35 | design_list = np.zeros(self.area_size) 36 | design_dict = { 37 | "block": [], 38 | "folded_slope": [], 39 | "unfolded_body": [], 40 | "unfolded_foot": [] 41 | } 42 | for key in self.key_mapping.keys(): 43 | if key not in source.keys(): 44 | continue 45 | for obj in source[key]: 46 | x, y, z = int(obj['x']), int(obj['y']), int(obj['z']) 47 | design_list[x][y][z] = self.key_mapping[key] 48 | design_dict[key].append((x, y, z)) 49 | return design_list, design_dict 50 | 51 | def get_pos_list(self): 52 | raise NotImplementedError 53 | 54 | def get_pos_dict(self): 55 | result = { 56 | "block": [], 57 | "folded_slope": [], 58 | 
"unfolded_body": [], 59 | "unfolded_foot": [] 60 | } 61 | grid = self._blackboard.grid 62 | for i, j, k in itertools.product(range(self.area_size[0]), 63 | range(self.area_size[1]), 64 | range(self.area_size[2])): 65 | obj = grid[i][j][k] 66 | if obj.type is ObjType.Block or obj.type is \ 67 | ObjType.FoldedSlopeGear: 68 | result["block"].append((i, j, k)) 69 | elif obj.type is ObjType.FoldedSlope: 70 | result["folded_slope"].append((i, j, k)) 71 | elif obj.type is ObjType.UnfoldedSlopeBody: 72 | result["unfolded_body"].append((i, j, k)) 73 | elif obj.type is ObjType.UnfoldedSlopeFoot: 74 | result["unfolded_foot"].append((i, j, k)) 75 | else: 76 | pass 77 | return result 78 | 79 | def reset(self): 80 | self.design_list, self.design_dict = self.read_design( 81 | self.env_config["design_path"]) 82 | self.last_pos_dict = None 83 | obs = super().reset() 84 | return obs 85 | 86 | def compute_score(self, pos_dict): 87 | blocks = pos_dict["block"] 88 | visited = {block: False for block in blocks} 89 | counter = [0 for _ in range(0, len(blocks))] 90 | index = 0 91 | for block in blocks: 92 | if visited[block] is True: 93 | continue 94 | else: 95 | visited[block] = True 96 | counter[index] = 1 97 | stack = [block] 98 | while len(stack) != 0: 99 | top = stack.pop() 100 | neighbors = [(top[0] - 1, top[1], top[2]), 101 | (top[0] + 1, top[1], top[2]), 102 | (top[0], top[1] - 1, top[2]), 103 | (top[0], top[1] + 1, top[2])] 104 | for neighbor in neighbors: 105 | if neighbor in blocks and visited[neighbor] is False: 106 | visited[neighbor] = True 107 | counter[index] += 1 108 | stack.append(neighbor) 109 | index += 1 110 | score = 0 111 | for c in counter: 112 | if c == 0: 113 | break 114 | if c == 1: 115 | continue 116 | elif c > score: 117 | score = c 118 | return score 119 | 120 | def _compute_reward(self, blackboard=None): 121 | reward = 0 122 | before = 0 123 | after = 0 124 | if self.last_pos_dict is not None: 125 | before = self.compute_score(self.last_pos_dict) 126 | reward -= before 127 | current_pos_dict = self.get_pos_dict() 128 | after = self.compute_score(current_pos_dict) 129 | reward += after 130 | self.last_pos_dict = current_pos_dict 131 | return reward 132 | 133 | def step(self, action): 134 | enable_local_obs = self.env_config.get('enable_local_obs', False) 135 | if not enable_local_obs: 136 | obs, reward, done, info = super().step(action) 137 | return obs, reward, done, info 138 | else: 139 | return NotImplementedError 140 | -------------------------------------------------------------------------------- /PyMARL/src/painter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import seaborn as sns; sns.set() 5 | 6 | plt.switch_backend('agg') 7 | 8 | info = { 9 | "QMIX_TEST":{ 10 | "lr=0.005, epsilon=0.2": [15, 17, 16], 11 | "lr=0.005, epsilon=0.3": [18, 20, 19], 12 | "lr=0.0005, epsilon=0.2": [23, 21, 22], 13 | "lr=0.0005, epsilon=0.3": [25, 26, 24], 14 | } 15 | } 16 | 17 | info = { 18 | "The complex two-story building task":{ 19 | "QMIX": [40, 28, 27], # 40 43 28 27 20 | "QTRAN": [48, 49, 36, 35, ], 21 | "VDN": [47, 41, 38, 37, ], 22 | "IQL": [42, 44, 32, 31], 23 | "MAPPO": [45, 51, 34, 33, ], 24 | "COMA": [46, 30] 25 | }, 26 | "The simple two-story building task":{ 27 | "QMIX": [90, 71, 82, 68, ], 28 | "QTRAN": [84, 77, 81, 67, ], 29 | "VDN": [80, 75, 87, 76], 30 | "IQL": [79, 72, 85, 74], 31 | "MAPPO": [83, 69, 89, 73, ], 32 | "COMA": [88, 70, 86, 66] 33 | }, 34 | "The 
block-shaped building task":{ 35 | "QMIX": [104, 105, 106, 107, 108], 36 | "QTRAN": [109, 110, 111, 112, 113], 37 | "VDN": [114, 115, 116, 117, 118], 38 | "IQL": [119, 120, 121, 122, 123], 39 | "MAPPO": [124, 125, 126, 127, 128], 40 | "COMA": [130, 131, 132] # 129 133 41 | }, 42 | "The strip-shaped building task": { 43 | "QMIX": [134, 135, 136, 137, 138], 44 | "QTRAN": [139, 140, 141, 142, 143], 45 | "VDN": [144, 145, 146, 147, 148], 46 | "IQL": [149, 150, 151, 152, 153], 47 | "MAPPO": [154, 155, 156, 157, 158], 48 | "COMA": [159, 160, 161, 162, 163] 49 | } 50 | } 51 | 52 | data = {} 53 | 54 | # read data... 55 | for task, algos in info.items(): 56 | for algo, labels in algos.items(): 57 | for label in labels: 58 | path = "craft/PyMARL/results/sacred/" + str(label) + "/info.json" 59 | print(path) 60 | data1 = json.load(open(path)) 61 | return_mean_T = data1['return_mean_T'] 62 | return_mean = data1['return_mean'] 63 | # print(return_mean_T) 64 | # print(return_mean) 65 | test_return_mean_T = data1['test_return_mean_T'] 66 | test_return_mean = data1['test_return_mean'] 67 | # print(test_return_mean_T) 68 | # print(test_return_mean) 69 | 70 | x = np.array(return_mean_T) 71 | y = np.array(return_mean) 72 | 73 | x = np.array(test_return_mean_T) 74 | y = np.array(test_return_mean) 75 | 76 | if task not in data: 77 | data[task] = {} 78 | if algo not in data[task]: 79 | data[task][algo] = [] 80 | data[task][algo].append((x, y)) 81 | 82 | 83 | # plot! 84 | from matplotlib.pyplot import figure 85 | figure(figsize=(5, 4), dpi=80) 86 | for task in sorted(data.keys()): 87 | plt.clf() 88 | for algo in sorted(data[task].keys()): 89 | xs, ys = zip(*data[task][algo]) 90 | xs, ys = np.array(xs), np.array(ys) 91 | def cut(x, length): 92 | x_cut = np.empty([x.shape[0], length]) 93 | for i in range(x.shape[0]): 94 | x_cut[i] = x[i][0:length] 95 | return x_cut 96 | min_length = min(xs[i].shape[0] for i in range(xs.shape[0])) 97 | if task in ["task 06", "task 07"]: 98 | cutter = -1 99 | for i in range(0, xs[0].shape[0]): 100 | if xs[0][i] >= 1e6: 101 | cutter = i 102 | break 103 | min_length = min(min_length, cutter) 104 | print(min_length) 105 | xs = cut(xs, min_length) 106 | ys = cut(ys, min_length) 107 | assert xs.shape == ys.shape 108 | label = algo 109 | # Calculate for success rate 110 | success_rate_flag = False 111 | if success_rate_flag: 112 | success_rate = [[] for _ in range(0, len(ys))] 113 | max_val = None 114 | if task == "The strip-shaped building task": 115 | ranges = [8e5, 1e6] 116 | max_val = 4 117 | if task == "The block-shaped building task": 118 | ranges = [8e5, 1e6] 119 | max_val = 4 120 | if task == "The simple two-story building task": 121 | ranges = [1.5e6, 2e6] 122 | max_val = 3 123 | if task == "The complex two-story building task": 124 | ranges = [1.5e6, 2e6] 125 | max_val = 20 126 | if max_val is None: 127 | continue 128 | else: 129 | for i in range(0, len(ys[0])): 130 | for j in range(0, len(ys)): 131 | success = 0 132 | total = 1 133 | if ys[j][i] >= max_val: 134 | success += 1 135 | success_rate[j].append(success / total) 136 | print(len(success_rate)) 137 | ys = np.array(success_rate) 138 | print(ys.shape) 139 | 140 | plt.plot(xs[0], np.mean(ys, axis=0), label=label, linewidth=2, alpha=1.) 
141 | plt.fill_between(xs[0], np.mean(ys, axis=0)+np.std(ys, axis=0), np.mean(ys, axis=0)-np.std(ys, axis=0), alpha=0.25) 142 | plt.title('{}'.format(task)) 143 | plt.legend() 144 | if success_rate_flag: 145 | plt.savefig("SR_{}.pdf".format(task), bbox_inches='tight') 146 | else: 147 | plt.savefig("{}.pdf".format(task), bbox_inches='tight') 148 | 149 | 150 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/smartcar.urdf.xacro: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | transmission_interface/SimpleTransmission 66 | 67 | hardware_interface/VelocityJointInterface 68 | 69 | 70 | 1 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | transmission_interface/SimpleTransmission 130 | 131 | hardware_interface/PositionJointInterface 132 | 133 | 134 | hardware_interface/PositionJointInterface 135 | 1 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/smartcar_planner.py: -------------------------------------------------------------------------------- 1 | """ 2 | A meta planner for smartcars planning in multi-flat 4-connected grid maps, 3 | which supports planning with rotation and non-diagonal movement under action_mask constraints. 
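A search node captures the full car state (x, y, z, yaw, is_lift, the lifted object, and a moving_over_slope phase). Successors are the four axis-aligned moves and +/-90 degree rotations, each checked against the blackboard's action-mask proxy; moving across an unfolded slope adjusts z and the slope-crossing phase. Concrete planners implement _plan() on top of these primitives.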
4 | """ 5 | from ..grid_objs import FoldedSlope 6 | from ..grid_objs import ObjType 7 | 8 | 9 | class SmartCarNode: 10 | def __init__(self, 11 | x, 12 | y, 13 | z=1, 14 | yaw=0, 15 | is_lift=False, 16 | lift_obj=None, 17 | moving_over_slope=0): 18 | self.x = x 19 | self.y = y 20 | self.z = z 21 | self.yaw = yaw 22 | self.is_lift = is_lift 23 | self.lift_obj = lift_obj 24 | self.moving_over_slope = moving_over_slope 25 | 26 | @property 27 | def key(self): 28 | return (self.x, self.y, self.z, self.yaw) 29 | 30 | @property 31 | def pos(self): 32 | return (self.x, self.y, self.z) 33 | 34 | def copy(self): 35 | return SmartCarNode( 36 | x=self.x, 37 | y=self.y, 38 | z=self.z, 39 | yaw=self.yaw, 40 | is_lift=self.is_lift, 41 | lift_obj=self.new_obj(self.lift_obj), 42 | moving_over_slope=self.moving_over_slope) 43 | 44 | def new_obj(self, obj): 45 | return FoldedSlope(obj.yaw) if obj is not None and obj.type is ObjType.FoldedSlope else obj 46 | 47 | 48 | class SmartCarPlanner: 49 | Node = SmartCarNode 50 | 51 | def __init__(self, blackboard) -> None: 52 | self.move_cost = 1.0 53 | self.rotate_cost = 1.0 54 | self.blackboard = blackboard 55 | self.action_mask_proxy = None 56 | self.grid = None 57 | self.agent_id = None 58 | 59 | def reset(self): 60 | """ 61 | Call blackboard.reset() before this function 62 | """ 63 | self.action_mask_proxy = self.blackboard.action_mask_proxy 64 | self._set_grid(self.blackboard.grid) 65 | 66 | def _set_grid(self, grid): 67 | self.grid = grid 68 | self.length = len(grid) 69 | self.width = len(grid[0]) 70 | self.height = len(grid[0][0]) 71 | 72 | def is_inbound(self, x, y): 73 | if x < 0 or x >= self.length: 74 | return False 75 | if y < 0 or y >= self.width: 76 | return False 77 | return True 78 | 79 | def can_move(self, node: SmartCarNode, move_dir): 80 | if not self.is_inbound(node.x + move_dir[0], node.y + move_dir[1]): 81 | return False 82 | 83 | kw = dict( 84 | x=node.x, 85 | y=node.y, 86 | z=node.z, 87 | yaw=node.yaw, 88 | is_lift=node.is_lift, 89 | lift_obj=node.lift_obj, 90 | moving_over_slope=node.moving_over_slope 91 | ) 92 | return self.action_mask_proxy.move_action_mask(move_dir, self.agent_id, kw) 93 | 94 | def can_rotate(self, node: SmartCarNode, rotate_dir): 95 | kw = dict( 96 | x=node.x, 97 | y=node.y, 98 | z=node.z, 99 | yaw=node.yaw, 100 | is_lift=node.is_lift, 101 | lift_obj=node.lift_obj, 102 | moving_over_slope=node.moving_over_slope 103 | ) 104 | return self.action_mask_proxy.rotate_mask(rotate_dir, self.agent_id, kw) 105 | 106 | def get_moved_node(self, curr_node: SmartCarNode, move_dir): 107 | node = curr_node.copy() 108 | node.x += move_dir[0] 109 | node.y += move_dir[1] 110 | 111 | obj = self.grid[node.x][node.y][node.z] 112 | blow_obj = self.grid[node.x][node.y][node.z - 1] 113 | if obj.type is ObjType.UnfoldedSlopeFoot and node.moving_over_slope == 0: 114 | node.moving_over_slope = 1 115 | elif obj.type is ObjType.UnfoldedSlopeBody and node.moving_over_slope == 1: 116 | node.moving_over_slope = 2 117 | node.z += 1 118 | elif blow_obj.type is ObjType.UnfoldedSlopeBody and node.moving_over_slope == 0: 119 | node.moving_over_slope = 2 120 | elif blow_obj.type is ObjType.UnfoldedSlopeFoot and node.moving_over_slope == 2: 121 | node.moving_over_slope = 1 122 | node.z -= 1 123 | else: 124 | node.moving_over_slope = 0 125 | return node 126 | 127 | def get_rotated_node(self, curr_node: SmartCarNode, rotate_dir): 128 | node = curr_node.copy() 129 | node.yaw = (node.yaw + rotate_dir) % 4 130 | if node.is_lift and node.lift_obj.type is 
ObjType.FoldedSlope: 131 | node.lift_obj.yaw = (node.lift_obj.yaw + rotate_dir) % 4 132 | return node 133 | 134 | def get_successors(self, curr_node: SmartCarNode): 135 | successors = [] 136 | 137 | # move action 138 | for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]: 139 | if not self.can_move(curr_node, (dx, dy)): 140 | continue 141 | node = self.get_moved_node(curr_node, (dx, dy)) 142 | successors.append(node) 143 | 144 | # rotate action 145 | for d_yaw in [-1, 1]: 146 | if not self.can_rotate(curr_node, d_yaw): 147 | continue 148 | node = self.get_rotated_node(curr_node, d_yaw) 149 | successors.append(node) 150 | 151 | return successors 152 | 153 | def plan(self, agent_id, start_x, start_y, start_z, yaw, is_lift, lift_obj, moving_over_slope, 154 | goal_x, goal_y, goal_z, verbose=False): 155 | 156 | assert self.grid is not None, \ 157 | 'Grid map not specified, please call set_grid() before planning' 158 | 159 | self.agent_id = agent_id 160 | 161 | start_node = self.Node( 162 | x=start_x, y=start_y, z=start_z, 163 | yaw=yaw, is_lift=is_lift, lift_obj=lift_obj, 164 | moving_over_slope=moving_over_slope) 165 | 166 | goal_node = self.Node(x=goal_x, y=goal_y, z=goal_z) 167 | 168 | return self._plan(start_node, goal_node, verbose) 169 | 170 | def _plan(self, start_node, goal_node, verbose): 171 | raise NotImplementedError 172 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/matrix_env.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | import time 4 | 5 | import gym 6 | import numpy as np 7 | import pybullet 8 | from pybullet_utils import bullet_client 9 | 10 | from craft import utils 11 | 12 | from .action_mask import ActionMask 13 | from .blackboard import Blackboard 14 | from .goal import Goal 15 | from .matrix_to_bullet import MatrixToBullet 16 | from .planner import BreadthFirstSearch 17 | from .scene import Scene 18 | from .smartcar import Smartcar 19 | 20 | 21 | class MatrixEnv(gym.Env): 22 | 23 | def __init__(self, enable_render, init_blueprint_path, env_config): 24 | self.enable_render = enable_render 25 | self._blackboard = Blackboard(init_blueprint_path) 26 | search_depth = env_config.get('search_depth', 10) 27 | self.smartcars = [ 28 | Smartcar(self._blackboard, i, search_depth=search_depth) 29 | for i in range(self._blackboard.smartcar_num) 30 | ] 31 | self.scene = Scene(self._blackboard) 32 | 33 | self._blackboard.smartcars = self.smartcars 34 | self._blackboard.scene = self.scene 35 | self._blackboard.action_mask_proxy = ActionMask(self._blackboard) 36 | self._blackboard.bfs = BreadthFirstSearch(self._blackboard) 37 | 38 | self._blackboard.goal = Goal(self._blackboard) 39 | 40 | if self.enable_render: 41 | self._bullet_client = bullet_client.BulletClient( 42 | connection_mode=pybullet.GUI) 43 | else: 44 | self._bullet_client = bullet_client.BulletClient( 45 | connection_mode=pybullet.DIRECT) 46 | self._blackboard._bullet_client = self._bullet_client 47 | self.matrix_to_bullet = MatrixToBullet(self._bullet_client, 48 | self._blackboard) 49 | self._step = 0 50 | self.env_config = env_config 51 | if "work_mode" in self.env_config: 52 | # 0: train mode, 1: record mode, 2: play mode 53 | self.work_mode = self.env_config["work_mode"] 54 | else: 55 | self.work_mode = 0 56 | self._total_step = 0 57 | 58 | def predict(self, action: list, blackboard=None): 59 | """predict the observation after an action with specific blackboard.""" 60 | predict_blackboard = 
blackboard 61 | if predict_blackboard is None: 62 | predict_blackboard = copy.deepcopy(self._blackboard) 63 | 64 | reward = 0 65 | done = False 66 | action_order = np.arange(predict_blackboard.smartcar_num) 67 | random.shuffle(action_order) 68 | for i in action_order: 69 | predict_blackboard.smartcars[i].step(action[i]) 70 | if done: 71 | reward = 1 72 | break 73 | done = self._is_done(predict_blackboard) 74 | obs = self.get_obs(predict_blackboard) 75 | info = {predict_blackboard} 76 | return obs, reward, done, info 77 | 78 | def reset(self): 79 | blueprint = random.choice(self._blackboard.blueprint_path_list) 80 | self._blackboard.reset(blueprint) 81 | self._blackboard.goal.reset() 82 | self.scene.reset() 83 | for smartcar in self.smartcars: 84 | smartcar.reset() 85 | self.matrix_to_bullet.sync() 86 | if self.work_mode == 1: 87 | self.record_action = [] 88 | self.record_action_order = [] 89 | elif self.work_mode == 2: 90 | assert "yaml_save_path" in self.env_config, \ 91 | "yaml_save_path arg is needed in play mode" 92 | assert "action_yaml_path" in self.env_config, \ 93 | "action_yaml_path arg is needed in play mode" 94 | self.record_action, self.record_action_order = \ 95 | utils.load_action2yaml(self._blackboard.smartcar_num, 96 | self.env_config['action_yaml_path']) 97 | self._step = 0 98 | 99 | return self.get_obs() 100 | 101 | def step(self, action: list): 102 | done = False 103 | action_order = np.arange(self._blackboard.smartcar_num) 104 | random.shuffle(action_order) 105 | 106 | if self.work_mode == 2: 107 | try: 108 | action, action_order = self.record_action[ 109 | self._step], self.record_action_order[self._step] 110 | except IndexError: 111 | print("record actions reach limit.") 112 | return None, 0, False, {} 113 | 114 | for i in action_order: 115 | self.smartcars[i].step(action[i]) 116 | 117 | obs = self.get_obs() 118 | reward = self._compute_reward() 119 | done = self._is_done() 120 | info = {} 121 | 122 | self.matrix_to_bullet.sync() 123 | if self.work_mode == 1: 124 | assert "yaml_save_path" in self.env_config, \ 125 | "yaml_save_path arg is needed in record mode" 126 | timestamp = time.strftime("%b-%d-%H:%M:%S", time.localtime()) 127 | utils.save_scene2yaml(self._blackboard, self._step, timestamp, 128 | self.env_config["yaml_save_path"]) 129 | self.record_action.append(action) 130 | self.record_action_order.append(action_order) 131 | if self._step == 0: 132 | self.act_timestamp = timestamp 133 | utils.save_action2yaml(self.record_action, 134 | self.record_action_order, 135 | self.act_timestamp, 136 | self.env_config["yaml_save_path"]) 137 | 138 | self._step += 1 139 | self._total_step += 1 140 | return obs, reward, done, info 141 | 142 | def get_obs(self, blackboard=None): 143 | raise NotImplementedError 144 | 145 | def _is_done(self, blackboard=None): 146 | """Indicates whether or not the episode is done.""" 147 | return False 148 | 149 | def _compute_reward(self, blackboard=None): 150 | """Calculates the reward to give based on the observations given.""" 151 | return 1 152 | 153 | def get_flag_pos(self, blackboard=None): 154 | raise NotImplementedError 155 | -------------------------------------------------------------------------------- /PyMARL/src/learners/q_learner.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from components.episode_buffer import EpisodeBatch 3 | from modules.mixers.vdn import VDNMixer 4 | from modules.mixers.qmix import QMixer 5 | import torch as th 6 | from torch.optim import RMSprop 7 | 
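# The learner below fits the shared agent network (plus an optional VDN/QMIX mixer)
# with 1-step TD targets computed from a periodically refreshed target copy:
#
#     y_t = r_t + gamma * (1 - terminated_t) * mix_target(max_a' Q_target(s_{t+1}, a'), state_{t+1})
#
# With double_q enabled, the argmax action is chosen by the live network and then
# evaluated by the target network before mixing. Timesteps that only exist as
# padding are masked out of the squared TD error before it is averaged.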
8 | 9 | class QLearner: 10 | def __init__(self, mac, scheme, logger, args): 11 | self.args = args 12 | self.mac = mac 13 | self.logger = logger 14 | 15 | self.params = list(mac.parameters()) 16 | 17 | self.last_target_update_episode = 0 18 | 19 | self.mixer = None 20 | if args.mixer is not None: 21 | if args.mixer == "vdn": 22 | self.mixer = VDNMixer() 23 | elif args.mixer == "qmix": 24 | self.mixer = QMixer(args) 25 | else: 26 | raise ValueError("Mixer {} not recognised.".format(args.mixer)) 27 | self.params += list(self.mixer.parameters()) 28 | self.target_mixer = copy.deepcopy(self.mixer) 29 | 30 | self.optimiser = RMSprop(params=self.params, lr=args.lr, alpha=args.optim_alpha, eps=args.optim_eps) 31 | 32 | # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC 33 | self.target_mac = copy.deepcopy(mac) 34 | 35 | self.log_stats_t = -self.args.learner_log_interval - 1 36 | 37 | def train(self, batch: EpisodeBatch, t_env: int, episode_num: int): 38 | # Get the relevant quantities 39 | rewards = batch["reward"][:, :-1] 40 | actions = batch["actions"][:, :-1] 41 | terminated = batch["terminated"][:, :-1].float() 42 | mask = batch["filled"][:, :-1].float() 43 | mask[:, 1:] = mask[:, 1:] * (1 - terminated[:, :-1]) 44 | avail_actions = batch["avail_actions"] 45 | 46 | # Calculate estimated Q-Values 47 | mac_out = [] 48 | self.mac.init_hidden(batch.batch_size) 49 | for t in range(batch.max_seq_length): 50 | agent_outs = self.mac.forward(batch, t=t) 51 | mac_out.append(agent_outs) 52 | mac_out = th.stack(mac_out, dim=1) # Concat over time 53 | 54 | # Pick the Q-Values for the actions taken by each agent 55 | chosen_action_qvals = th.gather(mac_out[:, :-1], dim=3, index=actions).squeeze(3) # Remove the last dim 56 | 57 | # Calculate the Q-Values necessary for the target 58 | target_mac_out = [] 59 | self.target_mac.init_hidden(batch.batch_size) 60 | for t in range(batch.max_seq_length): 61 | target_agent_outs = self.target_mac.forward(batch, t=t) 62 | target_mac_out.append(target_agent_outs) 63 | 64 | # We don't need the first timesteps Q-Value estimate for calculating targets 65 | target_mac_out = th.stack(target_mac_out[1:], dim=1) # Concat across time 66 | 67 | # Mask out unavailable actions 68 | target_mac_out[avail_actions[:, 1:] == 0] = -9999999 69 | 70 | # Max over target Q-Values 71 | if self.args.double_q: 72 | # Get actions that maximise live Q (for double q-learning) 73 | mac_out_detach = mac_out.clone().detach() 74 | mac_out_detach[avail_actions == 0] = -9999999 75 | cur_max_actions = mac_out_detach[:, 1:].max(dim=3, keepdim=True)[1] 76 | target_max_qvals = th.gather(target_mac_out, 3, cur_max_actions).squeeze(3) 77 | else: 78 | target_max_qvals = target_mac_out.max(dim=3)[0] 79 | 80 | # Mix 81 | if self.mixer is not None: 82 | chosen_action_qvals = self.mixer(chosen_action_qvals, batch["state"][:, :-1]) 83 | target_max_qvals = self.target_mixer(target_max_qvals, batch["state"][:, 1:]) 84 | 85 | # Calculate 1-step Q-Learning targets 86 | targets = rewards + self.args.gamma * (1 - terminated) * target_max_qvals 87 | 88 | # Td-error 89 | td_error = (chosen_action_qvals - targets.detach()) 90 | 91 | mask = mask.expand_as(td_error) 92 | 93 | # 0-out the targets that came from padded data 94 | masked_td_error = td_error * mask 95 | 96 | # Normal L2 loss, take mean over actual data 97 | loss = (masked_td_error ** 2).sum() / mask.sum() 98 | 99 | # Optimise 100 | self.optimiser.zero_grad() 101 | loss.backward() 102 | grad_norm = 
th.nn.utils.clip_grad_norm_(self.params, self.args.grad_norm_clip) 103 | self.optimiser.step() 104 | 105 | if (episode_num - self.last_target_update_episode) / self.args.target_update_interval >= 1.0: 106 | self._update_targets() 107 | self.last_target_update_episode = episode_num 108 | 109 | if t_env - self.log_stats_t >= self.args.learner_log_interval: 110 | self.logger.log_stat("loss", loss.item(), t_env) 111 | self.logger.log_stat("grad_norm", grad_norm, t_env) 112 | mask_elems = mask.sum().item() 113 | self.logger.log_stat("td_error_abs", (masked_td_error.abs().sum().item()/mask_elems), t_env) 114 | self.logger.log_stat("q_taken_mean", (chosen_action_qvals * mask).sum().item()/(mask_elems * self.args.n_agents), t_env) 115 | self.logger.log_stat("target_mean", (targets * mask).sum().item()/(mask_elems * self.args.n_agents), t_env) 116 | self.log_stats_t = t_env 117 | 118 | def _update_targets(self): 119 | self.target_mac.load_state(self.mac) 120 | if self.mixer is not None: 121 | self.target_mixer.load_state_dict(self.mixer.state_dict()) 122 | self.logger.console_logger.info("Updated target network") 123 | 124 | def cuda(self): 125 | self.mac.cuda() 126 | self.target_mac.cuda() 127 | if self.mixer is not None: 128 | self.mixer.cuda() 129 | self.target_mixer.cuda() 130 | 131 | def save_models(self, path): 132 | self.mac.save_models(path) 133 | if self.mixer is not None: 134 | th.save(self.mixer.state_dict(), "{}/mixer.th".format(path)) 135 | th.save(self.optimiser.state_dict(), "{}/opt.th".format(path)) 136 | 137 | def load_models(self, path): 138 | self.mac.load_models(path) 139 | # Not quite right but I don't want to save target networks 140 | self.target_mac.load_models(path) 141 | if self.mixer is not None: 142 | self.mixer.load_state_dict(th.load("{}/mixer.th".format(path), map_location=lambda storage, loc: storage)) 143 | self.optimiser.load_state_dict(th.load("{}/opt.th".format(path), map_location=lambda storage, loc: storage)) 144 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/matrix_to_bullet.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy as np 3 | from scipy.spatial.transform import Rotation as R 4 | from .bullet_block import BulletBlocks 5 | from .bullet_wall import BulletWalls 6 | from .bullet_flag import BulletFlag 7 | from .bullet_goal import BulletGoal 8 | from .bullet_slope import BulletSlope 9 | from .bullet_smartcar import BulletSmartcar 10 | from .grid_objs import ObjType 11 | 12 | 13 | class MatrixToBullet: 14 | def __init__(self, bullet_client, blackboard): 15 | self._bullet_client = bullet_client 16 | self._blackboard = blackboard 17 | self.area_size = blackboard.area_size 18 | self.block_length = blackboard.BLOCK_LENGTH 19 | self.block_height = blackboard.BLOCK_HEIGHT 20 | 21 | bullet_client.resetDebugVisualizerCamera( 22 | cameraDistance=2, 23 | cameraYaw=0, 24 | cameraPitch=-30, 25 | cameraTargetPosition=[5, 0, 4], 26 | ) 27 | 28 | bullet_client.setPhysicsEngineParameter(collisionFilterMode=0) 29 | bullet_client.configureDebugVisualizer(bullet_client.COV_ENABLE_RENDERING, 0) 30 | 31 | self.blocks = BulletBlocks(bullet_client, blackboard) 32 | self.slopes = [ 33 | BulletSlope(bullet_client) for _ in range(self._blackboard.slope_num) 34 | ] 35 | self.smartcars = [ 36 | BulletSmartcar(bullet_client) for _ in range(self._blackboard.smartcar_num) 37 | ] 38 | self.flag = BulletFlag(bullet_client) 39 | self.goal = BulletGoal(bullet_client) 
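# Wall bodies are only instantiated when the blueprint declares a non-zero wall_num.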
40 | if self._blackboard.wall_num != 0: 41 | self.walls = BulletWalls(bullet_client, blackboard) 42 | 43 | bullet_client.configureDebugVisualizer(bullet_client.COV_ENABLE_RENDERING, 1) 44 | 45 | def yaw_to_quaternion(yaw): 46 | r = R.from_euler("z", yaw, degrees=False) 47 | return r.as_quat() 48 | 49 | self.yaw_to_quat = { 50 | 0: yaw_to_quaternion(0 * np.pi / 2), 51 | 1: yaw_to_quaternion(1 * np.pi / 2), 52 | 2: yaw_to_quaternion(2 * np.pi / 2), 53 | 3: yaw_to_quaternion(3 * np.pi / 2), 54 | } 55 | 56 | def get_bullet_position(self, i, j, k): 57 | x = i * self.block_length + 0.5 * self.block_length 58 | y = j * self.block_length + 0.5 * self.block_length 59 | z = k * self.block_height 60 | return [x, y, z] 61 | 62 | def sync(self): 63 | grid = self._blackboard.grid 64 | wall_id = 0 65 | block_id = 0 66 | slope_id = 0 67 | for i, j, k in itertools.product( 68 | range(self.area_size[0]), 69 | range(self.area_size[1]), 70 | range(1, self.area_size[2] + 1), 71 | ): 72 | obj = grid[i][j][k] 73 | 74 | if self._blackboard.wall_num != 0 and obj.type is ObjType.Wall: 75 | position = self.get_bullet_position(i, j, k) 76 | self._bullet_client.resetBasePositionAndOrientation( 77 | self.walls.ids[wall_id], position, self.yaw_to_quat[0] 78 | ) 79 | wall_id += 1 80 | 81 | if obj.type is ObjType.Block: 82 | position = self.get_bullet_position(i, j, k) 83 | self._bullet_client.resetBasePositionAndOrientation( 84 | self.blocks.ids[block_id], position, self.yaw_to_quat[0] 85 | ) 86 | block_id += 1 87 | 88 | if obj.type is ObjType.FoldedSlope: 89 | position = self.get_bullet_position(i, j, k - 1) 90 | self._bullet_client.resetBasePositionAndOrientation( 91 | self.slopes[slope_id].robot_id, position, self.yaw_to_quat[obj.yaw] 92 | ) 93 | self.slopes[slope_id].fold() 94 | slope_id += 1 95 | 96 | if obj.type is ObjType.UnfoldedSlopeBody: 97 | position = self.get_bullet_position(i, j, k - 1) 98 | self._bullet_client.resetBasePositionAndOrientation( 99 | self.slopes[slope_id].robot_id, position, self.yaw_to_quat[obj.yaw] 100 | ) 101 | self.slopes[slope_id].unfold() 102 | slope_id += 1 103 | 104 | if obj.type is ObjType.Flag: 105 | position = self.get_bullet_position(i, j, k - 1) 106 | self._bullet_client.resetBasePositionAndOrientation( 107 | self.flag.robot_id, position, self.yaw_to_quat[0] 108 | ) 109 | 110 | for smartcar_id, smartcar in enumerate(self._blackboard.smartcars): 111 | position = self.get_bullet_position(smartcar.x, smartcar.y, smartcar.z - 1) 112 | position[2] += 0.043 113 | self._bullet_client.resetBasePositionAndOrientation( 114 | self.smartcars[smartcar_id].robot_id, 115 | position, 116 | self.yaw_to_quat[smartcar.yaw], 117 | ) 118 | if smartcar.is_lift: 119 | if smartcar.lift_obj.type is ObjType.Block: 120 | position = self.get_bullet_position( 121 | smartcar.x, smartcar.y, smartcar.z + 1 122 | ) 123 | self._bullet_client.resetBasePositionAndOrientation( 124 | self.blocks.ids[block_id], position, self.yaw_to_quat[0] 125 | ) 126 | block_id += 1 127 | elif smartcar.lift_obj.type is ObjType.FoldedSlope: 128 | position = self.get_bullet_position( 129 | smartcar.x, smartcar.y, smartcar.z 130 | ) 131 | self._bullet_client.resetBasePositionAndOrientation( 132 | self.slopes[slope_id].robot_id, 133 | position, 134 | self.yaw_to_quat[smartcar.lift_obj.yaw], 135 | ) 136 | self.slopes[slope_id].fold() 137 | slope_id += 1 138 | elif smartcar.lift_obj.type is ObjType.Flag: 139 | position = self.get_bullet_position( 140 | smartcar.x, smartcar.y, smartcar.z 141 | ) 142 | 
self._bullet_client.resetBasePositionAndOrientation( 143 | self.flag.robot_id, position, self.yaw_to_quat[0] 144 | ) 145 | 146 | goal_coord = self.get_bullet_position( 147 | self._blackboard.goal.x, 148 | self._blackboard.goal.y, 149 | self._blackboard.goal.z - 1, 150 | ) 151 | self._bullet_client.resetBasePositionAndOrientation( 152 | self.goal.robot_id, goal_coord, self.yaw_to_quat[0] 153 | ) 154 | 155 | self._bullet_client.stepSimulation() 156 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from enum import IntEnum, auto, unique 3 | from math import cos, sin 4 | 5 | import numpy as np 6 | import yaml 7 | from scipy.spatial.transform import Rotation as R 8 | 9 | from .grid_objs import ObjType 10 | 11 | 12 | @unique 13 | class Direction(IntEnum): 14 | """ 15 | World coordinate system 16 | DIR_0: -- > 17 | yaw = 0 18 | 19 | DIR_1: ^ 20 | | 21 | yaw = np.pi * 0.5 22 | 23 | DIR_2: < -- 24 | yaw = np.pi * 1 25 | 26 | DIR_3: | 27 | v 28 | yaw = np.pi * 1.5 29 | """ 30 | 31 | DIR_0 = 0 32 | DIR_1 = auto() 33 | DIR_2 = auto() 34 | DIR_3 = auto() 35 | 36 | 37 | def next_step(x, y, theta): 38 | next_x = x + round(cos(theta)) 39 | next_y = y + round(sin(theta)) 40 | return next_x, next_y 41 | 42 | 43 | def world_to_local(move_dir, yaw): 44 | theta = yaw * np.pi / 2 45 | r = R.from_matrix( 46 | [[cos(theta), -sin(theta), 0], [sin(theta), cos(theta), 0], [0, 0, 1]] 47 | ) 48 | move_dir = np.append(move_dir, 0) 49 | a = r.inv().apply(move_dir) 50 | return a[:2].astype(np.int) 51 | 52 | 53 | def is_move_action(action): 54 | return 6 <= action <= 9 55 | 56 | 57 | def is_smartcar_on_slope(smartcar): 58 | return smartcar.moving_over_slope 59 | 60 | 61 | def save_scene2yaml(blackboard, step, timestamp, saved_path): 62 | for smartcar in blackboard.smartcars: 63 | if is_smartcar_on_slope(smartcar): 64 | return 65 | step = str(step).zfill(5) 66 | timestamp = f"{timestamp}-{step}" 67 | blackboard._bullet_client.addUserDebugText(text=timestamp, 68 | textPosition=[6, 12, 7], 69 | textColorRGB=[0, 0, 1], 70 | lifeTime=0.7, 71 | textSize=1.2, 72 | ) 73 | grid_items = { 74 | "yaml_generated_time": timestamp, 75 | "area_length": blackboard.area_size[0], 76 | "area_width": blackboard.area_size[1], 77 | "area_height": blackboard.area_size[2], 78 | "wall_num": 0, 79 | "block_num": 0, 80 | "slope_num": 0, 81 | "smartcar_num": 0, 82 | "legged_robot_num": 2, 83 | } 84 | wall_list = [] 85 | block_list = [] 86 | fold_slope_list = [] 87 | unfold_slope_list = [] 88 | flag_list = [] 89 | goal_list = [] 90 | smartcar_list = [] 91 | legged_robot_list = [ 92 | {"id": 0, "x": 1, "y": 1, "z": 1, "yaw": 0}, 93 | {"id": 1, "x": 1, "y": 2, "z": 1, "yaw": 0}, 94 | ] 95 | 96 | grid = blackboard.grid 97 | wall_id = 0 98 | block_id = 0 99 | slope_id = 0 100 | for i, j, k in itertools.product( 101 | range(blackboard.area_size[0]), 102 | range(blackboard.area_size[1]), 103 | range(1, blackboard.area_size[2] + 1), 104 | ): 105 | obj = grid[i][j][k] 106 | 107 | if obj.type is ObjType.Wall: 108 | wall_list.append({"id": wall_id, "x": i, "y": j, "z": k}) 109 | wall_id += 1 110 | 111 | if obj.type is ObjType.Block: 112 | block_list.append({"id": block_id, "x": i, "y": j, "z": k}) 113 | block_id += 1 114 | 115 | if obj.type is ObjType.FoldedSlope: 116 | fold_slope_list.append( 117 | {"id": slope_id, "x": i, "y": j, "z": k, "yaw": obj.yaw} 118 | ) 119 | slope_id += 1 120 | 121 | 
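        # Both folded and unfolded slopes advance the same slope_id counter, so the
        # slope_num written to the YAML header below counts every slope in the scene.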
if obj.type is ObjType.UnfoldedSlopeBody: 122 | unfold_slope_list.append( 123 | {"id": slope_id, "x": i, "y": j, "z": k, "yaw": obj.yaw} 124 | ) 125 | slope_id += 1 126 | 127 | if obj.type is ObjType.Flag: 128 | flag_list.append({"id": 0, "x": i, "y": j, "z": k}) 129 | 130 | for smartcar_id, smartcar in enumerate(blackboard.smartcars): 131 | smartcar_list.append( 132 | { 133 | "id": int(smartcar_id), 134 | "x": int(smartcar.x), 135 | "y": int(smartcar.y), 136 | "z": int(smartcar.z), 137 | "yaw": int(smartcar.yaw), 138 | } 139 | ) 140 | if smartcar.is_lift: 141 | if smartcar.lift_obj.type is ObjType.Block: 142 | block_list.append( 143 | {"id": int(block_id), "x": int(smartcar.x), "y": int(smartcar.y), "z": int(smartcar.z)} 144 | ) 145 | block_id += 1 146 | elif smartcar.lift_obj.type is ObjType.FoldedSlope: 147 | fold_slope_list.append( 148 | { 149 | "id": int(slope_id), 150 | "x": int(smartcar.x), 151 | "y": int(smartcar.y), 152 | "z": int(smartcar.z), 153 | "yaw": int(smartcar.lift_obj.yaw), 154 | } 155 | ) 156 | slope_id += 1 157 | elif smartcar.lift_obj.type is ObjType.Flag: 158 | flag_list.append( 159 | {"id": 0, "x": int(smartcar.x), "y": int(smartcar.y), "z": int(smartcar.z)} 160 | ) 161 | 162 | goal_list.append( 163 | { 164 | "id": 0, 165 | "x": int(blackboard.goal.x), 166 | "y": int(blackboard.goal.y), 167 | "z": int(blackboard.goal.z), 168 | } 169 | ) 170 | 171 | grid_items.update({"wall_num": wall_id}) 172 | grid_items.update({"block_num": block_id}) 173 | grid_items.update({"slope_num": slope_id}) 174 | grid_items.update({"smartcar_num": len(blackboard.smartcars)}) 175 | with open(f"{saved_path}/{timestamp}.yaml", "w") as f: 176 | yaml.dump_all( 177 | [ 178 | grid_items, 179 | {"block": block_list}, 180 | {"fold_slope": fold_slope_list}, 181 | {"unfold_slope": unfold_slope_list}, 182 | {"smartcar": smartcar_list}, 183 | {"flag": flag_list}, 184 | {"goal": goal_list}, 185 | {"legged_robot": legged_robot_list}, 186 | {"wall": wall_list}, 187 | ], 188 | f, 189 | sort_keys=False, 190 | ) 191 | 192 | 193 | def save_action2yaml(record_action, record_action_order, timestamp, saved_path): 194 | action = np.array(record_action) 195 | action_order = np.array(record_action_order) 196 | data = np.concatenate((action, action_order), axis=1) 197 | # saved format: action, action order 198 | with open(f"{saved_path}/{timestamp}_record_action.yaml", "w") as f: 199 | np.savetxt(f, data, fmt='%i') 200 | 201 | 202 | def load_action2yaml(robot_num, saved_path): 203 | with open(f"{saved_path}", "r") as f: 204 | data = np.loadtxt(f, dtype=int) 205 | action, action_order = data[:, :robot_num], data[:, robot_num:] 206 | return action, action_order 207 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/scene.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from craft import utils 4 | 5 | from .blackboard import Point 6 | from .grid_objs import (Block, Flag, FoldedSlope, FoldedSlopeGear, ObjType, 7 | UnfoldedSlopeBody, UnfoldedSlopeFoot, Wall) 8 | 9 | 10 | class Scene: 11 | 12 | def __init__(self, blackboard): 13 | self._blackboard = blackboard 14 | self.area_size = self._blackboard.area_size 15 | 16 | def checke_coord_legal(self, obj): 17 | if obj["x"] >= self.area_size[0] or \ 18 | obj["y"] >= self.area_size[1] or \ 19 | obj["z"] > self.area_size[2]: 20 | raise ValueError("object coordinate out of bounds") 21 | 22 | def random_place_block(self, max_id): 23 | i = 0 24 | while i < max_id: 
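            # Ask the blackboard for a random free spawn cell, place a Block there,
            # and record the point in the spawn point set.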
25 | name = "block" 26 | p, _ = self._blackboard.random_spawn_obj(name) 27 | self._blackboard.grid[p.x][p.y][p.z] = Block() 28 | self._blackboard.spawn_point_set.add(p) 29 | i += 1 30 | 31 | def random_place_slope(self, max_id): 32 | i = 0 33 | while i < max_id: 34 | name = "slope" 35 | while True: 36 | p, direction = self._blackboard.random_spawn_obj(name) 37 | # +2 to consider the Gear 38 | pre_x, pre_y = utils.next_step(p.x, p.y, 39 | (direction + 2) * np.pi / 2) 40 | next_x, next_y = utils.next_step(p.x, p.y, 41 | direction * np.pi / 2) 42 | if self._blackboard.grid[pre_x][pre_y][p.z].type is not \ 43 | ObjType.Wall and \ 44 | self._blackboard.grid[next_x][next_y][p.z].type \ 45 | is ObjType.Air: 46 | break 47 | self._blackboard.grid[p.x][p.y][p.z] = FoldedSlope(direction) 48 | self._blackboard.grid[next_x][next_y][p.z] = FoldedSlopeGear( 49 | direction) 50 | self._blackboard.spawn_point_set.add(p) 51 | self._blackboard.spawn_point_set.add(Point(next_x, next_y, p.z)) 52 | i += 1 53 | 54 | def z_axis(self, elem): 55 | return elem["z"] 56 | 57 | def reset(self): 58 | self.template = self._blackboard.template 59 | 60 | # must process block first! 61 | try: 62 | self.template["block"].sort(key=self.z_axis) 63 | for i in self.template["block"]: 64 | self.checke_coord_legal(i) 65 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Block() 66 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 67 | 1].obj_on_it = -2 68 | self._blackboard.spawn_point_set.add( 69 | Point(i["x"], i["y"], i["z"])) 70 | # place_block = i["id"] + 1 71 | except KeyError as e: 72 | print("KeyError, ", e) 73 | pass 74 | try: 75 | self.template["fold_slope"].sort(key=self.z_axis) 76 | for i in self.template["fold_slope"]: 77 | self.checke_coord_legal(i) 78 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = FoldedSlope( 79 | i["yaw"]) 80 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 81 | 1].obj_on_it = -2 82 | n_x, n_y = utils.next_step(i["x"], i["y"], 83 | i["yaw"] * np.pi / 2) 84 | self._blackboard.grid[n_x][n_y][i["z"]] = FoldedSlopeGear( 85 | i["yaw"]) 86 | self._blackboard.spawn_point_set.add( 87 | Point(i["x"], i["y"], i["z"])) 88 | # place_slope = i["id"] + 1 89 | except KeyError as e: 90 | print("KeyError, ", e) 91 | pass 92 | try: 93 | self.template["unfold_slope"].sort(key=self.z_axis) 94 | for i in self.template["unfold_slope"]: 95 | self.checke_coord_legal(i) 96 | self._blackboard.grid[i["x"]][i["y"]][ 97 | i["z"]] = UnfoldedSlopeBody(i["yaw"]) 98 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 99 | 1].obj_on_it = -2 100 | pre_x, pre_y = utils.next_step(i["x"], i["y"], 101 | (i["yaw"] + 2) * np.pi / 2) 102 | pre_obj = self._blackboard.grid[pre_x][pre_y][i["z"]] 103 | if isinstance(pre_obj, Block): 104 | pre_obj.near_unfold_slope_body = True 105 | self._blackboard.spawn_point_set.add( 106 | Point(i["x"], i["y"], i["z"])) 107 | n_x, n_y = utils.next_step(i["x"], i["y"], 108 | i["yaw"] * np.pi / 2) 109 | self._blackboard.grid[n_x][n_y][i["z"]] = UnfoldedSlopeFoot( 110 | i["yaw"]) 111 | self._blackboard.grid[n_x][n_y][i["z"] - 1].obj_on_it = -2 112 | front_x, front_y = utils.next_step(n_x, n_y, 113 | i["yaw"] * np.pi / 2) 114 | front_blow_obj = self._blackboard.grid[front_x][front_y][i["z"] 115 | - 1] 116 | if isinstance(front_blow_obj, Block): 117 | front_blow_obj.near_blow_unfold_slope_foot = True 118 | self._blackboard.spawn_point_set.add(Point(n_x, n_y, i["z"])) 119 | # place_slope = i["id"] + 1 120 | except KeyError as e: 121 | print("KeyError, ", e) 122 | pass 123 | try: 124 | for i in self.template["flag"]: 
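                # Flags are placed like blocks: validate the coordinate, fill the grid
                # cell, and mark the cell below as occupied (obj_on_it = -2).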
125 | self.checke_coord_legal(i) 126 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Flag() 127 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 128 | 1].obj_on_it = -2 129 | self._blackboard.spawn_point_set.add( 130 | Point(i["x"], i["y"], i["z"])) 131 | except KeyError as e: 132 | print("KeyError, ", e) 133 | pass 134 | try: 135 | for i in self.template["wall"]: 136 | self.checke_coord_legal(i) 137 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Wall() 138 | except KeyError as e: 139 | print("KeyError, ", e) 140 | pass 141 | place_block = len( 142 | self.template["block"]) if "block" in self.template else 0 143 | place_slope = 0 144 | if "fold_slope" in self.template: 145 | place_slope = len(self.template["fold_slope"]) 146 | if "unfold_slope" in self.template: 147 | place_slope += len(self.template["unfold_slope"]) 148 | 149 | self.random_place_slope(self._blackboard.slope_num - place_slope) 150 | self.random_place_block(self._blackboard.block_num - place_block) 151 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/breadth_first_search.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import numpy as np 3 | from .smartcar_planner import SmartCarPlanner 4 | from ..grid_objs import ObjType 5 | 6 | 7 | class BreadthFirstSearch(SmartCarPlanner): 8 | def __init__(self, blackboard): 9 | super().__init__(blackboard) 10 | 11 | def is_inbound(self, x, y, z): 12 | if x < self.x_min or x > self.x_max: 13 | return False 14 | if y < self.y_min or y > self.y_max: 15 | return False 16 | if z < self.z_min or z > self.z_max: 17 | return False 18 | return True 19 | 20 | def set_bound(self, size, start_x, start_y): 21 | if size is None: 22 | self.x_min, self.x_max = 0, self.length - 1 23 | self.y_min, self.y_max = 0, self.width - 1 24 | self.z_min, self.z_max = 0, self.height - 1 25 | return 26 | 27 | if isinstance(size, int): 28 | l, w, h = size, size, size 29 | elif isinstance(size, (tuple, list)): 30 | l, w, h = size 31 | else: 32 | raise TypeError 33 | 34 | self.x_min = max(0, start_x - l / 2) 35 | self.x_max = min(self.length - 1, start_x + l / 2) 36 | self.y_min = max(0, start_y - w / 2) 37 | self.y_max = min(self.width - 1, start_y + w / 2) 38 | self.z_min = 0 39 | self.z_max = min(self.height - 1, h) 40 | 41 | def can_move(self, node, move_dir): 42 | x, y, z, yaw, moving_over_slope = node 43 | kw = dict( 44 | x=x, 45 | y=y, 46 | z=z, 47 | yaw=yaw, 48 | is_lift=False, 49 | lift_obj=None, 50 | moving_over_slope=moving_over_slope 51 | ) 52 | return self.action_mask_proxy.move_action_mask(move_dir, self.agent_id, kw, ignore_cars=self.ignore_cars) 53 | 54 | def can_rotate(self, node, rotate_dir): 55 | x, y, z, yaw, moving_over_slope = node 56 | kw = dict( 57 | x=x, 58 | y=y, 59 | z=z, 60 | yaw=yaw, 61 | is_lift=False, 62 | lift_obj=None, 63 | moving_over_slope=moving_over_slope 64 | ) 65 | return self.action_mask_proxy.rotate_mask(rotate_dir, self.agent_id, kw, ignore_cars=self.ignore_cars) 66 | 67 | def get_moved_node(self, curr_node, move_dir): 68 | x, y, z, yaw, moving_over_slope = curr_node 69 | x += move_dir[0] 70 | y += move_dir[1] 71 | 72 | obj = self.grid[x][y][z] 73 | blow_obj = self.grid[x][y][z - 1] 74 | if obj.type is ObjType.UnfoldedSlopeFoot and moving_over_slope == 0: 75 | moving_over_slope = 1 76 | elif obj.type is ObjType.UnfoldedSlopeBody and moving_over_slope == 1: 77 | moving_over_slope = 2 78 | z += 1 79 | elif blow_obj.type is 
ObjType.UnfoldedSlopeBody and moving_over_slope == 0: 80 | moving_over_slope = 2 81 | elif blow_obj.type is ObjType.UnfoldedSlopeFoot and moving_over_slope == 2: 82 | moving_over_slope = 1 83 | z -= 1 84 | else: 85 | moving_over_slope = 0 86 | return (x, y, z, yaw, moving_over_slope) 87 | 88 | def get_rotated_node(self, curr_node, rotate_dir): 89 | x, y, z, yaw, moving_over_slope = curr_node 90 | yaw = (yaw + rotate_dir) % 4 91 | return (x, y, z, yaw, moving_over_slope) 92 | 93 | def get_successors(self, curr_node): 94 | successors = [] 95 | 96 | # move action 97 | for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]: 98 | if not self.can_move(curr_node, (dx, dy)): 99 | continue 100 | node = self.get_moved_node(curr_node, (dx, dy)) 101 | if not self.is_inbound(*node[:3]): 102 | continue 103 | successors.append(node) 104 | 105 | # rotate action 106 | for d_yaw in [-1, 1]: 107 | if not self.can_rotate(curr_node, d_yaw): 108 | continue 109 | node = self.get_rotated_node(curr_node, d_yaw) 110 | successors.append(node) 111 | 112 | return successors 113 | 114 | def search(self, agent_id, start_x, start_y, start_z, yaw, 115 | moving_over_slope=0, visualize=False, view_size=None, ignore_cars=False): 116 | """ 117 | :param size: int or tuple, specify bounded search space. If None, bfs searches the entire space. 118 | """ 119 | self.agent_id = agent_id 120 | self.reset() 121 | self.set_bound(view_size, start_x, start_y) 122 | self.ignore_cars = ignore_cars 123 | 124 | visited = np.zeros((self.length, self.width, self.height)) 125 | close_set = np.zeros((self.length, self.width, self.height, 4)) 126 | open_set = np.zeros((self.length, self.width, self.height, 4)) 127 | 128 | q = deque() 129 | start_node = (start_x, start_y, start_z, yaw, moving_over_slope) 130 | q.append(start_node) 131 | open_set[start_node[:4]] = 1 132 | 133 | block_length = self.blackboard.BLOCK_LENGTH 134 | block_height = self.blackboard.BLOCK_HEIGHT 135 | if visualize: 136 | import pybullet_data 137 | self.blackboard._bullet_client.setAdditionalSearchPath( 138 | pybullet_data.getDataPath()) 139 | vis_obj_list = [] 140 | 141 | self.blackboard._bullet_client.configureDebugVisualizer( 142 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 0) 143 | 144 | while q: 145 | curr_node = q.popleft() 146 | open_set[curr_node[:4]] = 0 147 | close_set[curr_node[:4]] = 1 148 | visited[curr_node[:3]] = 1 149 | 150 | if visualize: 151 | x, y, z = curr_node[:3] 152 | vis_x = x * block_length + block_length / 2 153 | vis_y = y * block_length + block_length / 2 154 | vis_z = (z - 1) * block_height + block_height / 2 155 | handle = self.blackboard._bullet_client.loadURDF( 156 | 'cube.urdf', (vis_x, vis_y, vis_z), [1, 0, 0, 1], globalScaling=0.05) 157 | for iii in range(self.blackboard._bullet_client.getNumJoints(handle)): 158 | self.blackboard._bullet_client.changeVisualShape( 159 | handle, iii, rgbaColor=[1, 0, 0, 1] 160 | ) 161 | vis_obj_list.append(handle) 162 | 163 | successed_nodes = self.get_successors(curr_node) 164 | for node in successed_nodes: 165 | if close_set[node[:4]] or open_set[node[:4]]: 166 | continue 167 | 168 | q.append(node) 169 | open_set[node[:4]] = 1 170 | 171 | if visualize: 172 | self.blackboard._bullet_client.configureDebugVisualizer( 173 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 1) 174 | input('===== Enter to remove BFS blocks =====\n') 175 | self.blackboard._bullet_client.configureDebugVisualizer( 176 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 0) 177 | for i in vis_obj_list: 178 | 
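                # Remove the temporary debug cubes that marked visited cells before
                # rendering is switched back on.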
self.blackboard._bullet_client.removeBody(i) 179 | self.blackboard._bullet_client.configureDebugVisualizer( 180 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 1) 181 | return visited 182 | --------------------------------------------------------------------------------
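Usage sketch (not part of the repository): one way BreadthFirstSearch.search() might be called to obtain the reachability mask for a single smartcar. The import path, the blackboard handle, and the car pose are assumptions inferred from the source layout above; in practice they come from an already-initialised environment rather than being constructed here.

    from craft.planner.breadth_first_search import BreadthFirstSearch

    planner = BreadthFirstSearch(blackboard)   # blackboard: an initialised Blackboard instance (assumed)
    visited = planner.search(
        agent_id=0,
        start_x=car.x, start_y=car.y, start_z=car.z, yaw=car.yaw,  # car: one of blackboard.smartcars (assumed)
        moving_over_slope=0,
        view_size=5,          # bound the search to roughly a 5-cell box around the start cell
        ignore_cars=False,
    )
    # visited[x, y, z] == 1 marks grid cells reachable through legal move/rotate actions.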