├── PyMARL ├── src │ ├── __init__.py │ ├── components │ │ ├── __init__.py │ │ ├── transforms.py │ │ ├── epsilon_schedules.py │ │ ├── standarize_stream.py │ │ └── action_selectors.py │ ├── modules │ │ ├── __init__.py │ │ ├── mixers │ │ │ ├── __init__.py │ │ │ ├── vdn.py │ │ │ ├── qmix.py │ │ │ └── qtran.py │ │ ├── agents │ │ │ ├── __init__.py │ │ │ ├── rnn_agent.py │ │ │ └── mlp_agent.py │ │ └── critics │ │ │ ├── mlp.py │ │ │ ├── __init__.py │ │ │ ├── maddpg.py │ │ │ ├── ac.py │ │ │ ├── maddpg_ns.py │ │ │ ├── ac_ns.py │ │ │ ├── centralV.py │ │ │ ├── coma.py │ │ │ ├── centralV_ns.py │ │ │ └── coma_ns.py │ ├── controllers │ │ ├── __init__.py │ │ └── basic_controller.py │ ├── utils │ │ ├── dict2namedtuple.py │ │ ├── rl_utils.py │ │ ├── timehelper.py │ │ └── logging.py │ ├── runners │ │ ├── __init__.py │ │ └── episode_runner.py │ ├── learners │ │ ├── __init__.py │ │ └── q_learner.py │ ├── config │ │ ├── algs │ │ │ ├── vdn.yaml │ │ │ ├── iql.yaml │ │ │ ├── vdn_beta.yaml │ │ │ ├── iql_beta.yaml │ │ │ ├── qmix_beta.yaml │ │ │ ├── qmix.yaml │ │ │ ├── qtran.yaml │ │ │ ├── coma.yaml │ │ │ └── mappo.yaml │ │ ├── envs │ │ │ ├── sc2_beta.yaml │ │ │ ├── sc2.yaml │ │ │ ├── multicar.yaml │ │ │ ├── free.yaml │ │ │ └── flag.yaml │ │ └── default.yaml │ ├── envs │ │ ├── __init__.py │ │ ├── multiagentenv.py │ │ ├── flagenv.py │ │ ├── freeenv.py │ │ └── multicar_env.py │ ├── main.py │ └── painter.py ├── README.local └── README.md.pymarl ├── CraftEnv ├── src │ ├── __init__.py │ ├── craft │ │ ├── planner │ │ │ ├── __init__.py │ │ │ ├── smartcar_planner.py │ │ │ └── breadth_first_search.py │ │ ├── data │ │ │ └── urdf │ │ │ │ ├── block │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── flag │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── goal │ │ │ │ ├── meshes │ │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ │ │ ├── slope │ │ │ │ ├── meshes │ │ │ │ │ ├── slope_end.STL │ │ │ │ │ ├── slope_base.STL │ │ │ │ │ ├── slope_end_collision.STL │ │ │ │ │ └── slope_base_collision.STL │ │ │ │ └── slope.urdf.xacro │ │ │ │ ├── smartcar │ │ │ │ ├── meshes │ │ │ │ │ ├── board.STL │ │ │ │ │ ├── wheel.STL │ │ │ │ │ └── body_link.STL │ │ │ │ └── smartcar.urdf.xacro │ │ │ │ └── wall │ │ │ │ ├── meshes │ │ │ │ └── base_link.STL │ │ │ │ └── block.urdf │ │ ├── blueprint │ │ │ ├── block_shaped_goal.yaml │ │ │ ├── strip_shaped_goal.yaml │ │ │ ├── simple_twolayer_goal.yaml │ │ │ ├── free_building_goal.yaml │ │ │ ├── breaking_barrier_goal.yaml │ │ │ ├── complex_twolayer_goal.yaml │ │ │ ├── simple_twolayer_init.yaml │ │ │ ├── block_shaped_init.yaml │ │ │ ├── strip_shaped_init.yaml │ │ │ ├── breaking_barrier_init.yaml │ │ │ ├── free_building_init.yaml │ │ │ └── complex_twolayer_init.yaml │ │ ├── __init__.py │ │ ├── goal.py │ │ ├── bullet_goal.py │ │ ├── bullet_flag.py │ │ ├── bullet_smartcar.py │ │ ├── bullet_slope.py │ │ ├── bullet_wall.py │ │ ├── action_enum.py │ │ ├── bullet_block.py │ │ ├── bread_first_search.py │ │ ├── grid_objs.py │ │ ├── blackboard.py │ │ ├── flag_env.py │ │ ├── free_env.py │ │ ├── matrix_env.py │ │ ├── matrix_to_bullet.py │ │ ├── utils.py │ │ └── scene.py │ └── create_pybullet_envs.py └── setup.py ├── .code.yml ├── LICENSE.md ├── README.md └── environment.yaml /PyMARL/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/components/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CraftEnv/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .create_pybullet_envs import * 2 | -------------------------------------------------------------------------------- /.code.yml: -------------------------------------------------------------------------------- 1 | source: 2 | third_party_source: 3 | filepath_regex: 4 | - /PyMARL/.* 5 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/__init__.py: -------------------------------------------------------------------------------- 1 | from .a_star import AStarPlanner 2 | from .breadth_first_search import BreadthFirstSearch 3 | -------------------------------------------------------------------------------- /PyMARL/src/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .basic_controller import BasicMAC 4 | 5 | REGISTRY["basic_mac"] = BasicMAC -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/block/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/block/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/flag/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/flag/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/goal/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/goal/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_end.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_end.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/board.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/board.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/wheel.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/wheel.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/wall/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/wall/meshes/base_link.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_base.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_base.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/meshes/body_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/smartcar/meshes/body_link.STL -------------------------------------------------------------------------------- /PyMARL/src/utils/dict2namedtuple.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | 4 | def convert(dictionary): 5 | return namedtuple('GenericDict', dictionary.keys())(**dictionary) 6 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_end_collision.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_end_collision.STL -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/block_shaped_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 2, y: 1, z: 1} 3 | - {id: 1, x: 2, y: 2, z: 1} 4 | - {id: 2, x: 3, y: 1, z: 1} 5 | - {id: 3, x: 3, y: 2, z: 1} -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/strip_shaped_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 0, z: 1} 3 | - {id: 1, x: 3, y: 1, z: 1} 4 | - {id: 2, x: 3, y: 2, z: 1} 5 | - {id: 3, x: 3, y: 3, z: 1} -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/meshes/slope_base_collision.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-RoboticsX/CraftEnv/HEAD/CraftEnv/src/craft/data/urdf/slope/meshes/slope_base_collision.STL -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .rnn_agent import RNNAgent 4 | REGISTRY["rnn"] = RNNAgent 5 | 6 | from .mlp_agent import MLPAgent 7 | REGISTRY["mlp"] = MLPAgent -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/simple_twolayer_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 2, y: 
3, z: 1} 3 | - {id: 1, x: 2, y: 3, z: 2} 4 | unfolded_body: 5 | - {id: 0, x: 2, y: 2, z: 1} 6 | unfolded_foot: 7 | - {id: 0, x: 2, y: 1, z: 1} -------------------------------------------------------------------------------- /PyMARL/src/runners/__init__.py: -------------------------------------------------------------------------------- 1 | REGISTRY = {} 2 | 3 | from .episode_runner import EpisodeRunner 4 | REGISTRY["episode"] = EpisodeRunner 5 | 6 | from .parallel_runner import ParallelRunner 7 | REGISTRY["parallel"] = ParallelRunner 8 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def get_data_path(): 5 | return os.path.join(os.path.dirname(__file__), "data") 6 | 7 | 8 | def get_urdf_path(): 9 | return os.path.join(get_data_path(), "urdf") 10 | -------------------------------------------------------------------------------- /PyMARL/README.local: -------------------------------------------------------------------------------- 1 | PyMARL git commit: c971afdceb34635d31b778021b0ef90d7af51e86 2 | EPyMARL git commit: f355a55262ac9afecdb53368fec6337c549cc160 3 | We merged the MAPPO implementation of EPyMARL into PyMARL. 4 | We also made modifications to connect CraftEnv environment to the PyMARL framework. 5 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/vdn.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | 4 | 5 | class VDNMixer(nn.Module): 6 | def __init__(self): 7 | super(VDNMixer, self).__init__() 8 | 9 | def forward(self, agent_qs, batch): 10 | return th.sum(agent_qs, dim=2, keepdim=True) -------------------------------------------------------------------------------- /CraftEnv/src/craft/goal.py: -------------------------------------------------------------------------------- 1 | class Goal: 2 | def __init__(self, blackboard): 3 | self._blackboard = blackboard 4 | self.reset() 5 | 6 | def reset(self): 7 | self.template = self._blackboard.template 8 | self.x = self.template["goal"][0]["x"] 9 | self.y = self.template["goal"][0]["y"] 10 | self.z = self.template["goal"][0]["z"] 11 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/free_building_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | unfolded_body: 7 | - {id: 0, x: 2, y: 1, z: 1} 8 | - {id: 1, x: 2, y: 2, z: 1} 9 | unfolded_foot: 10 | - {id: 0, x: 1, y: 1, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | # no use -------------------------------------------------------------------------------- /PyMARL/src/learners/__init__.py: -------------------------------------------------------------------------------- 1 | from .q_learner import QLearner 2 | from .coma_learner import COMALearner 3 | from .qtran_learner import QLearner as QTranLearner 4 | from .ppo_learner import PPOLearner 5 | 6 | REGISTRY = {} 7 | 8 | REGISTRY["q_learner"] = QLearner 9 | REGISTRY["coma_learner"] = COMALearner 10 | REGISTRY["qtran_learner"] = QTranLearner 11 | REGISTRY["ppo_learner"] = PPOLearner -------------------------------------------------------------------------------- 
/CraftEnv/src/craft/blueprint/breaking_barrier_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | - {id: 4, x: 3, y: 1, z: 2} 7 | unfolded_body: 8 | - {id: 0, x: 2, y: 1, z: 1} 9 | - {id: 1, x: 2, y: 2, z: 1} 10 | unfolded_foot: 11 | - {id: 0, x: 1, y: 1, z: 1} 12 | - {id: 1, x: 1, y: 2, z: 1} 13 | # no use -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/complex_twolayer_goal.yaml: -------------------------------------------------------------------------------- 1 | block: 2 | - {id: 0, x: 3, y: 1, z: 1} 3 | - {id: 1, x: 3, y: 2, z: 1} 4 | - {id: 2, x: 4, y: 1, z: 1} 5 | - {id: 3, x: 4, y: 2, z: 1} 6 | - {id: 4, x: 3, y: 1, z: 2} 7 | - {id: 5, x: 4, y: 2, z: 2} 8 | unfolded_body: 9 | - {id: 0, x: 2, y: 1, z: 1} 10 | - {id: 1, x: 2, y: 2, z: 1} 11 | unfolded_foot: 12 | - {id: 0, x: 1, y: 1, z: 1} 13 | - {id: 1, x: 1, y: 2, z: 1} 14 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/vdn.yaml: -------------------------------------------------------------------------------- 1 | # --- VDN specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1.0 6 | epsilon_finish: 0.2 # 0.05 7 | epsilon_anneal_time: 1 # 50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: "vdn" 21 | 22 | name: "vdn" 23 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/iql.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1.0 6 | epsilon_finish: 0.2 # 0.05 7 | epsilon_anneal_time: 1 # 50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: # Mixer becomes None 21 | 22 | name: "iql" 23 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/vdn_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- VDN specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: "vdn" 22 | 23 | name: "vdn_smac_parallel" 24 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/iql_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- IQL specific parameters --- 2 | 3 | # use epsilon greedy action 
selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: # Mixer becomes None 22 | 23 | name: "iql_smac_parallel" -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/mlp.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MLP(nn.Module): 7 | def __init__(self, input_shape, hidden_dim, output_dim): 8 | super(MLP, self).__init__() 9 | self.fc1 = nn.Linear(input_shape, hidden_dim) 10 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 11 | self.fc3 = nn.Linear(hidden_dim, output_dim) 12 | 13 | def forward(self, inputs): 14 | x = F.relu(self.fc1(inputs)) 15 | x = F.relu(self.fc2(x)) 16 | q = self.fc3(x) 17 | return q -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qmix_beta.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 1.0 6 | epsilon_finish: 0.05 7 | epsilon_anneal_time: 50000 8 | 9 | runner: "parallel" 10 | batch_size_run: 8 11 | 12 | buffer_size: 5000 13 | 14 | # update the target network every {} episodes 15 | target_update_interval: 200 16 | 17 | # use the Q_Learner to train 18 | agent_output_type: "q" 19 | learner: "q_learner" 20 | double_q: True 21 | mixer: "qmix" 22 | mixing_embed_dim: 32 23 | 24 | name: "qmix_smac_parallel" 25 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qmix.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 # 1 6 | epsilon_finish: 0.2 # 0/05 7 | epsilon_anneal_time: 1 # 500000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "q_learner" 19 | double_q: True 20 | mixer: "qmix" 21 | mixing_embed_dim: 32 22 | hypernet_layers: 2 23 | hypernet_embed: 64 24 | 25 | name: "qmix" 26 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/simple_twolayer_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 2 5 | slope_num: 1 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 0, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | smartcar: 13 | - {id: 0, x: 0, y: 2, z: 1, yaw: 0} 14 | - {id: 1, x: 2, y: 0, z: 1, yaw: 0} 15 | fold_slope: 16 | - {id: 0, x: 2, y: 2, z: 1, yaw: 3} 17 | flag: 18 | - {id: 0, x: -1, y: -2, z: 1} 19 | goal: 20 | - {id: 0, x: -1, y: -2, z: 1} 21 | legged_robot: 22 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 23 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 24 | 
-------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/block_shaped_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 4 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 1, z: 1} 11 | - {id: 1, x: 1, y: 2, z: 1} 12 | - {id: 2, x: 3, y: 0, z: 1} 13 | - {id: 3, x: 3, y: 3, z: 1} 14 | smartcar: 15 | - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 16 | - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 17 | 18 | flag: 19 | - {id: 0, x: -1, y: -2, z: 1} 20 | goal: 21 | - {id: 0, x: -1, y: -2, z: 1} 22 | legged_robot: 23 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 24 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 25 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/strip_shaped_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 4 2 | area_width: 4 3 | area_height: 3 4 | block_num: 4 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 1, y: 0, z: 1} 11 | - {id: 1, x: 2, y: 1, z: 1} 12 | - {id: 2, x: 1, y: 2, z: 1} 13 | - {id: 3, x: 2, y: 3, z: 1} 14 | smartcar: 15 | - {id: 0, x: 1, y: 1, z: 1, yaw: 0} 16 | - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 17 | 18 | flag: 19 | - {id: 0, x: -1, y: -2, z: 1} 20 | goal: 21 | - {id: 0, x: -1, y: -2, z: 1} 22 | legged_robot: 23 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 24 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 25 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/qtran.yaml: -------------------------------------------------------------------------------- 1 | # --- QMIX specific parameters --- 2 | 3 | # use epsilon greedy action selector 4 | action_selector: "epsilon_greedy" 5 | epsilon_start: 0.21 #1.0 6 | epsilon_finish: 0.2 #0.05 7 | epsilon_anneal_time: 1 #50000 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 12 | 13 | # update the target network every {} episodes 14 | target_update_interval: 200 15 | 16 | # use the Q_Learner to train 17 | agent_output_type: "q" 18 | learner: "qtran_learner" 19 | double_q: True 20 | mixer: "qtran_base" 21 | mixing_embed_dim: 64 22 | qtran_arch: "qtran_paper" 23 | 24 | opt_loss: 1 25 | nopt_min_loss: 0.1 26 | 27 | network_size: small 28 | 29 | name: "qtran" 30 | -------------------------------------------------------------------------------- /PyMARL/src/components/transforms.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | 4 | class Transform: 5 | def transform(self, tensor): 6 | raise NotImplementedError 7 | 8 | def infer_output_info(self, vshape_in, dtype_in): 9 | raise NotImplementedError 10 | 11 | 12 | class OneHot(Transform): 13 | def __init__(self, out_dim): 14 | self.out_dim = out_dim 15 | 16 | def transform(self, tensor): 17 | y_onehot = tensor.new(*tensor.shape[:-1], self.out_dim).zero_() 18 | y_onehot.scatter_(-1, tensor.long(), 1) 19 | return y_onehot.float() 20 | 21 | def infer_output_info(self, vshape_in, dtype_in): 22 | return (self.out_dim,), th.float32 -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/__init__.py: -------------------------------------------------------------------------------- 1 | from .coma import COMACritic 2 | from .centralV import CentralVCritic 3 | from 
.coma_ns import COMACriticNS 4 | from .centralV_ns import CentralVCriticNS 5 | from .maddpg import MADDPGCritic 6 | from .maddpg_ns import MADDPGCriticNS 7 | from .ac import ACCritic 8 | from .ac_ns import ACCriticNS 9 | REGISTRY = {} 10 | 11 | REGISTRY["coma_critic"] = COMACritic 12 | REGISTRY["cv_critic"] = CentralVCritic 13 | REGISTRY["coma_critic_ns"] = COMACriticNS 14 | REGISTRY["cv_critic_ns"] = CentralVCriticNS 15 | REGISTRY["maddpg_critic"] = MADDPGCritic 16 | REGISTRY["maddpg_critic_ns"] = MADDPGCriticNS 17 | REGISTRY["ac_critic"] = ACCritic 18 | REGISTRY["ac_critic_ns"] = ACCriticNS -------------------------------------------------------------------------------- /PyMARL/src/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | # from smac.env import MultiAgentEnv, StarCraft2Env 3 | from .multiagentenv import MultiAgentEnv 4 | from .multicar_env import MultiCarEnv 5 | from .flagenv import FlagEnv 6 | from .freeenv import FreeEnv 7 | import sys 8 | import os 9 | 10 | def env_fn(env, **kwargs) -> MultiAgentEnv: 11 | return env(**kwargs) 12 | 13 | REGISTRY = {} 14 | # REGISTRY["sc2"] = partial(env_fn, env=StarCraft2Env) 15 | REGISTRY["multicar"] = partial(env_fn, env=MultiCarEnv) 16 | REGISTRY["flag"] = partial(env_fn, env=FlagEnv) 17 | REGISTRY["free"] = partial(env_fn, env=FreeEnv) 18 | 19 | # if sys.platform == "linux": 20 | # os.environ.setdefault("SC2PATH", 21 | # os.path.join(os.getcwd(), "3rdparty", "StarCraftII")) 22 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/breaking_barrier_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 5 3 | area_height: 2 4 | block_num: 8 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 0, y: 1, z: 1} 11 | - {id: 1, x: 0, y: 3, z: 1} 12 | - {id: 2, x: 1, y: 2, z: 1} 13 | - {id: 3, x: 3, y: 0, z: 1} 14 | - {id: 4, x: 3, y: 1, z: 1} 15 | - {id: 5, x: 3, y: 2, z: 1} 16 | - {id: 6, x: 3, y: 3, z: 1} 17 | - {id: 7, x: 3, y: 4, z: 1} 18 | smartcar: 19 | - {id: 0, x: 0, y: 2, z: 1, yaw: 0} 20 | - {id: 1, x: 4, y: 3, z: 1, yaw: 0} 21 | flag: 22 | - {id: 0, x: 4, y: 1, z: 1} 23 | goal: 24 | - {id: 0, x: 0, y: 2, z: 1} 25 | legged_robot: 26 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 27 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 28 | -------------------------------------------------------------------------------- /CraftEnv/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from setuptools import setup, find_packages 6 | 7 | setup( 8 | name="craftenv", 9 | version="0.1", 10 | description="The CraftEnv MARL environment for CRC", 11 | keywords="Robotics, Reinforcement Learning", 12 | package_dir={"": "src"}, 13 | packages=find_packages(where="src"), 14 | python_requires=">=3.5, <4", 15 | install_requires=[ 16 | "gym", 17 | "numpy", 18 | "scipy", 19 | "wheel", 20 | "pybullet", 21 | "absl-py", 22 | "mpi4py", 23 | "torch", 24 | "scipy", 25 | "cloudpickle", 26 | "pandas", 27 | "matplotlib" 28 | ], 29 | ) 30 | 31 | print(find_packages(where="src")) 32 | -------------------------------------------------------------------------------- /PyMARL/src/utils/rl_utils.py: 
-------------------------------------------------------------------------------- 1 | import torch as th 2 | 3 | 4 | def build_td_lambda_targets(rewards, terminated, mask, target_qs, n_agents, gamma, td_lambda): 5 | # Assumes target_qs in B*T*A and rewards, terminated, mask in (at least) B*T-1*1 6 | # Initialise last lambda-return for not terminated episodes 7 | ret = target_qs.new_zeros(*target_qs.shape) 8 | ret[:, -1] = target_qs[:, -1] * (1 - th.sum(terminated, dim=1)) 9 | # Backwards recursive update of the "forward view" 10 | for t in range(ret.shape[1] - 2, -1, -1): 11 | ret[:, t] = td_lambda * gamma * ret[:, t + 1] + mask[:, t] \ 12 | * (rewards[:, t] + (1 - td_lambda) * gamma * target_qs[:, t + 1] * (1 - terminated[:, t])) 13 | # Returns lambda-return from t=0 to t=T-1, i.e. in B*T-1*A 14 | return ret[:, 0:-1] 15 | 16 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/free_building_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 5 3 | area_height: 2 4 | block_num: 10 5 | slope_num: 0 6 | smartcar_num: 2 7 | legged_robot_num: 2 8 | 9 | block: 10 | - {id: 0, x: 0, y: 1, z: 1} 11 | - {id: 1, x: 0, y: 3, z: 1} 12 | - {id: 2, x: 1, y: 0, z: 1} 13 | - {id: 3, x: 1, y: 2, z: 1} 14 | - {id: 4, x: 1, y: 4, z: 1} 15 | - {id: 5, x: 3, y: 0, z: 1} 16 | - {id: 6, x: 3, y: 2, z: 1} 17 | - {id: 7, x: 3, y: 4, z: 1} 18 | - {id: 8, x: 4, y: 1, z: 1} 19 | - {id: 9, x: 4, y: 3, z: 1} 20 | smartcar: 21 | - {id: 0, x: 2, y: 1, z: 1, yaw: 0} 22 | - {id: 1, x: 2, y: 3, z: 1, yaw: 0} 23 | flag: 24 | - {id: 0, x: -1, y: -2, z: 1} 25 | goal: 26 | - {id: 0, x: -1, y: -2, z: 1} 27 | legged_robot: 28 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 29 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 30 | -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/rnn_agent.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class RNNAgent(nn.Module): 6 | def __init__(self, input_shape, args): 7 | super(RNNAgent, self).__init__() 8 | self.args = args 9 | 10 | self.fc1 = nn.Linear(input_shape, args.rnn_hidden_dim) 11 | self.rnn = nn.GRUCell(args.rnn_hidden_dim, args.rnn_hidden_dim) 12 | self.fc2 = nn.Linear(args.rnn_hidden_dim, args.n_actions) 13 | 14 | def init_hidden(self): 15 | # make hidden states on same device as model 16 | return self.fc1.weight.new(1, self.args.rnn_hidden_dim).zero_() 17 | 18 | def forward(self, inputs, hidden_state): 19 | x = F.relu(self.fc1(inputs)) 20 | h_in = hidden_state.reshape(-1, self.args.rnn_hidden_dim) 21 | h = self.rnn(x, h_in) 22 | q = self.fc2(h) 23 | return q, h 24 | -------------------------------------------------------------------------------- /PyMARL/src/components/epsilon_schedules.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DecayThenFlatSchedule(): 5 | 6 | def __init__(self, 7 | start, 8 | finish, 9 | time_length, 10 | decay="exp"): 11 | 12 | self.start = start 13 | self.finish = finish 14 | self.time_length = time_length 15 | self.delta = (self.start - self.finish) / self.time_length 16 | self.decay = decay 17 | 18 | if self.decay in ["exp"]: 19 | self.exp_scaling = (-1) * self.time_length / np.log(self.finish) if self.finish > 0 else 1 20 | 21 | def eval(self, T): 22 | if self.decay in ["linear"]: 23 | return max(self.finish, self.start
- self.delta * T) 24 | elif self.decay in ["exp"]: 25 | return min(self.start, max(self.finish, np.exp(- T / self.exp_scaling))) 26 | pass 27 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/coma.yaml: -------------------------------------------------------------------------------- 1 | # --- COMA specific parameters --- 2 | 3 | action_selector: "multinomial" 4 | epsilon_start: 0.21 #.5 5 | epsilon_finish: 0.2 # .01 6 | epsilon_anneal_time: 1 # 100000 7 | mask_before_softmax: False 8 | 9 | runner: "episode" 10 | 11 | buffer_size: 5000 # 8 # size of the replay buffer 12 | # batch_size_run: 1 # number of environments to run in parallel 13 | # batch_size: 32 # 8 # batch size 14 | 15 | env_args: 16 | state_last_action: False # critic adds last action internally 17 | 18 | # update the target network every {} training steps 19 | target_update_interval: 200 20 | 21 | # lr: 0.0005 22 | # critic_lr: 0.0005 23 | td_lambda: 0.8 24 | 25 | # use COMA 26 | agent_output_type: "pi_logits" 27 | learner: "coma_learner" 28 | critic_q_fn: "coma" 29 | critic_baseline_fn: "coma" 30 | critic_train_mode: "seq" 31 | critic_train_reps: 1 32 | q_nstep: 0 # 0 corresponds to default Q, 1 is r + gamma*Q, etc 33 | 34 | name: "coma" 35 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/sc2_beta.yaml: -------------------------------------------------------------------------------- 1 | env: sc2 2 | 3 | env_args: 4 | continuing_episode: False 5 | difficulty: "7" 6 | game_version: null 7 | map_name: "3m" 8 | move_amount: 2 9 | obs_all_health: True 10 | obs_instead_of_state: False 11 | obs_last_action: False 12 | obs_own_health: True 13 | obs_pathing_grid: False 14 | obs_terrain_height: False 15 | obs_timestep_number: False 16 | reward_death_value: 10 17 | reward_defeat: 0 18 | reward_negative_scale: 0.5 19 | reward_only_positive: True 20 | reward_scale: True 21 | reward_scale_rate: 20 22 | reward_sparse: False 23 | reward_win: 200 24 | replay_dir: "" 25 | replay_prefix: "" 26 | state_last_action: True 27 | state_timestep_number: False 28 | step_mul: 8 29 | seed: null 30 | heuristic_ai: False 31 | debug: False 32 | 33 | learner_log_interval: 20000 34 | log_interval: 20000 35 | runner_log_interval: 20000 36 | t_max: 10050000 37 | test_interval: 20000 38 | test_nepisode: 24 39 | test_greedy: True 40 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/sc2.yaml: -------------------------------------------------------------------------------- 1 | env: sc2 2 | 3 | env_args: 4 | continuing_episode: False 5 | difficulty: "7" 6 | game_version: null 7 | map_name: "3m" 8 | move_amount: 2 9 | obs_all_health: True 10 | obs_instead_of_state: False 11 | obs_last_action: False 12 | obs_own_health: True 13 | obs_pathing_grid: False 14 | obs_terrain_height: False 15 | obs_timestep_number: False 16 | reward_death_value: 10 17 | reward_defeat: 0 18 | reward_negative_scale: 0.5 19 | reward_only_positive: True 20 | reward_scale: True 21 | reward_scale_rate: 20 22 | reward_sparse: False 23 | reward_win: 200 24 | replay_dir: "" 25 | replay_prefix: "" 26 | state_last_action: True 27 | state_timestep_number: False 28 | step_mul: 8 29 | seed: null 30 | heuristic_ai: False 31 | heuristic_rest: False 32 | debug: False 33 | 34 | test_greedy: True 35 | test_nepisode: 32 36 | test_interval: 10000 37 | log_interval: 10000 38 | runner_log_interval: 10000 39 | learner_log_interval: 10000 40 | 
t_max: 2050000 41 | -------------------------------------------------------------------------------- /PyMARL/src/config/algs/mappo.yaml: -------------------------------------------------------------------------------- 1 | # --- MAPPO specific parameters --- 2 | 3 | # action_selector: "soft_policies" 4 | # mask_before_softmax: True 5 | action_selector: "epsilon_greedy" 6 | epsilon_start: 0.21 # 1 7 | epsilon_finish: 0.2 # 0/05 8 | epsilon_anneal_time: 1 # 500000 9 | 10 | runner: "episode" 11 | 12 | buffer_size: 5000 # 10 13 | # batch_size_run: 1 # 10 14 | # batch_size: 32 # 8 # 10 15 | 16 | env_args: 17 | state_last_action: False # critic adds last action internally 18 | 19 | # update the target network every {} training steps 20 | target_update_interval_or_tau: 200 21 | 22 | lr: 0.0005 23 | 24 | obs_agent_id: True 25 | obs_last_action: False 26 | obs_individual_obs: False 27 | 28 | agent_output_type: "pi_logits" 29 | learner: "ppo_learner" 30 | entropy_coef: 0.01 31 | use_rnn: False 32 | standardise_returns: True 33 | standardise_rewards: False 34 | q_nstep: 5 # 1 corresponds to normal r + gammaV 35 | critic_type: "cv_critic" 36 | epochs: 4 37 | eps_clip: 0.2 38 | name: "mappo" 39 | 40 | # t_max: 20050000 41 | hidden_dim: 64 42 | add_value_last_step: True -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_goal.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletGoal: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "goal/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 22 | self._bullet_client.setCollisionFilterGroupMask( 23 | self.robot_id, i, collisionFilterGroup=0, collisionFilterMask=0 24 | ) 25 | self._bullet_client.changeVisualShape(self.robot_id, -1, rgbaColor=[0, 1, 0, 1]) 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (C) 2023 THL A29 Limited 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/multicar.yaml: -------------------------------------------------------------------------------- 1 | env: multicar 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/xliu/craft/CraftEnv/src/craft/blueprint/strip_shaped_init.yaml' 6 | design_path: '/home/xliu/craft/CraftEnv/src/craft/blueprint/strip_shaped_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_flag.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletFlag: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "flag/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | 22 | for i in range(self._bullet_client.getNumJoints(self.robot_id)): 23 | self._bullet_client.changeVisualShape( 24 | self.robot_id, i, rgbaColor=[1, 0, 0, 1] 25 | ) 26 | 27 | self._bullet_client.setCollisionFilterGroupMask( 28 | self.robot_id, -1, collisionFilterGroup=3, collisionFilterMask=3 29 | ) 30 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_smartcar.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletSmartcar: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | self.robot_id = self._bullet_client.loadURDF( 14 | os.path.join(get_urdf_path(), "smartcar/smartcar.urdf"), 15 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 16 | globalScaling=1, 17 | ) 18 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 19 | self._bullet_client.setCollisionFilterGroupMask( 20 | self.robot_id, i, 
collisionFilterGroup=3, collisionFilterMask=3 21 | ) 22 | self._bullet_client.changeVisualShape(self.robot_id, 0, rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1]) 23 | self._bullet_client.changeVisualShape(self.robot_id, 1, rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1]) 24 | -------------------------------------------------------------------------------- /PyMARL/src/config/envs/free.yaml: -------------------------------------------------------------------------------- 1 | env: free 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/free_building_init.yaml' 6 | design_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/free_building_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /PyMARL/src/config/envs/flag.yaml: -------------------------------------------------------------------------------- 1 | env: flag 2 | 3 | env_args: 4 | arena_id: 'Craft-v0' 5 | init_blueprint_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/breaking_barrier_init.yaml' 6 | design_path: '/home/droid/Downloads/CraftEnv/CraftEnv/src/craftenv/sim_envs/pybullet_envs/craft/blueprint/breaking_barrier_goal.yaml' 7 | reward_cnt: 1 8 | act_lift: 0 9 | act_drop: 0 10 | act_fold: 0 11 | act_unfo: 0 12 | lift_block: 0 13 | lift_slope: 0 14 | second_floor: 0 15 | third_floor: 0 16 | lift_flag: 0 17 | reach_goal: 0 18 | max_steps: 18 # 8 # 18 # 48 # 28 19 | step_penalty: 0 20 | block_near_unfold: 1 21 | building_complexity: 1 22 | complexity_1: 1 23 | complexity_2: 5 24 | complexity_3: 50 25 | complexity_4: 10 26 | complexity_5: 25 27 | complexity_6: 50 28 | building_complexity_max: 10000 29 | second_floor_block: 0 30 | reachable_space: 0 31 | block_on_block: 1 32 | fold_on_block: 5 33 | unfold_on_block: 10 34 | block_unfold_on_block: 25 35 | block_on_block_on_block: 50 36 | enable_local_obs: False 37 | local_max_free_num: 10 38 | local_max_block_num: 10 39 | local_max_slope_num: 10 40 | search_depth: 20 -------------------------------------------------------------------------------- /PyMARL/src/modules/agents/mlp_agent.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class MLPAgent(nn.Module): 6 | def __init__(self, input_shape, args): 7 | super(MLPAgent, self).__init__() 8 | self.args = args 9 | # self.fc1 = nn.Linear(input_shape, args.rnn_hidden_dim) 10 | # self.rnn = nn.GRUCell(args.rnn_hidden_dim, args.rnn_hidden_dim) 11 | # self.fc2 = nn.Linear(args.rnn_hidden_dim, args.n_actions) 12 | 
self.fc1 = nn.Linear(input_shape, 256) 13 | self.relu1 = nn.ReLU() 14 | self.fc2 = nn.Linear(256, 256) 15 | self.relu2 = nn.ReLU() 16 | self.fc3 = nn.Linear(256, 64) 17 | self.relu3 = nn.ReLU() 18 | self.fc4 = nn.Linear(64, 64) 19 | self.relu4 = nn.ReLU() 20 | self.fc5 = nn.Linear(64, args.n_actions) 21 | 22 | def init_hidden(self): 23 | # make hidden states on same device as model 24 | return self.fc1.weight.new(1, self.args.rnn_hidden_dim).zero_() 25 | 26 | def forward(self, inputs, hidden_state): 27 | x = self.relu1(self.fc1(inputs)) 28 | x = self.relu2(self.fc2(x)) 29 | x = self.relu3(self.fc3(x)) 30 | x = self.relu4(self.fc4(x)) 31 | x = self.fc5(x) 32 | h = hidden_state.reshape(-1, self.args.rnn_hidden_dim) 33 | return x, h -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_slope.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletSlope: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "slope/slope.urdf.xacro") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | for i in range(-1, self._bullet_client.getNumJoints(self.robot_id)): 22 | self._bullet_client.setCollisionFilterGroupMask( 23 | self.robot_id, i, collisionFilterGroup=12, collisionFilterMask=1 24 | ) 25 | self.fold() 26 | 27 | def fold(self): 28 | self._bullet_client.resetJointState( 29 | bodyUniqueId=self.robot_id, 30 | jointIndex=1, 31 | targetValue=-0.02, 32 | targetVelocity=0, 33 | ) 34 | 35 | def unfold(self): 36 | self._bullet_client.resetJointState( 37 | bodyUniqueId=self.robot_id, 38 | jointIndex=1, 39 | targetValue=-3.12, 40 | targetVelocity=0, 41 | ) 42 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/goal/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/maddpg.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MADDPGCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(MADDPGCritic, self).__init__() 9 | self.args = args 10 | self.n_actions = args.n_actions 11 | self.n_agents = args.n_agents 12 | self.input_shape = self._get_input_shape(scheme) + self.n_actions * self.n_agents 13 | if self.args.obs_last_action: 14 | self.input_shape += self.n_actions 15 | self.output_type = "q" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(self.input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, inputs, actions): 23 | inputs = th.cat((inputs, actions), dim=-1) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = 
self.fc3(x) 27 | return q 28 | 29 | def _get_input_shape(self, scheme): 30 | # state 31 | input_shape = scheme["state"]["vshape"] 32 | # print(scheme["state"]["vshape"], scheme["obs"]["vshape"], self.n_agents, scheme["actions_one"]) 33 | # whether to add the individual observation 34 | if self.args.obs_individual_obs: 35 | input_shape += scheme["obs"]["vshape"] 36 | # agent id 37 | if self.args.obs_agent_id: 38 | input_shape += self.n_agents 39 | return input_shape -------------------------------------------------------------------------------- /CraftEnv/src/create_pybullet_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.spaces import Tuple as GymTuple 3 | 4 | from craftenv.sim_envs.pybullet_envs.craft.craft_env import CraftEnv 5 | 6 | 7 | class SingleAgentWrapper(gym.Wrapper): 8 | def __init__(self, env): 9 | super(SingleAgentWrapper, self).__init__(env) 10 | 11 | self.observation_space = GymTuple([env.observation_space]) 12 | self.action_space = GymTuple([env.action_space]) 13 | 14 | def reset(self, **kwargs): 15 | obs = self.env.reset() 16 | return (obs,) 17 | 18 | def step(self, action): 19 | obs, rwd, done, info = super(SingleAgentWrapper, self).step(action[0]) 20 | if "post_process_data" in info: 21 | info["post_process_data"] = (info["post_process_data"],) 22 | return (obs,), (rwd,), done, info 23 | 24 | 25 | def create_pybullet_env(**env_config): 26 | arena_id = env_config["arena_id"] 27 | assert arena_id in [ 28 | "Craft-v0" 29 | ] 30 | enable_render = env_config["render"] if "render" in env_config else False 31 | 32 | enable_render = env_config["render"] if "render" in env_config else False 33 | 34 | def create_single_env(): 35 | if arena_id in ["Craft-v0"]: 36 | init_blueprint_path = list( 37 | env_config["init_blueprint_path"].split(",")) 38 | env0 = CraftEnv(enable_render, init_blueprint_path, env_config) 39 | else: 40 | raise NotImplementedError 41 | env0 = SingleAgentWrapper(env0) 42 | return env0 43 | env = create_single_env() 44 | return env 45 | -------------------------------------------------------------------------------- /PyMARL/src/components/standarize_stream.py: -------------------------------------------------------------------------------- 1 | """ 2 | Taken from: https://github.com/semitable/fast-marl 3 | """ 4 | 5 | import torch 6 | from typing import Tuple 7 | 8 | 9 | class RunningMeanStd(object): 10 | def __init__(self, epsilon: float = 1e-4, shape: Tuple[int, ...] 
= (), device="cpu"): 11 | """ 12 | https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 13 | """ 14 | self.mean = torch.zeros(shape, dtype=torch.float32, device=device) 15 | self.var = torch.ones(shape, dtype=torch.float32, device=device) 16 | self.count = epsilon 17 | 18 | def update(self, arr): 19 | arr = arr.reshape(-1, arr.size(-1)) 20 | batch_mean = torch.mean(arr, dim=0) 21 | batch_var = torch.var(arr, dim=0) 22 | batch_count = arr.shape[0] 23 | self.update_from_moments(batch_mean, batch_var, batch_count) 24 | 25 | def update_from_moments(self, batch_mean, batch_var, batch_count: int): 26 | delta = batch_mean - self.mean 27 | tot_count = self.count + batch_count 28 | 29 | new_mean = self.mean + delta * batch_count / tot_count 30 | m_a = self.var * self.count 31 | m_b = batch_var * batch_count 32 | m_2 = ( 33 | m_a 34 | + m_b 35 | + torch.square(delta) 36 | * self.count 37 | * batch_count 38 | / (self.count + batch_count) 39 | ) 40 | new_var = m_2 / (self.count + batch_count) 41 | 42 | new_count = batch_count + self.count 43 | 44 | self.mean = new_mean 45 | self.var = new_var 46 | self.count = new_count -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/ac.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ACCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(ACCritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | self.output_type = "v" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, batch, t=None): 23 | inputs, bs, max_t = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # observations 35 | inputs.append(batch["obs"][:, ts]) 36 | 37 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 38 | 39 | inputs = th.cat(inputs, dim=-1) 40 | return inputs, bs, max_t 41 | 42 | def _get_input_shape(self, scheme): 43 | # observations 44 | input_shape = scheme["obs"]["vshape"] 45 | # agent id 46 | input_shape += self.n_agents 47 | return input_shape 48 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_wall.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletWalls: 6 | """ 7 | pybullet API createMultiBody 8 | """ 9 | 10 | def __init__(self, bullet_client, blackboard): 11 | self._bullet_client = bullet_client 12 | self._blackboard = blackboard 13 | self.init_pose = [0.0, 0.0, 1.0] 14 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 15 | self.num = self._blackboard.wall_num 16 | 17 | self._init_model(self.num) 18 | 19 | def _init_model(self, num): 20 | visual_file_name = os.path.join(get_urdf_path(), "wall/meshes/base_link.STL") 21 | visual_shape = 
self._bullet_client.createVisualShape( 22 | shapeType=self._bullet_client.GEOM_MESH, 23 | fileName=visual_file_name, 24 | rgbaColor=[211 / 255, 211 / 255, 211 / 255, 0.1], 25 | ) 26 | collision_shape = self._bullet_client.createCollisionShape( 27 | shapeType=self._bullet_client.GEOM_BOX, 28 | halfExtents=[ 29 | self._blackboard.BLOCK_LENGTH / 2, 30 | self._blackboard.BLOCK_LENGTH / 2, 31 | self._blackboard.BLOCK_HEIGHT / 2, 32 | ], 33 | ) 34 | 35 | position = [[0, 0, 0] for _ in range(num)] 36 | self.ids = self._bullet_client.createMultiBody( 37 | baseCollisionShapeIndex=collision_shape, 38 | baseVisualShapeIndex=visual_shape, 39 | batchPositions=position, 40 | ) 41 | 42 | for id_ in self.ids: 43 | self._bullet_client.setCollisionFilterGroupMask( 44 | id_, -1, collisionFilterGroup=3, collisionFilterMask=3 45 | ) 46 | -------------------------------------------------------------------------------- /PyMARL/src/utils/timehelper.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | 4 | 5 | def print_time(start_time, T, t_max, episode, episode_rewards): 6 | time_elapsed = time.time() - start_time 7 | T = max(1, T) 8 | time_left = time_elapsed * (t_max - T) / T 9 | # Just in case its over 100 days 10 | time_left = min(time_left, 60 * 60 * 24 * 100) 11 | last_reward = "N\A" 12 | if len(episode_rewards) > 5: 13 | last_reward = "{:.2f}".format(np.mean(episode_rewards[-50:])) 14 | print("\033[F\033[F\x1b[KEp: {:,}, T: {:,}/{:,}, Reward: {}, \n\x1b[KElapsed: {}, Left: {}\n".format(episode, T, t_max, last_reward, time_str(time_elapsed), time_str(time_left)), " " * 10, end="\r") 15 | 16 | 17 | def time_left(start_time, t_start, t_current, t_max): 18 | if t_current >= t_max: 19 | return "-" 20 | time_elapsed = time.time() - start_time 21 | t_current = max(1, t_current) 22 | time_left = time_elapsed * (t_max - t_current) / (t_current - t_start) 23 | # Just in case its over 100 days 24 | time_left = min(time_left, 60 * 60 * 24 * 100) 25 | return time_str(time_left) 26 | 27 | 28 | def time_str(s): 29 | """ 30 | Convert seconds to a nicer string showing days, hours, minutes and seconds 31 | """ 32 | days, remainder = divmod(s, 60 * 60 * 24) 33 | hours, remainder = divmod(remainder, 60 * 60) 34 | minutes, seconds = divmod(remainder, 60) 35 | string = "" 36 | if days > 0: 37 | string += "{:d} days, ".format(int(days)) 38 | if hours > 0: 39 | string += "{:d} hours, ".format(int(hours)) 40 | if minutes > 0: 41 | string += "{:d} minutes, ".format(int(minutes)) 42 | string += "{:d} seconds".format(int(seconds)) 43 | return string 44 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/action_enum.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, auto, unique 2 | import numpy as np 3 | 4 | 5 | ACTION_ARG = [ 6 | None, 7 | None, 8 | None, 9 | None, 10 | 1, # TURN_LEFT 11 | -1, # TURN_RIGHT 12 | # (x, y) 13 | np.array((0, 1)), # MOVE_FORWARD 14 | np.array((0, -1)), # MOVE_BACK 15 | np.array((-1, 0)), # MOVE_LEFT 16 | np.array((1, 0)), # MOVE_RIGHT 17 | None, # STOP 18 | np.array((-1, 1)), # MOVE_FORWARD_LEFT 19 | np.array((1, 1)), # MOVE_FORWARD_RIGHT 20 | np.array((-1, -1)), # MOVE_BACK_LEFT 21 | np.array((1, -1)), # MOVE_BACK_RIGHT 22 | ] 23 | 24 | 25 | @unique 26 | class ActionEnum(IntEnum): 27 | """ 28 | ^ y 29 | | 30 | | 31 | | 32 | o---------> x 33 | world coordinate 34 | 35 | smartcar action enum 36 | 37 | MOVE_FORWARD: ^ 
38 | | 39 | 40 | MOVE_BACK: | 41 | v 42 | 43 | MOVE_LEFT: <-- 44 | 45 | MOVE_RIGHT: --> 46 | 47 | TURN_LEFT <-- 48 | | 49 | 50 | TURN_RIGHT --> 51 | | 52 | 53 | LIFT 54 | 55 | DROP 56 | 57 | FOLD 58 | 59 | UNFOLD 60 | 61 | STOP 62 | """ 63 | 64 | LIFT = 0 # 0 65 | DROP = auto() # 1 66 | FOLD = auto() # 2 67 | UNFOLD = auto() # 3 68 | ROTATE_LEFT = auto() # 4 69 | ROTATE_RIGHT = auto() # 5 70 | MOVE_FORWARD = auto() # 6 71 | MOVE_BACK = auto() # 7 72 | MOVE_LEFT = auto() # 8 73 | MOVE_RIGHT = auto() # 9 74 | STOP = auto() # 10 75 | MOVE_FORWARD_LEFT = auto() # 11 76 | MOVE_FORWARD_RIGHT = auto() # 12 77 | MOVE_BACK_LEFT = auto() # 13 78 | MOVE_BACK_RIGHT = auto() # 14 79 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/maddpg_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class MADDPGCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(MADDPGCriticNS, self).__init__() 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | self.input_shape = self._get_input_shape(scheme) + self.n_actions * self.n_agents 14 | if self.args.obs_last_action: 15 | self.input_shape += self.n_actions 16 | self.output_type = "q" 17 | self.critics = [MLP(self.input_shape, self.args.hidden_dim, 1) for _ in range(self.n_agents)] 18 | 19 | def forward(self, inputs, actions): 20 | inputs = th.cat((inputs, actions), dim=-1) 21 | qs = [] 22 | for i in range(self.n_agents): 23 | q = self.critics[i](inputs[:, :, i]).unsqueeze(2) 24 | qs.append(q) 25 | return th.cat(qs, dim=2) 26 | 27 | def _get_input_shape(self, scheme): 28 | # state 29 | input_shape = scheme["state"]["vshape"] 30 | # observation 31 | if self.args.obs_individual_obs: 32 | input_shape += scheme["obs"]["vshape"] 33 | return input_shape 34 | 35 | def parameters(self): 36 | params = list(self.critics[0].parameters()) 37 | for i in range(1, self.n_agents): 38 | params += list(self.critics[i].parameters()) 39 | return params 40 | 41 | def state_dict(self): 42 | return [a.state_dict() for a in self.critics] 43 | 44 | def load_state_dict(self, state_dict): 45 | for i, c in enumerate(self.critics): 46 | c.load_state_dict(state_dict[i]) 47 | 48 | def cuda(self): 49 | for c in self.critics: 50 | c.cuda() 51 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/ac_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class ACCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(ACCriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = self._get_input_shape(scheme) 16 | self.output_type = "v" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, 1) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs, bs, max_t = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs[:, :, i]) 26 | qs.append(q.view(bs, max_t, 1, -1)) 27 | q = th.cat(qs, dim=2) 28 | return q 29 | 30 | def _build_inputs(self, batch, t=None): 31 | bs = 
batch.batch_size 32 | max_t = batch.max_seq_length if t is None else 1 33 | ts = slice(None) if t is None else slice(t, t+1) 34 | inputs = batch["obs"][:, ts] 35 | return inputs, bs, max_t 36 | 37 | def _get_input_shape(self, scheme): 38 | # observations 39 | input_shape = scheme["obs"]["vshape"] 40 | return input_shape 41 | 42 | def parameters(self): 43 | params = list(self.critics[0].parameters()) 44 | for i in range(1, self.n_agents): 45 | params += list(self.critics[i].parameters()) 46 | return params 47 | 48 | def state_dict(self): 49 | return [a.state_dict() for a in self.critics] 50 | 51 | def load_state_dict(self, state_dict): 52 | for i, a in enumerate(self.critics): 53 | a.load_state_dict(state_dict[i]) 54 | 55 | def cuda(self): 56 | for c in self.critics: 57 | c.cuda() -------------------------------------------------------------------------------- /PyMARL/src/envs/multiagentenv.py: -------------------------------------------------------------------------------- 1 | class MultiAgentEnv(object): 2 | 3 | def step(self, actions): 4 | """ Returns reward, terminated, info """ 5 | raise NotImplementedError 6 | 7 | def get_obs(self): 8 | """ Returns all agent observations in a list """ 9 | raise NotImplementedError 10 | 11 | def get_obs_agent(self, agent_id): 12 | """ Returns observation for agent_id """ 13 | raise NotImplementedError 14 | 15 | def get_obs_size(self): 16 | """ Returns the shape of the observation """ 17 | raise NotImplementedError 18 | 19 | def get_state(self): 20 | raise NotImplementedError 21 | 22 | def get_state_size(self): 23 | """ Returns the shape of the state""" 24 | raise NotImplementedError 25 | 26 | def get_avail_actions(self): 27 | raise NotImplementedError 28 | 29 | def get_avail_agent_actions(self, agent_id): 30 | """ Returns the available actions for agent_id """ 31 | raise NotImplementedError 32 | 33 | def get_total_actions(self): 34 | """ Returns the total number of actions an agent could ever take """ 35 | # TODO: This is only suitable for a discrete 1 dimensional action space for each agent 36 | raise NotImplementedError 37 | 38 | def reset(self): 39 | """ Returns initial observations and states""" 40 | raise NotImplementedError 41 | 42 | def render(self): 43 | raise NotImplementedError 44 | 45 | def close(self): 46 | raise NotImplementedError 47 | 48 | def seed(self): 49 | raise NotImplementedError 50 | 51 | def save_replay(self): 52 | raise NotImplementedError 53 | 54 | def get_env_info(self): 55 | env_info = {"state_shape": self.get_state_size(), 56 | "obs_shape": self.get_obs_size(), 57 | "n_actions": self.get_total_actions(), 58 | "n_agents": self.n_agents, 59 | "episode_limit": self.episode_limit} 60 | return env_info 61 | -------------------------------------------------------------------------------- /PyMARL/src/config/default.yaml: -------------------------------------------------------------------------------- 1 | # --- Defaults --- 2 | 3 | # --- pymarl options --- 4 | runner: "episode" # Runs 1 env for an episode 5 | mac: "basic_mac" # Basic controller 6 | env: "sc2" # Environment name 7 | env_args: {} # Arguments for the environment 8 | batch_size_run: 1 # Number of environments to run in parallel 9 | test_nepisode: 20 # Number of episodes to test for 10 | test_interval: 10000 # 2000 # Test after {} timesteps have passed 11 | test_greedy: True # Use greedy evaluation (if False, will set epsilon floor to 0 12 | log_interval: 2000 # Log summary of stats after every {} timesteps 13 | runner_log_interval: 2000 # Log runner stats (not test 
stats) every {} timesteps 14 | learner_log_interval: 2000 # Log training stats every {} timesteps 15 | t_max: 1000500 # 1000500 # 2000000 # Stop running after this many timesteps 16 | use_cuda: True # Use gpu by default unless it isn't available 17 | buffer_cpu_only: True # If true we won't keep all of the replay buffer in vram 18 | 19 | # --- Logging options --- 20 | use_tensorboard: False # Log results to tensorboard 21 | save_model: True # Save the models to disk 22 | save_model_interval: 1000000 # Save models after this many timesteps 23 | checkpoint_path: "" # Load a checkpoint from this path 24 | evaluate: False # Evaluate model for test_nepisode episodes and quit (no training) 25 | load_step: 0 # Load model trained on this many timesteps (0 if choose max possible) 26 | save_replay: False # Saving the replay of the model loaded from checkpoint_path 27 | local_results_path: "results" # Path for local results 28 | 29 | # --- RL hyperparameters --- 30 | gamma: 0.99 31 | batch_size: 32 # Number of episodes to train on 32 | buffer_size: 32 # Size of the replay buffer 33 | lr: 0.0005 # Learning rate for agents 34 | critic_lr: 0.0005 # Learning rate for critics 35 | optim_alpha: 0.99 # RMSProp alpha 36 | optim_eps: 0.00001 # RMSProp epsilon 37 | grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm 38 | 39 | # --- Agent parameters --- 40 | agent: "rnn" # Default rnn agent 41 | rnn_hidden_dim: 64 # Size of hidden state for default rnn agent 42 | obs_agent_id: True # Include the agent's one_hot id in the observation 43 | obs_last_action: True # Include the agent's last action (one_hot) in the observation 44 | 45 | # --- Experiment running params --- 46 | repeat_id: 1 47 | label: "default_label" 48 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bullet_block.py: -------------------------------------------------------------------------------- 1 | import os 2 | from craft import get_urdf_path 3 | 4 | 5 | class BulletBlock: 6 | def __init__(self, bullet_client): 7 | self._bullet_client = bullet_client 8 | self.init_pose = [0.0, 0.0, 1.0] 9 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 10 | self._init_model(self.init_pose, self.init_quat) 11 | 12 | def _init_model(self, init_pose, init_quat): 13 | robot_path = os.path.join(get_urdf_path(), "block/block.urdf") 14 | self.robot_id = self._bullet_client.loadURDF( 15 | robot_path, 16 | init_pose, 17 | init_quat, 18 | flags=(self._bullet_client.URDF_ENABLE_CACHED_GRAPHICS_SHAPES), 19 | useFixedBase=True, 20 | ) 21 | 22 | 23 | class BulletBlocks: 24 | """ 25 | pybullet API createMultiBody 26 | """ 27 | 28 | def __init__(self, bullet_client, blackboard): 29 | self._bullet_client = bullet_client 30 | self._blackboard = blackboard 31 | self.init_pose = [0.0, 0.0, 1.0] 32 | self.init_quat = [0.0, 0.0, 0.0, 1.0] 33 | self.num = self._blackboard.block_num 34 | 35 | self._init_model(self.num) 36 | 37 | def _init_model(self, num): 38 | visual_file_name = os.path.join(get_urdf_path(), "block/meshes/base_link.STL") 39 | visual_shape = self._bullet_client.createVisualShape( 40 | shapeType=self._bullet_client.GEOM_MESH, 41 | fileName=visual_file_name, 42 | rgbaColor=[211 / 255, 211 / 255, 211 / 255, 1], 43 | ) 44 | collision_shape = self._bullet_client.createCollisionShape( 45 | shapeType=self._bullet_client.GEOM_BOX, 46 | halfExtents=[ 47 | self._blackboard.BLOCK_LENGTH / 2, 48 | self._blackboard.BLOCK_LENGTH / 2, 49 | self._blackboard.BLOCK_HEIGHT / 2, 50 | ], 51 | ) 52 | 53 | position = [[0, 0, 0] 
for _ in range(num)] 54 | self.ids = self._bullet_client.createMultiBody( 55 | baseCollisionShapeIndex=collision_shape, 56 | baseVisualShapeIndex=visual_shape, 57 | batchPositions=position, 58 | ) 59 | 60 | for id_ in self.ids: 61 | self._bullet_client.setCollisionFilterGroupMask( 62 | id_, -1, collisionFilterGroup=3, collisionFilterMask=3 63 | ) 64 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/blueprint/complex_twolayer_init.yaml: -------------------------------------------------------------------------------- 1 | area_length: 5 2 | area_width: 4 3 | area_height: 3 4 | block_num: 6 5 | slope_num: 2 6 | smartcar_num: 4 7 | legged_robot_num: 2 8 | 9 | # block: 10 | # - {id: 0, x: 3, y: 0, z: 1} 11 | # - {id: 1, x: 3, y: 3, z: 1} 12 | # - {id: 2, x: 4, y: 0, z: 1} 13 | # - {id: 3, x: 4, y: 3, z: 1} 14 | # - {id: 4, x: 0, y: 1, z: 1} 15 | # - {id: 5, x: 0, y: 2, z: 1} 16 | # smartcar: 17 | # - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 18 | # - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 19 | # - {id: 2, x: 3, y: 1, z: 1, yaw: 0} 20 | # - {id: 3, x: 3, y: 2, z: 1, yaw: 0} 21 | # fold_slope: 22 | # - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 23 | # - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 24 | # flag: 25 | # - {id: 0, x: -1, y: -2, z: 1} 26 | # goal: 27 | # - {id: 0, x: -1, y: -2, z: 1} 28 | # legged_robot: 29 | # - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 30 | # - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 31 | 32 | 33 | # block: 34 | # - {id: 0, x: 3, y: 1, z: 1} 35 | # - {id: 1, x: 3, y: 2, z: 1} 36 | # - {id: 2, x: 4, y: 1, z: 1} 37 | # - {id: 3, x: 4, y: 2, z: 1} 38 | # - {id: 4, x: 3, y: 1, z: 2} 39 | # - {id: 5, x: 4, y: 2, z: 2} 40 | # smartcar: 41 | # - {id: 0, x: 1, y: 0, z: 1, yaw: 0} 42 | # - {id: 1, x: 1, y: 3, z: 1, yaw: 0} 43 | # - {id: 2, x: 3, y: 1, z: 1, yaw: 0} 44 | # - {id: 3, x: 3, y: 2, z: 1, yaw: 0} 45 | # unfold_slope: 46 | # - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 47 | # - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 48 | # flag: 49 | # - {id: 0, x: -1, y: -2, z: 1} 50 | # goal: 51 | # - {id: 0, x: -1, y: -2, z: 1} 52 | # legged_robot: 53 | # - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 54 | # - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} 55 | 56 | block: 57 | - {id: 0, x: 3, y: 0, z: 1} 58 | - {id: 1, x: 3, y: 3, z: 1} 59 | - {id: 2, x: 4, y: 0, z: 1} 60 | - {id: 3, x: 4, y: 3, z: 1} 61 | - {id: 4, x: 0, y: 1, z: 1} 62 | - {id: 5, x: 0, y: 2, z: 1} 63 | smartcar: 64 | - {id: 0, x: 2, y: 0, z: 1, yaw: 0} 65 | - {id: 1, x: 2, y: 3, z: 1, yaw: 0} 66 | - {id: 2, x: 0, y: 1, z: 1, yaw: 0} 67 | - {id: 3, x: 0, y: 2, z: 1, yaw: 0} 68 | fold_slope: 69 | - {id: 0, x: 2, y: 1, z: 1, yaw: 2} 70 | - {id: 1, x: 2, y: 2, z: 1, yaw: 2} 71 | flag: 72 | - {id: 0, x: -1, y: -2, z: 1} 73 | goal: 74 | - {id: 0, x: -1, y: -2, z: 1} 75 | legged_robot: 76 | - {id: 0, x: -1, y: 0, z: 0.335, yaw: 1} 77 | - {id: 1, x: -1, y: -3, z: 0.335, yaw: 0} -------------------------------------------------------------------------------- /PyMARL/src/utils/logging.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import logging 3 | import numpy as np 4 | import torch 5 | 6 | class Logger: 7 | def __init__(self, console_logger): 8 | self.console_logger = console_logger 9 | 10 | self.use_tb = False 11 | self.use_sacred = False 12 | self.use_hdf = False 13 | 14 | self.stats = defaultdict(lambda: []) 15 | 16 | def setup_tb(self, directory_name): 17 | # Import here so it doesn't have to be installed if you don't use it 18 | from 
tensorboard_logger import configure, log_value 19 | configure(directory_name) 20 | self.tb_logger = log_value 21 | self.use_tb = True 22 | 23 | def setup_sacred(self, sacred_run_dict): 24 | self.sacred_info = sacred_run_dict.info 25 | self.use_sacred = True 26 | 27 | def log_stat(self, key, value, t, to_sacred=True): 28 | self.stats[key].append((t, value)) 29 | 30 | if self.use_tb: 31 | self.tb_logger(key, value, t) 32 | 33 | if self.use_sacred and to_sacred: 34 | if key in self.sacred_info: 35 | self.sacred_info["{}_T".format(key)].append(t) 36 | self.sacred_info[key].append(value) 37 | else: 38 | self.sacred_info["{}_T".format(key)] = [t] 39 | self.sacred_info[key] = [value] 40 | 41 | def print_recent_stats(self): 42 | log_str = "Recent Stats | t_env: {:>10} | Episode: {:>8}\n".format(*self.stats["episode"][-1]) 43 | i = 0 44 | for (k, v) in sorted(self.stats.items()): 45 | if k == "episode": 46 | continue 47 | i += 1 48 | window = 5 if k != "epsilon" else 1 49 | item = "{:.4f}".format(np.mean([ 50 | x[1].item() if torch.is_tensor(x[1]) is True else x[1] 51 | for x in self.stats[k][-window:]])) 52 | log_str += "{:<25}{:>8}".format(k + ":", item) 53 | log_str += "\n" if i % 4 == 0 else "\t" 54 | self.console_logger.info(log_str) 55 | 56 | 57 | # set up a custom logger 58 | def get_logger(): 59 | logger = logging.getLogger() 60 | logger.handlers = [] 61 | ch = logging.StreamHandler() 62 | formatter = logging.Formatter('[%(levelname)s %(asctime)s] %(name)s %(message)s', '%H:%M:%S') 63 | ch.setFormatter(formatter) 64 | logger.addHandler(ch) 65 | logger.setLevel('DEBUG') 66 | 67 | return logger 68 | 69 | -------------------------------------------------------------------------------- /PyMARL/src/envs/flagenv.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.flag_env import FlagEnv as Env 3 | 4 | 5 | class FlagEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 
| 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/envs/freeenv.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.free_env import FreeEnv as Env 3 | 4 | 5 | class FreeEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 | 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/envs/multicar_env.py: -------------------------------------------------------------------------------- 1 | from .multiagentenv import MultiAgentEnv 2 | from craft.goal_env import GoalEnv as Env 3 | 4 | 5 | class MultiCarEnv(MultiAgentEnv): 6 | 7 | def __init__(self, **env_config): 8 | enable_render = env_config["render"] if "render" in env_config else False 9 | init_blueprint_path = list(env_config['init_blueprint_path'].split(',')) 10 | self.env = Env(enable_render, init_blueprint_path, env_config) 11 | self.env.reset() 12 | self.episode_limit = 
env_config["max_steps"] + 10 13 | self.n_agents = self.env._blackboard.smartcar_num 14 | return 15 | 16 | def step(self, actions): 17 | obs, reward, done, info = self.env.step(actions) 18 | return reward, done, info 19 | 20 | def get_obs(self): 21 | obs_tuple = self.env.get_obs() 22 | obs_list = list(obs_tuple) 23 | result = [obs_list[i][0] for i in range(0, len(obs_list))] 24 | return result 25 | 26 | def get_obs_agent(self, agent_id): 27 | all_obs = self.get_obs() 28 | return all_obs[agent_id] 29 | 30 | def get_obs_size(self): 31 | return self.env.ob_dim 32 | 33 | def get_state(self): 34 | obs_tuple = self.get_obs() 35 | obs_list = list(obs_tuple) 36 | global_state = obs_list[0][self.env._ob_dim:] 37 | return global_state 38 | 39 | def get_state_size(self): 40 | state = self.get_state() 41 | return len(state) 42 | 43 | def get_avail_actions(self): 44 | obs_tuple = self.env.get_obs() 45 | obs_list = list(obs_tuple) 46 | result = [obs_list[i][1] for i in range(0, len(obs_list))] 47 | return result 48 | 49 | def get_avail_agent_actions(self, agent_id): 50 | all_masks = self.get_avail_actions() 51 | result = all_masks[agent_id] 52 | return result 53 | 54 | def get_total_actions(self): 55 | return self.env.ac_dim 56 | 57 | def reset(self): 58 | return self.env.reset() 59 | 60 | def render(self): 61 | return None 62 | 63 | def close(self): 64 | return 65 | 66 | def seed(self): 67 | return 0 68 | 69 | def save_replay(self): 70 | return 71 | 72 | def get_env_info(self): 73 | env_info = {"state_shape": self.get_state_size(), 74 | "obs_shape": self.get_obs_size(), 75 | "n_actions": self.get_total_actions(), 76 | "n_agents": self.n_agents, 77 | "episode_limit": self.episode_limit} 78 | return env_info 79 | 80 | def get_stats(self): 81 | return {} 82 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/qmix.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class QMixer(nn.Module): 8 | def __init__(self, args): 9 | super(QMixer, self).__init__() 10 | 11 | self.args = args 12 | self.n_agents = args.n_agents 13 | self.state_dim = int(np.prod(args.state_shape)) 14 | 15 | self.embed_dim = args.mixing_embed_dim 16 | 17 | if getattr(args, "hypernet_layers", 1) == 1: 18 | self.hyper_w_1 = nn.Linear(self.state_dim, self.embed_dim * self.n_agents) 19 | self.hyper_w_final = nn.Linear(self.state_dim, self.embed_dim) 20 | elif getattr(args, "hypernet_layers", 1) == 2: 21 | hypernet_embed = self.args.hypernet_embed 22 | self.hyper_w_1 = nn.Sequential(nn.Linear(self.state_dim, hypernet_embed), 23 | nn.ReLU(), 24 | nn.Linear(hypernet_embed, self.embed_dim * self.n_agents)) 25 | self.hyper_w_final = nn.Sequential(nn.Linear(self.state_dim, hypernet_embed), 26 | nn.ReLU(), 27 | nn.Linear(hypernet_embed, self.embed_dim)) 28 | elif getattr(args, "hypernet_layers", 1) > 2: 29 | raise Exception("Sorry >2 hypernet layers is not implemented!") 30 | else: 31 | raise Exception("Error setting number of hypernet layers.") 32 | 33 | # State dependent bias for hidden layer 34 | self.hyper_b_1 = nn.Linear(self.state_dim, self.embed_dim) 35 | 36 | # V(s) instead of a bias for the last layers 37 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 38 | nn.ReLU(), 39 | nn.Linear(self.embed_dim, 1)) 40 | 41 | def forward(self, agent_qs, states): 42 | bs = agent_qs.size(0) 43 | states = states.reshape(-1, 
self.state_dim) 44 | agent_qs = agent_qs.view(-1, 1, self.n_agents) 45 | # First layer 46 | w1 = th.abs(self.hyper_w_1(states)) 47 | b1 = self.hyper_b_1(states) 48 | w1 = w1.view(-1, self.n_agents, self.embed_dim) 49 | b1 = b1.view(-1, 1, self.embed_dim) 50 | hidden = F.elu(th.bmm(agent_qs, w1) + b1) 51 | # Second layer 52 | w_final = th.abs(self.hyper_w_final(states)) 53 | w_final = w_final.view(-1, self.embed_dim, 1) 54 | # State-dependent bias 55 | v = self.V(states).view(-1, 1, 1) 56 | # Compute final output 57 | y = th.bmm(hidden, w_final) + v 58 | # Reshape and return 59 | q_tot = y.view(bs, -1, 1) 60 | return q_tot 61 | -------------------------------------------------------------------------------- /PyMARL/src/components/action_selectors.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch.distributions import Categorical 3 | from .epsilon_schedules import DecayThenFlatSchedule 4 | 5 | REGISTRY = {} 6 | 7 | 8 | class MultinomialActionSelector(): 9 | 10 | def __init__(self, args): 11 | self.args = args 12 | 13 | self.schedule = DecayThenFlatSchedule(args.epsilon_start, args.epsilon_finish, args.epsilon_anneal_time, 14 | decay="linear") 15 | self.epsilon = self.schedule.eval(0) 16 | self.test_greedy = getattr(args, "test_greedy", True) 17 | 18 | def select_action(self, agent_inputs, avail_actions, t_env, test_mode=False): 19 | masked_policies = agent_inputs.clone() 20 | masked_policies[avail_actions == 0.0] = 0.0 21 | 22 | self.epsilon = self.schedule.eval(t_env) 23 | 24 | if test_mode and self.test_greedy: 25 | picked_actions = masked_policies.max(dim=2)[1] 26 | else: 27 | picked_actions = Categorical(masked_policies).sample().long() 28 | 29 | return picked_actions 30 | 31 | 32 | REGISTRY["multinomial"] = MultinomialActionSelector 33 | 34 | 35 | class EpsilonGreedyActionSelector(): 36 | 37 | def __init__(self, args): 38 | self.args = args 39 | 40 | self.schedule = DecayThenFlatSchedule(args.epsilon_start, args.epsilon_finish, args.epsilon_anneal_time, 41 | decay="linear") 42 | self.epsilon = self.schedule.eval(0) 43 | 44 | def select_action(self, agent_inputs, avail_actions, t_env, test_mode=False): 45 | 46 | # fix for special bug 20220822 47 | for i in range(0, len(avail_actions[0])): 48 | # print(avail_actions[0][i]) 49 | if th.sum(avail_actions[0][i]) == 0: 50 | avail_actions[0][i][10] = 1 51 | 52 | # Assuming agent_inputs is a batch of Q-Values for each agent bav 53 | self.epsilon = self.schedule.eval(t_env) 54 | 55 | if test_mode: 56 | # Greedy action selection only 57 | self.epsilon = 0.0 58 | 59 | # mask actions that are excluded from selection 60 | masked_q_values = agent_inputs.clone() 61 | masked_q_values[avail_actions == 0.0] = -float("inf") # should never be selected! 
62 | 63 | random_numbers = th.rand_like(agent_inputs[:, :, 0]) 64 | pick_random = (random_numbers < self.epsilon).long() 65 | random_actions = Categorical(avail_actions.float()).sample().long() 66 | 67 | picked_actions = pick_random * random_actions + (1 - pick_random) * masked_q_values.max(dim=2)[1] 68 | return picked_actions 69 | 70 | 71 | REGISTRY["epsilon_greedy"] = EpsilonGreedyActionSelector 72 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/centralV.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class CentralVCritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(CentralVCritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | self.output_type = "v" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, args.hidden_dim) 19 | self.fc2 = nn.Linear(args.hidden_dim, args.hidden_dim) 20 | self.fc3 = nn.Linear(args.hidden_dim, 1) 21 | 22 | def forward(self, batch, t=None): 23 | inputs, bs, max_t = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observations 38 | if self.args.obs_individual_obs: 39 | inputs.append(batch["obs"][:, ts].view(bs, max_t, -1).unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 40 | 41 | # last actions 42 | if self.args.obs_last_action: 43 | if t == 0: 44 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1)) 45 | elif isinstance(t, int): 46 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1)) 47 | else: 48 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 49 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 50 | inputs.append(last_actions) 51 | 52 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 53 | 54 | inputs = th.cat(inputs, dim=-1) 55 | return inputs, bs, max_t 56 | 57 | def _get_input_shape(self, scheme): 58 | # state 59 | input_shape = scheme["state"]["vshape"] 60 | # observations 61 | if self.args.obs_individual_obs: 62 | input_shape += scheme["obs"]["vshape"] * self.n_agents 63 | # last actions 64 | if self.args.obs_last_action: 65 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 66 | input_shape += self.n_agents 67 | return input_shape -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/coma.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class COMACritic(nn.Module): 7 | def __init__(self, scheme, args): 8 | super(COMACritic, self).__init__() 9 | 10 | self.args = args 11 | self.n_actions = args.n_actions 12 | self.n_agents = args.n_agents 13 | 14 | input_shape = self._get_input_shape(scheme) 15 | 
self.output_type = "q" 16 | 17 | # Set up network layers 18 | self.fc1 = nn.Linear(input_shape, 128) 19 | self.fc2 = nn.Linear(128, 128) 20 | self.fc3 = nn.Linear(128, self.n_actions) 21 | 22 | def forward(self, batch, t=None): 23 | inputs = self._build_inputs(batch, t=t) 24 | x = F.relu(self.fc1(inputs)) 25 | x = F.relu(self.fc2(x)) 26 | q = self.fc3(x) 27 | return q 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observation 38 | inputs.append(batch["obs"][:, ts]) 39 | 40 | # actions (masked out by agent) 41 | actions = batch["actions_onehot"][:, ts].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 42 | agent_mask = (1 - th.eye(self.n_agents, device=batch.device)) 43 | agent_mask = agent_mask.view(-1, 1).repeat(1, self.n_actions).view(self.n_agents, -1) 44 | inputs.append(actions * agent_mask.unsqueeze(0).unsqueeze(0)) 45 | 46 | # last actions 47 | if t == 0: 48 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 49 | elif isinstance(t, int): 50 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 51 | else: 52 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 53 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 54 | inputs.append(last_actions) 55 | 56 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).unsqueeze(0).expand(bs, max_t, -1, -1)) 57 | 58 | inputs = th.cat([x.reshape(bs, max_t, self.n_agents, -1) for x in inputs], dim=-1) 59 | return inputs 60 | 61 | def _get_input_shape(self, scheme): 62 | # state 63 | input_shape = scheme["state"]["vshape"] 64 | # observation 65 | input_shape += scheme["obs"]["vshape"] 66 | # actions and last actions 67 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents * 2 68 | # agent id 69 | input_shape += self.n_agents 70 | return input_shape -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/slope/slope.urdf.xacro: -------------------------------------------------------------------------------- 1 | 2 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | transmission_interface/SimpleTransmission 61 | 62 | hardware_interface/EffortJointInterface 63 | 64 | 65 | hardware_interface/EffortJointInterface 66 | 1 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/centralV_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class CentralVCriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(CentralVCriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = 
self._get_input_shape(scheme) 16 | self.output_type = "v" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, 1) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs, bs, max_t = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs) 26 | qs.append(q.view(bs, max_t, 1, -1)) 27 | q = th.cat(qs, dim=2) 28 | return q 29 | 30 | def _build_inputs(self, batch, t=None): 31 | bs = batch.batch_size 32 | max_t = batch.max_seq_length if t is None else 1 33 | ts = slice(None) if t is None else slice(t, t+1) 34 | inputs = [] 35 | # state 36 | inputs.append(batch["state"][:, ts]) 37 | 38 | # observations 39 | if self.args.obs_individual_obs: 40 | inputs.append(batch["obs"][:, ts].view(bs, max_t, -1)) 41 | 42 | if self.args.obs_last_action: 43 | # last actions 44 | if t == 0: 45 | inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1)) 46 | elif isinstance(t, int): 47 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1)) 48 | else: 49 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 50 | last_actions = last_actions.view(bs, max_t, 1, -1) 51 | inputs.append(last_actions) 52 | 53 | inputs = th.cat([x.reshape(bs * max_t, -1) for x in inputs], dim=1) 54 | return inputs, bs, max_t 55 | 56 | def _get_input_shape(self, scheme): 57 | # state 58 | input_shape = scheme["state"]["vshape"] 59 | # observations 60 | if self.args.obs_individual_obs: 61 | input_shape += scheme["obs"]["vshape"] 62 | # last actions 63 | if self.args.obs_last_action: 64 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 65 | 66 | return input_shape 67 | 68 | def parameters(self): 69 | params = list(self.critics[0].parameters()) 70 | for i in range(1, self.n_agents): 71 | params += list(self.critics[i].parameters()) 72 | return params 73 | 74 | def state_dict(self): 75 | return [a.state_dict() for a in self.critics] 76 | 77 | def load_state_dict(self, state_dict): 78 | for i, a in enumerate(self.critics): 79 | a.load_state_dict(state_dict[i]) 80 | 81 | def cuda(self): 82 | for c in self.critics: 83 | c.cuda() 84 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/bread_first_search.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import numpy as np 3 | from craftenv.sim_envs.pybullet_envs.craft.grid_objs import ( 4 | Air, Block, Flag, FoldedSlope, FoldedSlopeGear, UnfoldedSlopeBody, UnfoldedSlopeFoot 5 | ) 6 | from craftenv.sim_envs.pybullet_envs.craft.utils import next_step 7 | 8 | 9 | class BreadthFirstSearch: 10 | 11 | def __init__(self, blackboard): 12 | """ 13 | Initialize grid map for bfs search 14 | """ 15 | self._blackboard = blackboard 16 | self.motion = self.get_motion_model() 17 | self.area_size = blackboard.area_size 18 | 19 | def calc_reachable_space(self, x, y, z): 20 | grid = self._blackboard.grid 21 | maxx = self._blackboard.area_size[0] + 1 22 | minx = 0 23 | maxy = self._blackboard.area_size[1] + 1 24 | miny = 0 25 | maxz = self._blackboard.area_size[2] 26 | minz = 1 27 | visited = np.zeros((maxx, maxy, maxz + 1)) 28 | 29 | q = deque() 30 | q.append((x, y, z)) 31 | visited[x][y][z] = 1 32 | cnt = 1 33 | 34 | while q: 35 | x, y, z = q.popleft() 36 | 37 | # expand_grid search grid based on motion model 38 | for i, _ in 
enumerate(self.motion): 39 | n_x, n_y, n_z = x + self.motion[i][0], y + self.motion[i][1], z 40 | 41 | if minx <= n_x < maxx and \ 42 | miny <= n_y < maxy and \ 43 | minz <= n_z <= maxz: 44 | n_obj = grid[n_x][n_y][n_z] 45 | blow_n_obj = grid[n_x][n_y][n_z - 1] 46 | 47 | if visited[n_x][n_y][n_z] == 0: 48 | if blow_n_obj.can_stand and isinstance(n_obj, (Air, Block, Flag, FoldedSlopeGear)): 49 | q.append((n_x, n_y, n_z)) 50 | visited[n_x][n_y][n_z] = 1 51 | cnt += 1 52 | 53 | elif isinstance(n_obj, UnfoldedSlopeFoot): 54 | yaw = (n_obj.yaw + 2) % 4 55 | nn_x, nn_y = next_step(n_x, n_y, yaw * np.pi / 2) 56 | q.append((n_x, n_y, n_z)) 57 | visited[n_x][n_y][n_z] = 1 58 | cnt += 1 59 | if minx <= nn_x < maxx and \ 60 | miny <= nn_y < maxy and \ 61 | minz <= n_z + 1 <= maxz: 62 | q.append((nn_x, nn_y, n_z + 1)) 63 | visited[nn_x][nn_y][n_z + 1] = 1 64 | cnt += 1 65 | 66 | elif isinstance(n_obj, (FoldedSlope, UnfoldedSlopeBody)): 67 | pre_yaw = (n_obj.yaw + 2) % 4 68 | pre_x, pre_y = next_step(n_x, n_y, pre_yaw * np.pi / 2) 69 | if pre_x == x and pre_y == y: 70 | visited[n_x][n_y][n_z] = 1 71 | cnt += 1 72 | 73 | return visited 74 | 75 | @staticmethod 76 | def get_motion_model(): 77 | return np.array([[1, 0], [0, 1], [-1, 0], [0, -1]]) 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The CraftEnv Environment 2 | 3 | CraftEnv is a flexible Multi-Agent Reinforcement Learning (MARL) environment for Collective Robotic Construction (CRC) systems, written in Python. 4 | 5 | The CraftEnv paper is accepted by the 22nd International Conference on Autonomous Agents and Multiagent Systems (AAMAS) 2023. 6 | 7 | ## Installation instructions 8 | 9 | To install the codebase, please clone this repo and install the `CraftEnv/setup.py` via `pip install -e .`. The file can be used to install the necessary packages into a virtual environment. 10 | We use the [PyMARL](https://github.com/oxwhirl/pymarl) and the [EPyMARL](https://github.com/uoe-agents/epymarl) framework for the deep multi-agent reinforcement learning algorithms. 11 | 12 | ## Run an experiment 13 | 14 | ```shell 15 | cd PyMARL 16 | python src/main.py --config=qmix --env-config=multicar 17 | ``` 18 | 19 | The config files act as defaults for an algorithm or environment. 20 | 21 | They are all located in `src/config`. 22 | `--config` refers to the config files in `src/config/algs` 23 | `--env-config` refers to the config files in `src/config/envs` 24 | 25 | Note that the `multicar` environment corresponds to the goal-conditioned tasks, the `multicar2` environment corresponds to the free building tasks, and the `flag` environment corresponds to the breaking barrier tasks. 26 | 27 | All results will be stored in the `Results` folder. 28 | 29 | Currently, supported algos and environments are: 30 | 31 | - IQL, MAPPO, QMIX, QTRAN, COMA, VDN 32 | - multicar, multicar2, goal 33 | 34 | ## Saving and loading learnt models 35 | 36 | ### Saving models 37 | 38 | You can save the learnt models to disk by setting `save_model = True`, which is set to `False` by default. The frequency of saving models can be adjusted using `save_model_interval` configuration. Models will be saved in the result directory, under the folder called *models*. The directory corresponding each run will contain models saved throughout the experiment, each within a folder corresponding to the number of timesteps passed since starting the learning process. 
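As a minimal sketch (assuming the sacred-style `with key=value` override syntax that `src/main.py` forwards to `ex.run_commandline`; the exact values shown are illustrative, not defaults), these saving options can also be set directly from the command line instead of editing the config files:

```shell
# Enable model saving and save a checkpoint every 500k timesteps
# (parameter names taken from src/config/default.yaml; values here are examples)
python src/main.py --config=qmix --env-config=multicar with save_model=True save_model_interval=500000
```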
39 | 40 | ### Loading models 41 | 42 | Learnt models can be loaded using the `checkpoint_path` parameter, after which the learning will proceed from the corresponding timestep. 43 | 44 | ## Citation 45 | ``` 46 | @inproceedings{zhao2023craftenv, 47 | title={CraftEnv: A Flexible Collective Robotic Construction Environment for Multi-Agent Reinforcement Learning}, 48 | author={Zhao, Rui and Liu, Xu and Zhang, Yizheng and Li, Minghao and Zhou, Cheng and Li, Shuai and Han, Lei}, 49 | booktitle={2023 International Joint Conference on Autonomous Agents and Multi-agent Systems (AAMAS)}, 50 | year={2023}, 51 | } 52 | ``` 53 | 54 | ## License 55 | 56 | Use MIT license (see LICENSE.md) except for third-party softwares. They are all open-source softwares and have their own license types. 57 | 58 | ## Disclaimer 59 | 60 | This is not an officially supported Tencent product. The code and data in this repository are for research purpose only. No representation or warranty whatsoever, expressed or implied, is made as to its accuracy, reliability or completeness. We assume no liability and are not responsible for any misuse or damage caused by the code and data. Your use of the code and data are subject to applicable laws and your use of them is at your own risk. 61 | 62 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/grid_objs.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import IntEnum, unique, auto 3 | 4 | 5 | @unique 6 | class ObjType(IntEnum): 7 | Undefined = -1 8 | Air = auto() 9 | Ground = auto() 10 | Wall = auto() 11 | Block = auto() 12 | Flag = auto() 13 | FoldedSlope = auto() 14 | FoldedSlopeGear = auto() 15 | UnfoldedSlopeBody = auto() 16 | UnfoldedSlopeFoot = auto() 17 | 18 | 19 | class WorldObj(ABC): 20 | """ 21 | Base class for grid world objects 22 | """ 23 | 24 | @abstractmethod 25 | def __init__(self): 26 | self.can_lift = False 27 | self.can_fold = False 28 | self.can_unfold = False 29 | self.can_stand = False 30 | self.near_unfold_slope_body = False 31 | self.near_blow_unfold_slope_foot = False 32 | """ 33 | the obj id on the WorldObj, -1 means there is nothing, -2 means there is something, 34 | 0, 1, 2... 
means there is a smartcar on it and the number represent the smartcar's id 35 | """ 36 | self.obj_on_it = -1 37 | self.type = ObjType.Undefined 38 | 39 | 40 | class Air(WorldObj): 41 | def __init__(self): 42 | super().__init__() 43 | self.can_lift = False 44 | self.can_stand = False 45 | self.type = ObjType.Air 46 | 47 | 48 | class Ground(WorldObj): 49 | def __init__(self): 50 | super().__init__() 51 | self.can_stand = True 52 | self.type = ObjType.Ground 53 | 54 | 55 | class Wall(WorldObj): 56 | def __init__(self): 57 | super().__init__() 58 | self.type = ObjType.Wall 59 | self.can_lift = False 60 | self.can_stand = False 61 | 62 | 63 | class Block(WorldObj): 64 | def __init__(self): 65 | super().__init__() 66 | self.can_lift = True 67 | self.can_stand = True 68 | self.type = ObjType.Block 69 | 70 | 71 | class Flag(WorldObj): 72 | def __init__(self): 73 | super().__init__() 74 | self.can_lift = True 75 | self.type = ObjType.Flag 76 | 77 | 78 | class FoldedSlope(WorldObj): 79 | """ 80 | ↑ 1 81 | ← → ↔ 2 0 82 | ↓ 3 83 | 84 | body-foot 85 | yaw = 0 86 | 87 | foot 88 | | 89 | body 90 | yaw = np.pi * 0.5 91 | 92 | foot-body 93 | yaw = np.pi * 1 94 | 95 | body 96 | | 97 | foot 98 | yaw = np.pi * 1.5 99 | """ 100 | 101 | def __init__(self, yaw): 102 | super().__init__() 103 | self.can_lift = True 104 | self.yaw = yaw 105 | self.type = ObjType.FoldedSlope 106 | 107 | 108 | class FoldedSlopeGear(WorldObj): 109 | def __init__(self, yaw): 110 | super().__init__() 111 | self.yaw = yaw 112 | self.type = ObjType.FoldedSlopeGear 113 | 114 | 115 | class UnfoldedSlopeBody(WorldObj): 116 | """ 117 | ↑ 1 118 | ← → ↔ 2 0 119 | ↓ 3 120 | """ 121 | 122 | def __init__(self, yaw): 123 | super().__init__() 124 | self.yaw = yaw 125 | self.type = ObjType.UnfoldedSlopeBody 126 | 127 | 128 | class UnfoldedSlopeFoot(WorldObj): 129 | """ 130 | ↑ 1 131 | ← → ↔ 2 0 132 | ↓ 3 133 | """ 134 | 135 | def __init__(self, yaw): 136 | super().__init__() 137 | self.yaw = yaw 138 | self.type = ObjType.UnfoldedSlopeFoot 139 | -------------------------------------------------------------------------------- /PyMARL/src/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import collections 4 | from os.path import dirname, abspath 5 | from copy import deepcopy 6 | from sacred import Experiment, SETTINGS 7 | from sacred.observers import FileStorageObserver 8 | from sacred.utils import apply_backspaces_and_linefeeds 9 | import sys 10 | import torch as th 11 | from utils.logging import get_logger 12 | import yaml 13 | 14 | from run import run 15 | 16 | SETTINGS['CAPTURE_MODE'] = "fd" # set to "no" if you want to see stdout/stderr in console 17 | logger = get_logger() 18 | 19 | ex = Experiment("pymarl") 20 | ex.logger = logger 21 | ex.captured_out_filter = apply_backspaces_and_linefeeds 22 | 23 | results_path = os.path.join(dirname(dirname(abspath(__file__))), "results") 24 | 25 | 26 | @ex.main 27 | def my_main(_run, _config, _log): 28 | # Setting the random seed throughout the modules 29 | config = config_copy(_config) 30 | np.random.seed(config["seed"]) 31 | th.manual_seed(config["seed"]) 32 | config['env_args']['seed'] = config["seed"] 33 | 34 | # run the framework 35 | run(_run, config, _log) 36 | 37 | 38 | def _get_config(params, arg_name, subfolder): 39 | config_name = None 40 | for _i, _v in enumerate(params): 41 | if _v.split("=")[0] == arg_name: 42 | config_name = _v.split("=")[1] 43 | del params[_i] 44 | break 45 | 46 | if config_name is not None: 47 | 
with open(os.path.join(os.path.dirname(__file__), "config", subfolder, "{}.yaml".format(config_name)), "r") as f: 48 | try: 49 | config_dict = yaml.load(f) 50 | except yaml.YAMLError as exc: 51 | assert False, "{}.yaml error: {}".format(config_name, exc) 52 | return config_dict 53 | 54 | 55 | def recursive_dict_update(d, u): 56 | for k, v in u.items(): 57 | if isinstance(v, collections.Mapping): 58 | d[k] = recursive_dict_update(d.get(k, {}), v) 59 | else: 60 | d[k] = v 61 | return d 62 | 63 | 64 | def config_copy(config): 65 | if isinstance(config, dict): 66 | return {k: config_copy(v) for k, v in config.items()} 67 | elif isinstance(config, list): 68 | return [config_copy(v) for v in config] 69 | else: 70 | return deepcopy(config) 71 | 72 | 73 | if __name__ == '__main__': 74 | params = deepcopy(sys.argv) 75 | 76 | # Get the defaults from default.yaml 77 | with open(os.path.join(os.path.dirname(__file__), "config", "default.yaml"), "r") as f: 78 | try: 79 | config_dict = yaml.load(f) 80 | except yaml.YAMLError as exc: 81 | assert False, "default.yaml error: {}".format(exc) 82 | 83 | # Load algorithm and env base configs 84 | env_config = _get_config(params, "--env-config", "envs") 85 | alg_config = _get_config(params, "--config", "algs") 86 | # config_dict = {**config_dict, **env_config, **alg_config} 87 | config_dict = recursive_dict_update(config_dict, env_config) 88 | config_dict = recursive_dict_update(config_dict, alg_config) 89 | 90 | # now add all the config to sacred 91 | ex.add_config(config_dict) 92 | 93 | # Save to disk by default for sacred 94 | logger.info("Saving to FileStorageObserver in results/sacred.") 95 | file_obs_path = os.path.join(results_path, "sacred") 96 | ex.observers.append(FileStorageObserver.create(file_obs_path)) 97 | 98 | ex.run_commandline(params) 99 | 100 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: craft 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - blas=1.0=mkl 9 | - bzip2=1.0.8=h7b6447c_0 10 | - ca-certificates=2022.10.11=h06a4308_0 11 | - certifi=2021.5.30=py36h06a4308_0 12 | - cpuonly=2.0=0 13 | - dataclasses=0.8=pyh4f3eec9_6 14 | - ffmpeg=4.3=hf484d3e_0 15 | - freetype=2.12.1=h4a9f257_0 16 | - gmp=6.2.1=h295c915_3 17 | - gnutls=3.6.15=he1e5248_0 18 | - intel-openmp=2022.1.0=h9e868ea_3769 19 | - jpeg=9e=h7f8727e_0 20 | - lame=3.100=h7b6447c_0 21 | - lcms2=2.12=h3be6417_0 22 | - ld_impl_linux-64=2.38=h1181459_1 23 | - lerc=3.0=h295c915_0 24 | - libdeflate=1.8=h7f8727e_5 25 | - libffi=3.3=he6710b0_2 26 | - libgcc-ng=11.2.0=h1234567_1 27 | - libgomp=11.2.0=h1234567_1 28 | - libiconv=1.16=h7f8727e_2 29 | - libidn2=2.3.2=h7f8727e_0 30 | - libpng=1.6.37=hbc83047_0 31 | - libstdcxx-ng=11.2.0=h1234567_1 32 | - libtasn1=4.16.0=h27cfd23_0 33 | - libtiff=4.4.0=hecacb30_0 34 | - libunistring=0.9.10=h27cfd23_0 35 | - libuv=1.40.0=h7b6447c_0 36 | - libwebp-base=1.2.4=h5eee18b_0 37 | - lz4-c=1.9.3=h295c915_1 38 | - mkl=2020.2=256 39 | - mkl-service=2.3.0=py36he8ac12f_0 40 | - mkl_fft=1.3.0=py36h54f3939_0 41 | - mkl_random=1.1.1=py36h0573a6f_0 42 | - ncurses=6.3=h5eee18b_3 43 | - nettle=3.7.3=hbbd107a_1 44 | - numpy=1.19.2=py36h54aff64_0 45 | - numpy-base=1.19.2=py36hfa32c7d_0 46 | - olefile=0.46=py36_0 47 | - openh264=2.1.1=h4ff587b_0 48 | - openjpeg=2.4.0=h3ad879b_0 49 | - openssl=1.1.1q=h7f8727e_0 50 | - pillow=8.3.1=py36h2c7a002_0 51 | - 
pip=21.2.2=py36h06a4308_0 52 | - python=3.6.13=h12debd9_1 53 | - pytorch=1.10.2=py3.6_cpu_0 54 | - pytorch-mutex=1.0=cpu 55 | - readline=8.2=h5eee18b_0 56 | - setuptools=58.0.4=py36h06a4308_0 57 | - six=1.16.0=pyhd3eb1b0_1 58 | - sqlite=3.39.3=h5082296_0 59 | - tk=8.6.12=h1ccaba5_0 60 | - torchaudio=0.10.2=py36_cpu 61 | - torchvision=0.11.3=py36_cpu 62 | - typing_extensions=4.1.1=pyh06a4308_0 63 | - wheel=0.37.1=pyhd3eb1b0_0 64 | - xz=5.2.6=h5eee18b_0 65 | - zlib=1.2.13=h5eee18b_0 66 | - zstd=1.5.2=ha4553b6_0 67 | - pip: 68 | - absl-py==1.3.0 69 | - charset-normalizer==2.0.12 70 | - cloudpickle==2.2.0 71 | - cycler==0.11.0 72 | - deepdiff==5.7.0 73 | - dm-env==1.5 74 | - dm-env-rpc==1.1.0 75 | - dm-tree==0.1.7 76 | - docopt==0.6.2 77 | - enum34==1.1.10 78 | - googleapis-common-protos==1.56.3 79 | - grpcio==1.48.2 80 | - gym==0.21.0 81 | - idna==3.4 82 | - immutabledict==2.2.1 83 | - importlib-metadata==4.8.3 84 | - jsonpickle==0.9.6 85 | - kiwisolver==1.3.1 86 | - matplotlib==3.3.4 87 | - mock==4.0.3 88 | - mpi4py==3.0.3 89 | - mpyq==0.2.5 90 | - munch==2.5.0 91 | - ordered-set==4.0.2 92 | - pandas==1.1.5 93 | - portpicker==1.5.2 94 | - protobuf==3.19.5 95 | - psutil==5.9.3 96 | - pybullet==3.2.5 97 | - pygame==2.1.2 98 | - pyparsing==3.0.9 99 | - pysc2==4.0.0 100 | - python-dateutil==2.8.2 101 | - pytz==2022.6 102 | - pyyaml==3.13 103 | - requests==2.27.1 104 | - s2clientprotocol==5.0.10.88500.0 105 | - s2protocol==5.0.10.88500.0 106 | - sacred==0.7.2 107 | - scipy==1.5.4 108 | - sk-video==1.1.10 109 | - smac==1.0.0 110 | - urllib3==1.26.12 111 | - websocket-client==1.3.1 112 | - wrapt==1.14.1 113 | - zipp==3.6.0 114 | prefix: /home/droid/anaconda3/envs/craft2 115 | -------------------------------------------------------------------------------- /PyMARL/src/modules/critics/coma_ns.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from modules.critics.mlp import MLP 5 | 6 | 7 | class COMACriticNS(nn.Module): 8 | def __init__(self, scheme, args): 9 | super(COMACriticNS, self).__init__() 10 | 11 | self.args = args 12 | self.n_actions = args.n_actions 13 | self.n_agents = args.n_agents 14 | 15 | input_shape = self._get_input_shape(scheme) 16 | self.output_type = "q" 17 | 18 | # Set up network layers 19 | self.critics = [MLP(input_shape, args.hidden_dim, self.n_actions) for _ in range(self.n_agents)] 20 | 21 | def forward(self, batch, t=None): 22 | inputs = self._build_inputs(batch, t=t) 23 | qs = [] 24 | for i in range(self.n_agents): 25 | q = self.critics[i](inputs[:, :, i]).unsqueeze(2) 26 | qs.append(q) 27 | return th.cat(qs, dim=2) 28 | 29 | def _build_inputs(self, batch, t=None): 30 | bs = batch.batch_size 31 | max_t = batch.max_seq_length if t is None else 1 32 | ts = slice(None) if t is None else slice(t, t+1) 33 | inputs = [] 34 | # state 35 | inputs.append(batch["state"][:, ts].unsqueeze(2).repeat(1, 1, self.n_agents, 1)) 36 | 37 | # observation 38 | if self.args.obs_individual_obs: 39 | inputs.append(batch["obs"][:, ts]) 40 | 41 | # actions (masked out by agent) 42 | actions = batch["actions_onehot"][:, ts].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 43 | agent_mask = (1 - th.eye(self.n_agents, device=batch.device)) 44 | agent_mask = agent_mask.view(-1, 1).repeat(1, self.n_actions).view(self.n_agents, -1) 45 | inputs.append(actions * agent_mask.unsqueeze(0).unsqueeze(0)) 46 | 47 | # last actions 48 | if self.args.obs_last_action: 49 | if t == 0: 50 | 
inputs.append(th.zeros_like(batch["actions_onehot"][:, 0:1]).view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 51 | elif isinstance(t, int): 52 | inputs.append(batch["actions_onehot"][:, slice(t-1, t)].view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1)) 53 | else: 54 | last_actions = th.cat([th.zeros_like(batch["actions_onehot"][:, 0:1]), batch["actions_onehot"][:, :-1]], dim=1) 55 | last_actions = last_actions.view(bs, max_t, 1, -1).repeat(1, 1, self.n_agents, 1) 56 | inputs.append(last_actions) 57 | 58 | inputs = th.cat([x.reshape(bs, max_t, self.n_agents, -1) for x in inputs], dim=-1) 59 | return inputs 60 | 61 | def _get_input_shape(self, scheme): 62 | # state 63 | input_shape = scheme["state"]["vshape"] 64 | # observation 65 | if self.args.obs_individual_obs: 66 | input_shape += scheme["obs"]["vshape"] 67 | 68 | # actions 69 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 70 | 71 | # last action 72 | if self.args.obs_last_action: 73 | input_shape += scheme["actions_onehot"]["vshape"][0] * self.n_agents 74 | # agent id 75 | # input_shape += self.n_agents 76 | return input_shape 77 | 78 | def parameters(self): 79 | params = list(self.critics[0].parameters()) 80 | for i in range(1, self.n_agents): 81 | params += list(self.critics[i].parameters()) 82 | return params 83 | 84 | def state_dict(self): 85 | return [a.state_dict() for a in self.critics] 86 | 87 | def load_state_dict(self, state_dict): 88 | for i, a in enumerate(self.critics): 89 | a.load_state_dict(state_dict[i]) 90 | 91 | def cuda(self): 92 | for c in self.critics: 93 | c.cuda() -------------------------------------------------------------------------------- /CraftEnv/src/craft/blackboard.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .grid_objs import Air, Ground, ObjType, Wall 7 | from .utils import Direction 8 | 9 | 10 | class Point: 11 | def __init__(self, x, y, z=1): 12 | self.x = x 13 | self.y = y 14 | self.z = z 15 | 16 | def __add__(self, other): 17 | return Point(self.x + other, self.y + other) 18 | 19 | def __mul__(self, other): 20 | return Point(self.x * other, self.y * other) 21 | 22 | def __hash__(self): 23 | return 1 24 | 25 | def __eq__(self, other): 26 | return self.x == other.x and self.y == other.y and self.z == other.z 27 | 28 | def __repr__(self): 29 | return f"Point({self.x}, {self.y}, {self.z})" 30 | 31 | 32 | class Blackboard: 33 | BLOCK_LENGTH = 0.3225 34 | BLOCK_HEIGHT = 0.155 + 0.03 35 | SMARTCAR_LENGTH = 0.155 36 | SMARTCAR_WIDTH = 0.155 37 | SMARTCAR_HEIGHT = 0.155 38 | 39 | def __init__(self, blueprint_path_list): 40 | """ 41 | ^ y 42 | | 43 | | 44 | | 45 | o ----------> x 46 | world coordinate 47 | """ 48 | self.spawn_point_set = set() 49 | self.blueprint_path_list = blueprint_path_list 50 | self.area_size = [0, 0, 0] # length, width, height 51 | self.load_blueprint(blueprint_path_list[0]) 52 | 53 | def load_blueprint(self, blueprint_path): 54 | f = open(blueprint_path, "r", encoding="utf-8") 55 | self.blueprint_path = blueprint_path 56 | template_generator = yaml.safe_load_all(f) 57 | self.template = {} 58 | for t in template_generator: 59 | if t is not None: 60 | self.template.update(t) 61 | 62 | try: 63 | self.wall_num = self.template["wall_num"] 64 | except KeyError as e: 65 | self.wall_num = 0 66 | print("KeyError, ", e) 67 | self.block_num = self.template["block_num"] 68 | self.slope_num = self.template["slope_num"] 69 | self.smartcar_num = 
self.template["smartcar_num"] 70 | self.legged_robot_num = self.template["legged_robot_num"] 71 | self.area_size[0] = self.template["area_length"] 72 | self.area_size[1] = self.template["area_width"] 73 | self.area_size[2] = self.template["area_height"] 74 | 75 | def reset(self, blueprint_path=None): 76 | if blueprint_path is not None: 77 | self.load_blueprint(blueprint_path) 78 | 79 | self.spawn_point_set.clear() 80 | 81 | length = self.area_size[0] + 1 82 | width = self.area_size[1] + 1 83 | height = self.area_size[2] + 1 84 | 85 | self.grid = [ 86 | [[Air() for _ in range(height)] for _ in range(width)] 87 | for _ in range(length) 88 | ] 89 | 90 | for i, j in itertools.product(range(length), range(width)): 91 | self.grid[i][j][0] = Ground() 92 | for k in range(height): 93 | for i in range(width): 94 | self.grid[-1][i][k] = Wall() 95 | for j in range(length): 96 | self.grid[j][-1][k] = Wall() 97 | 98 | def random_spawn_obj(self, obj_type): 99 | direction = np.random.randint(Direction.DIR_0, Direction.DIR_3) 100 | random_cnt = 0 101 | while True: 102 | random_cnt += 1 103 | assert random_cnt < 1000, f"please reduce obj {obj_type} num" 104 | p = Point( 105 | np.random.randint(0, self.area_size[0]), 106 | np.random.randint(0, self.area_size[1]), 107 | ) 108 | obj = self.grid[p.x][p.y][p.z] 109 | blow_obj = self.grid[p.x][p.y][p.z - 1] 110 | if ( 111 | p not in self.spawn_point_set 112 | and obj.type is ObjType.Air 113 | and blow_obj.can_stand 114 | ): 115 | break 116 | return p, direction 117 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/flag/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/wall/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/block/block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 
| 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/flag_env.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .flag_craft_env import CraftEnv 7 | from .grid_objs import ObjType 8 | 9 | 10 | class FlagEnv(CraftEnv): 11 | 12 | def __init__(self, enable_render, init_blueprint_path, env_config): 13 | super().__init__(enable_render, init_blueprint_path, env_config) 14 | self.key_mapping = { 15 | "block": 1, 16 | "folded_slope": 2, 17 | "unfolded_body": 3, 18 | "unfolded_foot": 4 19 | } 20 | ########## 21 | self.hit = False 22 | self.current_step = -1 23 | self.last_distance = None 24 | ########## 25 | self.last_pos_dict = None 26 | 27 | def read_design(self, design_path): 28 | with open(design_path) as f: 29 | source = yaml.load(f, Loader=yaml.loader.SafeLoader) 30 | design_list = np.zeros(self.area_size) 31 | design_dict = { 32 | "block": [], 33 | "folded_slope": [], 34 | "unfolded_body": [], 35 | "unfolded_foot": [] 36 | } 37 | for key in self.key_mapping.keys(): 38 | if key not in source.keys(): 39 | continue 40 | for obj in source[key]: 41 | x, y, z = int(obj['x']), int(obj['y']), int(obj['z']) 42 | design_list[x][y][z] = self.key_mapping[key] 43 | design_dict[key].append((x, y, z)) 44 | return design_list, design_dict 45 | 46 | def get_pos_list(self): 47 | raise NotImplementedError 48 | 49 | def get_pos_dict(self): 50 | result = { 51 | "block": [], 52 | "folded_slope": [], 53 | "unfolded_body": [], 54 | "unfolded_foot": [] 55 | } 56 | grid = self._blackboard.grid 57 | for i, j, k in itertools.product(range(self.area_size[0]), 58 | range(self.area_size[1]), 59 | range(self.area_size[2])): 60 | obj = grid[i][j][k] 61 | if obj.type is ObjType.Block or obj.type is \ 62 | ObjType.FoldedSlopeGear: 63 | result["block"].append((i, j, k)) 64 | elif obj.type is ObjType.FoldedSlope: 65 | result["folded_slope"].append((i, j, k)) 66 | elif obj.type is ObjType.UnfoldedSlopeBody: 67 | result["unfolded_body"].append((i, j, k)) 68 | elif obj.type is ObjType.UnfoldedSlopeFoot: 69 | result["unfolded_foot"].append((i, j, k)) 70 | else: 71 | pass 72 | return result 73 | 74 | def reset(self): 75 | self.hit = False 76 | self.current_step = -1 77 | self.last_distance = None 78 | obs = super().reset() 79 | return obs 80 | 81 | def _calculate_dist(self): 82 | flag = (self.flag_pos_x, self.flag_pos_y, self.flag_pos_z) 83 | goal = (self._blackboard.goal.x, self._blackboard.goal.y, 84 | self._blackboard.goal.z) 85 | dist = abs(flag[0] - goal[0]) + abs(flag[1] - goal[1]) + abs(flag[2] - 86 | goal[2]) 87 | return dist 88 | 89 | def _compute_reward(self, blackboard=None): 90 | reward = None 91 | self.current_step += 1 92 | if self.last_distance is None: 93 | dist = self._calculate_dist() 94 | reward = 0 95 | self.last_distance = dist 96 | else: 97 | dist = self._calculate_dist() 98 | reward = self.last_distance - dist 99 | self.last_distance = dist 100 | if dist == 0 and (self.hit is False): 101 | self.hit = True 102 | reward += (20 - self.current_step) 103 | return reward 104 | 105 | def step(self, action): 106 | enable_local_obs = self.env_config.get('enable_local_obs', False) 107 | if not 
enable_local_obs: 108 | obs, reward, done, info = super().step(action) 109 | return obs, reward, done, info 110 | else: 111 | return NotImplementedError 112 | -------------------------------------------------------------------------------- /PyMARL/src/runners/episode_runner.py: -------------------------------------------------------------------------------- 1 | from envs import REGISTRY as env_REGISTRY 2 | from functools import partial 3 | from components.episode_buffer import EpisodeBatch 4 | import numpy as np 5 | 6 | 7 | class EpisodeRunner: 8 | 9 | def __init__(self, args, logger): 10 | self.args = args 11 | self.logger = logger 12 | self.batch_size = self.args.batch_size_run 13 | assert self.batch_size == 1 14 | 15 | self.env = env_REGISTRY[self.args.env](**self.args.env_args) 16 | self.episode_limit = self.env.episode_limit 17 | self.t = 0 18 | 19 | self.t_env = 0 20 | 21 | self.train_returns = [] 22 | self.test_returns = [] 23 | self.train_stats = {} 24 | self.test_stats = {} 25 | 26 | # Log the first run 27 | self.log_train_stats_t = -1000000 28 | 29 | def setup(self, scheme, groups, preprocess, mac): 30 | self.new_batch = partial(EpisodeBatch, scheme, groups, self.batch_size, self.episode_limit + 1, 31 | preprocess=preprocess, device=self.args.device) 32 | self.mac = mac 33 | 34 | def get_env_info(self): 35 | return self.env.get_env_info() 36 | 37 | def save_replay(self): 38 | self.env.save_replay() 39 | 40 | def close_env(self): 41 | self.env.close() 42 | 43 | def reset(self): 44 | self.batch = self.new_batch() 45 | self.env.reset() 46 | self.t = 0 47 | 48 | def run(self, test_mode=False): 49 | self.reset() 50 | 51 | terminated = False 52 | episode_return = 0 53 | self.mac.init_hidden(batch_size=self.batch_size) 54 | 55 | while not terminated: 56 | 57 | pre_transition_data = { 58 | "state": [self.env.get_state()], 59 | "avail_actions": [self.env.get_avail_actions()], 60 | "obs": [self.env.get_obs()] 61 | } 62 | 63 | self.batch.update(pre_transition_data, ts=self.t) 64 | 65 | # Pass the entire batch of experiences up till now to the agents 66 | # Receive the actions for each agent at this timestep in a batch of size 1 67 | actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode) 68 | 69 | reward, terminated, env_info = self.env.step(actions[0]) 70 | episode_return += reward 71 | 72 | post_transition_data = { 73 | "actions": actions, 74 | "reward": [(reward,)], 75 | "terminated": [(terminated != env_info.get("episode_limit", False),)], 76 | } 77 | 78 | self.batch.update(post_transition_data, ts=self.t) 79 | 80 | self.t += 1 81 | 82 | last_data = { 83 | "state": [self.env.get_state()], 84 | "avail_actions": [self.env.get_avail_actions()], 85 | "obs": [self.env.get_obs()] 86 | } 87 | self.batch.update(last_data, ts=self.t) 88 | 89 | # Select actions in the last stored state 90 | actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode) 91 | self.batch.update({"actions": actions}, ts=self.t) 92 | 93 | cur_stats = self.test_stats if test_mode else self.train_stats 94 | cur_returns = self.test_returns if test_mode else self.train_returns 95 | log_prefix = "test_" if test_mode else "" 96 | cur_stats.update({k: cur_stats.get(k, 0) + env_info.get(k, 0) for k in set(cur_stats) | set(env_info)}) 97 | cur_stats["n_episodes"] = 1 + cur_stats.get("n_episodes", 0) 98 | cur_stats["ep_length"] = self.t + cur_stats.get("ep_length", 0) 99 | 100 | if not test_mode: 101 | self.t_env += self.t 102 | 103 | 
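# Record this episode's return; _log flushes returns and stats after test_nepisode test episodes, or once runner_log_interval environment steps have passed since the last training log.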
cur_returns.append(episode_return) 104 | 105 | if test_mode and (len(self.test_returns) == self.args.test_nepisode): 106 | self._log(cur_returns, cur_stats, log_prefix) 107 | elif self.t_env - self.log_train_stats_t >= self.args.runner_log_interval: 108 | self._log(cur_returns, cur_stats, log_prefix) 109 | if hasattr(self.mac.action_selector, "epsilon"): 110 | self.logger.log_stat("epsilon", self.mac.action_selector.epsilon, self.t_env) 111 | self.log_train_stats_t = self.t_env 112 | 113 | return self.batch 114 | 115 | def _log(self, returns, stats, prefix): 116 | self.logger.log_stat(prefix + "return_mean", np.mean(returns), self.t_env) 117 | self.logger.log_stat(prefix + "return_std", np.std(returns), self.t_env) 118 | returns.clear() 119 | 120 | for k, v in stats.items(): 121 | if k != "n_episodes": 122 | self.logger.log_stat(prefix + k + "_mean" , v/stats["n_episodes"], self.t_env) 123 | stats.clear() 124 | -------------------------------------------------------------------------------- /PyMARL/README.md.pymarl: -------------------------------------------------------------------------------- 1 | ```diff 2 | - Please pay attention to the version of SC2 you are using for your experiments. 3 | - Performance is *not* always comparable between versions. 4 | - The results in SMAC (https://arxiv.org/abs/1902.04043) use SC2.4.6.2.69232 not SC2.4.10. 5 | ``` 6 | 7 | # Python MARL framework 8 | 9 | PyMARL is [WhiRL](http://whirl.cs.ox.ac.uk)'s framework for deep multi-agent reinforcement learning and includes implementations of the following algorithms: 10 | - [**QMIX**: QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1803.11485) 11 | - [**COMA**: Counterfactual Multi-Agent Policy Gradients](https://arxiv.org/abs/1705.08926) 12 | - [**VDN**: Value-Decomposition Networks For Cooperative Multi-Agent Learning](https://arxiv.org/abs/1706.05296) 13 | - [**IQL**: Independent Q-Learning](https://arxiv.org/abs/1511.08779) 14 | - [**QTRAN**: QTRAN: Learning to Factorize with Transformation for Cooperative Multi-Agent Reinforcement Learning](https://arxiv.org/abs/1905.05408) 15 | 16 | PyMARL is written in PyTorch and uses [SMAC](https://github.com/oxwhirl/smac) as its environment. 17 | 18 | ## Installation instructions 19 | 20 | Build the Dockerfile using 21 | ```shell 22 | cd docker 23 | bash build.sh 24 | ``` 25 | 26 | Set up StarCraft II and SMAC: 27 | ```shell 28 | bash install_sc2.sh 29 | ``` 30 | 31 | This will download SC2 into the 3rdparty folder and copy the maps necessary to run over. 32 | 33 | The requirements.txt file can be used to install the necessary packages into a virtual environment (not recomended). 34 | 35 | ## Run an experiment 36 | 37 | ```shell 38 | python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z 39 | ``` 40 | 41 | The config files act as defaults for an algorithm or environment. 42 | 43 | They are all located in `src/config`. 44 | `--config` refers to the config files in `src/config/algs` 45 | `--env-config` refers to the config files in `src/config/envs` 46 | 47 | To run experiments using the Docker container: 48 | ```shell 49 | bash run.sh $GPU python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z 50 | ``` 51 | 52 | All results will be stored in the `Results` folder. 53 | 54 | The previous config files used for the SMAC Beta have the suffix `_beta`. 
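Any entry in these config files can also be overridden from the command line with the same Sacred-style `with` syntax used above. As a rough sketch (the exact key names and defaults live in `src/config/default.yaml` and the algorithm/environment YAMLs, so check those files before relying on the values shown here):

```shell
python3 src/main.py --config=qmix --env-config=sc2 with env_args.map_name=2s3z save_model=True save_model_interval=200000
```

Here `save_model` and `save_model_interval` are the saving options described below, and `200000` is only an illustrative interval.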
55 | 56 | ## Saving and loading learnt models 57 | 58 | ### Saving models 59 | 60 | You can save the learnt models to disk by setting `save_model = True`, which is set to `False` by default. The frequency of saving models can be adjusted using the `save_model_interval` configuration option. Models will be saved in the result directory, under the folder called *models*. The directory corresponding to each run will contain models saved throughout the experiment, each within a folder named after the number of timesteps passed since starting the learning process. 61 | 62 | ### Loading models 63 | 64 | Learnt models can be loaded using the `checkpoint_path` parameter, after which the learning will proceed from the corresponding timestep. 65 | 66 | ## Watching StarCraft II replays 67 | 68 | The `save_replay` option allows saving replays of models which are loaded using `checkpoint_path`. Once the model is successfully loaded, `test_nepisode` episodes are run in test mode and a .SC2Replay file is saved in the Replay directory of StarCraft II. Please make sure to use the episode runner if you wish to save a replay, i.e., `runner=episode`. The name of the saved replay file starts with the given `env_args.save_replay_prefix` (map_name if empty), followed by the current timestamp. 69 | 70 | The saved replays can be watched by double-clicking on them or using the following command: 71 | 72 | ```shell 73 | python -m pysc2.bin.play --norender --rgb_minimap_size 0 --replay NAME.SC2Replay 74 | ``` 75 | 76 | **Note:** Replays cannot be watched using the Linux version of StarCraft II. Please use either the Mac or Windows version of the StarCraft II client. 77 | 78 | ## Documentation/Support 79 | 80 | Documentation is a little sparse at the moment (but will improve!). Please raise an issue in this repo, or email [Tabish](mailto:tabish.rashid@cs.ox.ac.uk) 81 | 82 | ## Citing PyMARL 83 | 84 | If you use PyMARL in your research, please cite the [SMAC paper](https://arxiv.org/abs/1902.04043). 85 | 86 | *M. Samvelyan, T. Rashid, C. Schroeder de Witt, G. Farquhar, N. Nardelli, T.G.J. Rudner, C.-M. Hung, P.H.S. Torr, J. Foerster, S. Whiteson. The StarCraft Multi-Agent Challenge, CoRR abs/1902.04043, 2019.* 87 | 88 | In BibTeX format: 89 | 90 | ```tex 91 | @article{samvelyan19smac, 92 | title = {{The} {StarCraft} {Multi}-{Agent} {Challenge}}, 93 | author = {Mikayel Samvelyan and Tabish Rashid and Christian Schroeder de Witt and Gregory Farquhar and Nantas Nardelli and Tim G. J. Rudner and Chia-Man Hung and Philip H. S.
Torr and Jakob Foerster and Shimon Whiteson}, 94 | journal = {CoRR}, 95 | volume = {abs/1902.04043}, 96 | year = {2019}, 97 | } 98 | ``` 99 | 100 | ## License 101 | 102 | Code licensed under the Apache License v2.0 103 | -------------------------------------------------------------------------------- /PyMARL/src/controllers/basic_controller.py: -------------------------------------------------------------------------------- 1 | from modules.agents import REGISTRY as agent_REGISTRY 2 | from components.action_selectors import REGISTRY as action_REGISTRY 3 | import torch as th 4 | 5 | 6 | # This multi-agent controller shares parameters between agents 7 | class BasicMAC: 8 | def __init__(self, scheme, groups, args): 9 | self.n_agents = args.n_agents 10 | self.args = args 11 | input_shape = self._get_input_shape(scheme) 12 | self._build_agents(input_shape) 13 | self.agent_output_type = args.agent_output_type 14 | 15 | self.action_selector = action_REGISTRY[args.action_selector](args) 16 | 17 | self.hidden_states = None 18 | 19 | def select_actions(self, ep_batch, t_ep, t_env, bs=slice(None), test_mode=False): 20 | # print("State", ep_batch["state"][:, t_ep]) 21 | # print("State") 22 | # for i in range(0, 15): 23 | # print(ep_batch["state"][:, t_ep][0][13* i: 13*(i+1)]) 24 | # print() 25 | # Only select actions for the selected batch elements in bs 26 | avail_actions = ep_batch["avail_actions"][:, t_ep] 27 | agent_outputs = self.forward(ep_batch, t_ep, test_mode=test_mode) 28 | chosen_actions = self.action_selector.select_action(agent_outputs[bs], avail_actions[bs], t_env, test_mode=test_mode) 29 | return chosen_actions 30 | 31 | def forward(self, ep_batch, t, test_mode=False): 32 | agent_inputs = self._build_inputs(ep_batch, t) 33 | avail_actions = ep_batch["avail_actions"][:, t] 34 | agent_outs, self.hidden_states = self.agent(agent_inputs, self.hidden_states) 35 | 36 | # Softmax the agent outputs if they're policy logits 37 | if self.agent_output_type == "pi_logits": 38 | 39 | if getattr(self.args, "mask_before_softmax", True): 40 | # Make the logits for unavailable actions very negative to minimise their affect on the softmax 41 | reshaped_avail_actions = avail_actions.reshape(ep_batch.batch_size * self.n_agents, -1) 42 | agent_outs[reshaped_avail_actions == 0] = -1e10 43 | 44 | agent_outs = th.nn.functional.softmax(agent_outs, dim=-1) 45 | if not test_mode: 46 | # Epsilon floor 47 | epsilon_action_num = agent_outs.size(-1) 48 | if getattr(self.args, "mask_before_softmax", True): 49 | # With probability epsilon, we will pick an available action uniformly 50 | epsilon_action_num = reshaped_avail_actions.sum(dim=1, keepdim=True).float() 51 | 52 | agent_outs = ((1 - self.action_selector.epsilon) * agent_outs 53 | + th.ones_like(agent_outs) * self.action_selector.epsilon/epsilon_action_num) 54 | 55 | if getattr(self.args, "mask_before_softmax", True): 56 | # Zero out the unavailable actions 57 | agent_outs[reshaped_avail_actions == 0] = 0.0 58 | 59 | return agent_outs.view(ep_batch.batch_size, self.n_agents, -1) 60 | 61 | def init_hidden(self, batch_size): 62 | self.hidden_states = self.agent.init_hidden().unsqueeze(0).expand(batch_size, self.n_agents, -1) # bav 63 | 64 | def parameters(self): 65 | return self.agent.parameters() 66 | 67 | def load_state(self, other_mac): 68 | self.agent.load_state_dict(other_mac.agent.state_dict()) 69 | 70 | def cuda(self): 71 | self.agent.cuda() 72 | 73 | def save_models(self, path): 74 | th.save(self.agent.state_dict(), "{}/agent.th".format(path)) 75 | 76 | 
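# load_models below passes a map_location so that checkpoints written on a GPU can also be restored on CPU-only machines.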
def load_models(self, path): 77 | self.agent.load_state_dict(th.load("{}/agent.th".format(path), map_location=lambda storage, loc: storage)) 78 | 79 | def _build_agents(self, input_shape): 80 | self.agent = agent_REGISTRY[self.args.agent](input_shape, self.args) 81 | 82 | def _build_inputs(self, batch, t): 83 | # Assumes homogenous agents with flat observations. 84 | # Other MACs might want to e.g. delegate building inputs to each agent 85 | bs = batch.batch_size 86 | inputs = [] 87 | inputs.append(batch["obs"][:, t]) # b1av 88 | if self.args.obs_last_action: 89 | if t == 0: 90 | inputs.append(th.zeros_like(batch["actions_onehot"][:, t])) 91 | else: 92 | inputs.append(batch["actions_onehot"][:, t-1]) 93 | if self.args.obs_agent_id: 94 | inputs.append(th.eye(self.n_agents, device=batch.device).unsqueeze(0).expand(bs, -1, -1)) 95 | 96 | inputs = th.cat([x.reshape(bs*self.n_agents, -1) for x in inputs], dim=1) 97 | return inputs 98 | 99 | def _get_input_shape(self, scheme): 100 | input_shape = scheme["obs"]["vshape"] 101 | if self.args.obs_last_action: 102 | input_shape += scheme["actions_onehot"]["vshape"][0] 103 | if self.args.obs_agent_id: 104 | input_shape += self.n_agents 105 | 106 | return input_shape 107 | -------------------------------------------------------------------------------- /PyMARL/src/modules/mixers/qtran.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class QTranBase(nn.Module): 8 | def __init__(self, args): 9 | super(QTranBase, self).__init__() 10 | 11 | self.args = args 12 | 13 | self.n_agents = args.n_agents 14 | self.n_actions = args.n_actions 15 | self.state_dim = int(np.prod(args.state_shape)) 16 | self.arch = self.args.qtran_arch # QTran architecture 17 | 18 | self.embed_dim = args.mixing_embed_dim 19 | 20 | # Q(s,u) 21 | if self.arch == "coma_critic": 22 | # Q takes [state, u] as input 23 | q_input_size = self.state_dim + (self.n_agents * self.n_actions) 24 | elif self.arch == "qtran_paper": 25 | # Q takes [state, agent_action_observation_encodings] 26 | q_input_size = self.state_dim + self.args.rnn_hidden_dim + self.n_actions 27 | else: 28 | raise Exception("{} is not a valid QTran architecture".format(self.arch)) 29 | 30 | if self.args.network_size == "small": 31 | self.Q = nn.Sequential(nn.Linear(q_input_size, self.embed_dim), 32 | nn.ReLU(), 33 | nn.Linear(self.embed_dim, self.embed_dim), 34 | nn.ReLU(), 35 | nn.Linear(self.embed_dim, 1)) 36 | 37 | # V(s) 38 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 39 | nn.ReLU(), 40 | nn.Linear(self.embed_dim, self.embed_dim), 41 | nn.ReLU(), 42 | nn.Linear(self.embed_dim, 1)) 43 | ae_input = self.args.rnn_hidden_dim + self.n_actions 44 | self.action_encoding = nn.Sequential(nn.Linear(ae_input, ae_input), 45 | nn.ReLU(), 46 | nn.Linear(ae_input, ae_input)) 47 | elif self.args.network_size == "big": 48 | self.Q = nn.Sequential(nn.Linear(q_input_size, self.embed_dim), 49 | nn.ReLU(), 50 | nn.Linear(self.embed_dim, self.embed_dim), 51 | nn.ReLU(), 52 | nn.Linear(self.embed_dim, self.embed_dim), 53 | nn.ReLU(), 54 | nn.Linear(self.embed_dim, 1)) 55 | # V(s) 56 | self.V = nn.Sequential(nn.Linear(self.state_dim, self.embed_dim), 57 | nn.ReLU(), 58 | nn.Linear(self.embed_dim, self.embed_dim), 59 | nn.ReLU(), 60 | nn.Linear(self.embed_dim, self.embed_dim), 61 | nn.ReLU(), 62 | nn.Linear(self.embed_dim, 1)) 63 | ae_input = self.args.rnn_hidden_dim + 
self.n_actions 64 | self.action_encoding = nn.Sequential(nn.Linear(ae_input, ae_input), 65 | nn.ReLU(), 66 | nn.Linear(ae_input, ae_input)) 67 | else: 68 | assert False 69 | 70 | def forward(self, batch, hidden_states, actions=None): 71 | bs = batch.batch_size 72 | ts = batch.max_seq_length 73 | 74 | states = batch["state"].reshape(bs * ts, self.state_dim) 75 | 76 | if self.arch == "coma_critic": 77 | if actions is None: 78 | # Use the actions taken by the agents 79 | actions = batch["actions_onehot"].reshape(bs * ts, self.n_agents * self.n_actions) 80 | else: 81 | # It will arrive as (bs, ts, agents, actions), we need to reshape it 82 | actions = actions.reshape(bs * ts, self.n_agents * self.n_actions) 83 | inputs = th.cat([states, actions], dim=1) 84 | elif self.arch == "qtran_paper": 85 | if actions is None: 86 | # Use the actions taken by the agents 87 | actions = batch["actions_onehot"].reshape(bs * ts, self.n_agents, self.n_actions) 88 | else: 89 | # It will arrive as (bs, ts, agents, actions), we need to reshape it 90 | actions = actions.reshape(bs * ts, self.n_agents, self.n_actions) 91 | 92 | hidden_states = hidden_states.reshape(bs * ts, self.n_agents, -1) 93 | agent_state_action_input = th.cat([hidden_states, actions], dim=2) 94 | agent_state_action_encoding = self.action_encoding(agent_state_action_input.reshape(bs * ts * self.n_agents, -1)).reshape(bs * ts, self.n_agents, -1) 95 | agent_state_action_encoding = agent_state_action_encoding.sum(dim=1) # Sum across agents 96 | 97 | inputs = th.cat([states, agent_state_action_encoding], dim=1) 98 | 99 | q_outputs = self.Q(inputs) 100 | 101 | states = batch["state"].reshape(bs * ts, self.state_dim) 102 | v_outputs = self.V(states) 103 | 104 | return q_outputs, v_outputs 105 | 106 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/free_env.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | import yaml 5 | 6 | from .craft_env import CraftEnv 7 | from .grid_objs import ObjType 8 | 9 | 10 | class FreeEnv(CraftEnv): 11 | 12 | def __init__(self, enable_render, init_blueprint_path, env_config): 13 | super().__init__(enable_render, init_blueprint_path, env_config) 14 | self.key_mapping = { 15 | "block": 1, 16 | "folded_slope": 2, 17 | "unfolded_body": 3, 18 | "unfolded_foot": 4 19 | } 20 | self.design_list, self.design_dict = self.read_design( 21 | env_config["design_path"]) 22 | ########## 23 | temp = 0 24 | for k in self.design_dict.keys(): 25 | temp += len(self.design_dict[k]) 26 | self.design_length = temp 27 | print("Direct load from pymarl succeed") 28 | print("design_length", self.design_length) 29 | ########## 30 | self.last_pos_dict = None 31 | 32 | def read_design(self, design_path): 33 | with open(design_path) as f: 34 | source = yaml.load(f, Loader=yaml.loader.SafeLoader) 35 | design_list = np.zeros(self.area_size) 36 | design_dict = { 37 | "block": [], 38 | "folded_slope": [], 39 | "unfolded_body": [], 40 | "unfolded_foot": [] 41 | } 42 | for key in self.key_mapping.keys(): 43 | if key not in source.keys(): 44 | continue 45 | for obj in source[key]: 46 | x, y, z = int(obj['x']), int(obj['y']), int(obj['z']) 47 | design_list[x][y][z] = self.key_mapping[key] 48 | design_dict[key].append((x, y, z)) 49 | return design_list, design_dict 50 | 51 | def get_pos_list(self): 52 | raise NotImplementedError 53 | 54 | def get_pos_dict(self): 55 | result = { 56 | "block": [], 57 | "folded_slope": [], 58 | 
"unfolded_body": [], 59 | "unfolded_foot": [] 60 | } 61 | grid = self._blackboard.grid 62 | for i, j, k in itertools.product(range(self.area_size[0]), 63 | range(self.area_size[1]), 64 | range(self.area_size[2])): 65 | obj = grid[i][j][k] 66 | if obj.type is ObjType.Block or obj.type is \ 67 | ObjType.FoldedSlopeGear: 68 | result["block"].append((i, j, k)) 69 | elif obj.type is ObjType.FoldedSlope: 70 | result["folded_slope"].append((i, j, k)) 71 | elif obj.type is ObjType.UnfoldedSlopeBody: 72 | result["unfolded_body"].append((i, j, k)) 73 | elif obj.type is ObjType.UnfoldedSlopeFoot: 74 | result["unfolded_foot"].append((i, j, k)) 75 | else: 76 | pass 77 | return result 78 | 79 | def reset(self): 80 | self.design_list, self.design_dict = self.read_design( 81 | self.env_config["design_path"]) 82 | self.last_pos_dict = None 83 | obs = super().reset() 84 | return obs 85 | 86 | def compute_score(self, pos_dict): 87 | blocks = pos_dict["block"] 88 | visited = {block: False for block in blocks} 89 | counter = [0 for _ in range(0, len(blocks))] 90 | index = 0 91 | for block in blocks: 92 | if visited[block] is True: 93 | continue 94 | else: 95 | visited[block] = True 96 | counter[index] = 1 97 | stack = [block] 98 | while len(stack) != 0: 99 | top = stack.pop() 100 | neighbors = [(top[0] - 1, top[1], top[2]), 101 | (top[0] + 1, top[1], top[2]), 102 | (top[0], top[1] - 1, top[2]), 103 | (top[0], top[1] + 1, top[2])] 104 | for neighbor in neighbors: 105 | if neighbor in blocks and visited[neighbor] is False: 106 | visited[neighbor] = True 107 | counter[index] += 1 108 | stack.append(neighbor) 109 | index += 1 110 | score = 0 111 | for c in counter: 112 | if c == 0: 113 | break 114 | if c == 1: 115 | continue 116 | elif c > score: 117 | score = c 118 | return score 119 | 120 | def _compute_reward(self, blackboard=None): 121 | reward = 0 122 | before = 0 123 | after = 0 124 | if self.last_pos_dict is not None: 125 | before = self.compute_score(self.last_pos_dict) 126 | reward -= before 127 | current_pos_dict = self.get_pos_dict() 128 | after = self.compute_score(current_pos_dict) 129 | reward += after 130 | self.last_pos_dict = current_pos_dict 131 | return reward 132 | 133 | def step(self, action): 134 | enable_local_obs = self.env_config.get('enable_local_obs', False) 135 | if not enable_local_obs: 136 | obs, reward, done, info = super().step(action) 137 | return obs, reward, done, info 138 | else: 139 | return NotImplementedError 140 | -------------------------------------------------------------------------------- /PyMARL/src/painter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import seaborn as sns; sns.set() 5 | 6 | plt.switch_backend('agg') 7 | 8 | info = { 9 | "QMIX_TEST":{ 10 | "lr=0.005, epsilon=0.2": [15, 17, 16], 11 | "lr=0.005, epsilon=0.3": [18, 20, 19], 12 | "lr=0.0005, epsilon=0.2": [23, 21, 22], 13 | "lr=0.0005, epsilon=0.3": [25, 26, 24], 14 | } 15 | } 16 | 17 | info = { 18 | "The complex two-story building task":{ 19 | "QMIX": [40, 28, 27], # 40 43 28 27 20 | "QTRAN": [48, 49, 36, 35, ], 21 | "VDN": [47, 41, 38, 37, ], 22 | "IQL": [42, 44, 32, 31], 23 | "MAPPO": [45, 51, 34, 33, ], 24 | "COMA": [46, 30] 25 | }, 26 | "The simple two-story building task":{ 27 | "QMIX": [90, 71, 82, 68, ], 28 | "QTRAN": [84, 77, 81, 67, ], 29 | "VDN": [80, 75, 87, 76], 30 | "IQL": [79, 72, 85, 74], 31 | "MAPPO": [83, 69, 89, 73, ], 32 | "COMA": [88, 70, 86, 66] 33 | }, 34 | "The 
block-shaped building task":{ 35 | "QMIX": [104, 105, 106, 107, 108], 36 | "QTRAN": [109, 110, 111, 112, 113], 37 | "VDN": [114, 115, 116, 117, 118], 38 | "IQL": [119, 120, 121, 122, 123], 39 | "MAPPO": [124, 125, 126, 127, 128], 40 | "COMA": [130, 131, 132] # 129 133 41 | }, 42 | "The strip-shaped building task": { 43 | "QMIX": [134, 135, 136, 137, 138], 44 | "QTRAN": [139, 140, 141, 142, 143], 45 | "VDN": [144, 145, 146, 147, 148], 46 | "IQL": [149, 150, 151, 152, 153], 47 | "MAPPO": [154, 155, 156, 157, 158], 48 | "COMA": [159, 160, 161, 162, 163] 49 | } 50 | } 51 | 52 | data = {} 53 | 54 | # read data... 55 | for task, algos in info.items(): 56 | for algo, labels in algos.items(): 57 | for label in labels: 58 | path = "craft/PyMARL/results/sacred/" + str(label) + "/info.json" 59 | print(path) 60 | data1 = json.load(open(path)) 61 | return_mean_T = data1['return_mean_T'] 62 | return_mean = data1['return_mean'] 63 | # print(return_mean_T) 64 | # print(return_mean) 65 | test_return_mean_T = data1['test_return_mean_T'] 66 | test_return_mean = data1['test_return_mean'] 67 | # print(test_return_mean_T) 68 | # print(test_return_mean) 69 | 70 | x = np.array(return_mean_T) 71 | y = np.array(return_mean) 72 | 73 | x = np.array(test_return_mean_T) 74 | y = np.array(test_return_mean) 75 | 76 | if task not in data: 77 | data[task] = {} 78 | if algo not in data[task]: 79 | data[task][algo] = [] 80 | data[task][algo].append((x, y)) 81 | 82 | 83 | # plot! 84 | from matplotlib.pyplot import figure 85 | figure(figsize=(5, 4), dpi=80) 86 | for task in sorted(data.keys()): 87 | plt.clf() 88 | for algo in sorted(data[task].keys()): 89 | xs, ys = zip(*data[task][algo]) 90 | xs, ys = np.array(xs), np.array(ys) 91 | def cut(x, length): 92 | x_cut = np.empty([x.shape[0], length]) 93 | for i in range(x.shape[0]): 94 | x_cut[i] = x[i][0:length] 95 | return x_cut 96 | min_length = min(xs[i].shape[0] for i in range(xs.shape[0])) 97 | if task in ["task 06", "task 07"]: 98 | cutter = -1 99 | for i in range(0, xs[0].shape[0]): 100 | if xs[0][i] >= 1e6: 101 | cutter = i 102 | break 103 | min_length = min(min_length, cutter) 104 | print(min_length) 105 | xs = cut(xs, min_length) 106 | ys = cut(ys, min_length) 107 | assert xs.shape == ys.shape 108 | label = algo 109 | # Calculate for success rate 110 | success_rate_flag = False 111 | if success_rate_flag: 112 | success_rate = [[] for _ in range(0, len(ys))] 113 | max_val = None 114 | if task == "The strip-shaped building task": 115 | ranges = [8e5, 1e6] 116 | max_val = 4 117 | if task == "The block-shaped building task": 118 | ranges = [8e5, 1e6] 119 | max_val = 4 120 | if task == "The simple two-story building task": 121 | ranges = [1.5e6, 2e6] 122 | max_val = 3 123 | if task == "The complex two-story building task": 124 | ranges = [1.5e6, 2e6] 125 | max_val = 20 126 | if max_val is None: 127 | continue 128 | else: 129 | for i in range(0, len(ys[0])): 130 | for j in range(0, len(ys)): 131 | success = 0 132 | total = 1 133 | if ys[j][i] >= max_val: 134 | success += 1 135 | success_rate[j].append(success / total) 136 | print(len(success_rate)) 137 | ys = np.array(success_rate) 138 | print(ys.shape) 139 | 140 | plt.plot(xs[0], np.mean(ys, axis=0), label=label, linewidth=2, alpha=1.) 
141 | plt.fill_between(xs[0], np.mean(ys, axis=0)+np.std(ys, axis=0), np.mean(ys, axis=0)-np.std(ys, axis=0), alpha=0.25) 142 | plt.title('{}'.format(task)) 143 | plt.legend() 144 | if success_rate_flag: 145 | plt.savefig("SR_{}.pdf".format(task), bbox_inches='tight') 146 | else: 147 | plt.savefig("{}.pdf".format(task), bbox_inches='tight') 148 | 149 | 150 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/data/urdf/smartcar/smartcar.urdf.xacro: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | transmission_interface/SimpleTransmission 66 | 67 | hardware_interface/VelocityJointInterface 68 | 69 | 70 | 1 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | transmission_interface/SimpleTransmission 130 | 131 | hardware_interface/PositionJointInterface 132 | 133 | 134 | hardware_interface/PositionJointInterface 135 | 1 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/smartcar_planner.py: -------------------------------------------------------------------------------- 1 | """ 2 | A meta planner for smartcars planning in multi-flat 4-connected grid maps, 3 | which supports planning with rotation and non-diagonal movement under action_mask constraints. 
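A search node captures the full car state (x, y, z, yaw, is_lift, the lifted object, and a moving_over_slope phase). Successors are the four axis-aligned moves and +/-90 degree rotations, each checked against the blackboard's action-mask proxy; moving across an unfolded slope adjusts z and the slope-crossing phase. Concrete planners implement _plan() on top of these primitives.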
4 | """ 5 | from ..grid_objs import FoldedSlope 6 | from ..grid_objs import ObjType 7 | 8 | 9 | class SmartCarNode: 10 | def __init__(self, 11 | x, 12 | y, 13 | z=1, 14 | yaw=0, 15 | is_lift=False, 16 | lift_obj=None, 17 | moving_over_slope=0): 18 | self.x = x 19 | self.y = y 20 | self.z = z 21 | self.yaw = yaw 22 | self.is_lift = is_lift 23 | self.lift_obj = lift_obj 24 | self.moving_over_slope = moving_over_slope 25 | 26 | @property 27 | def key(self): 28 | return (self.x, self.y, self.z, self.yaw) 29 | 30 | @property 31 | def pos(self): 32 | return (self.x, self.y, self.z) 33 | 34 | def copy(self): 35 | return SmartCarNode( 36 | x=self.x, 37 | y=self.y, 38 | z=self.z, 39 | yaw=self.yaw, 40 | is_lift=self.is_lift, 41 | lift_obj=self.new_obj(self.lift_obj), 42 | moving_over_slope=self.moving_over_slope) 43 | 44 | def new_obj(self, obj): 45 | return FoldedSlope(obj.yaw) if obj is not None and obj.type is ObjType.FoldedSlope else obj 46 | 47 | 48 | class SmartCarPlanner: 49 | Node = SmartCarNode 50 | 51 | def __init__(self, blackboard) -> None: 52 | self.move_cost = 1.0 53 | self.rotate_cost = 1.0 54 | self.blackboard = blackboard 55 | self.action_mask_proxy = None 56 | self.grid = None 57 | self.agent_id = None 58 | 59 | def reset(self): 60 | """ 61 | Call blackboard.reset() before this function 62 | """ 63 | self.action_mask_proxy = self.blackboard.action_mask_proxy 64 | self._set_grid(self.blackboard.grid) 65 | 66 | def _set_grid(self, grid): 67 | self.grid = grid 68 | self.length = len(grid) 69 | self.width = len(grid[0]) 70 | self.height = len(grid[0][0]) 71 | 72 | def is_inbound(self, x, y): 73 | if x < 0 or x >= self.length: 74 | return False 75 | if y < 0 or y >= self.width: 76 | return False 77 | return True 78 | 79 | def can_move(self, node: SmartCarNode, move_dir): 80 | if not self.is_inbound(node.x + move_dir[0], node.y + move_dir[1]): 81 | return False 82 | 83 | kw = dict( 84 | x=node.x, 85 | y=node.y, 86 | z=node.z, 87 | yaw=node.yaw, 88 | is_lift=node.is_lift, 89 | lift_obj=node.lift_obj, 90 | moving_over_slope=node.moving_over_slope 91 | ) 92 | return self.action_mask_proxy.move_action_mask(move_dir, self.agent_id, kw) 93 | 94 | def can_rotate(self, node: SmartCarNode, rotate_dir): 95 | kw = dict( 96 | x=node.x, 97 | y=node.y, 98 | z=node.z, 99 | yaw=node.yaw, 100 | is_lift=node.is_lift, 101 | lift_obj=node.lift_obj, 102 | moving_over_slope=node.moving_over_slope 103 | ) 104 | return self.action_mask_proxy.rotate_mask(rotate_dir, self.agent_id, kw) 105 | 106 | def get_moved_node(self, curr_node: SmartCarNode, move_dir): 107 | node = curr_node.copy() 108 | node.x += move_dir[0] 109 | node.y += move_dir[1] 110 | 111 | obj = self.grid[node.x][node.y][node.z] 112 | blow_obj = self.grid[node.x][node.y][node.z - 1] 113 | if obj.type is ObjType.UnfoldedSlopeFoot and node.moving_over_slope == 0: 114 | node.moving_over_slope = 1 115 | elif obj.type is ObjType.UnfoldedSlopeBody and node.moving_over_slope == 1: 116 | node.moving_over_slope = 2 117 | node.z += 1 118 | elif blow_obj.type is ObjType.UnfoldedSlopeBody and node.moving_over_slope == 0: 119 | node.moving_over_slope = 2 120 | elif blow_obj.type is ObjType.UnfoldedSlopeFoot and node.moving_over_slope == 2: 121 | node.moving_over_slope = 1 122 | node.z -= 1 123 | else: 124 | node.moving_over_slope = 0 125 | return node 126 | 127 | def get_rotated_node(self, curr_node: SmartCarNode, rotate_dir): 128 | node = curr_node.copy() 129 | node.yaw = (node.yaw + rotate_dir) % 4 130 | if node.is_lift and node.lift_obj.type is 
ObjType.FoldedSlope: 131 | node.lift_obj.yaw = (node.lift_obj.yaw + rotate_dir) % 4 132 | return node 133 | 134 | def get_successors(self, curr_node: SmartCarNode): 135 | successors = [] 136 | 137 | # move action 138 | for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]: 139 | if not self.can_move(curr_node, (dx, dy)): 140 | continue 141 | node = self.get_moved_node(curr_node, (dx, dy)) 142 | successors.append(node) 143 | 144 | # rotate action 145 | for d_yaw in [-1, 1]: 146 | if not self.can_rotate(curr_node, d_yaw): 147 | continue 148 | node = self.get_rotated_node(curr_node, d_yaw) 149 | successors.append(node) 150 | 151 | return successors 152 | 153 | def plan(self, agent_id, start_x, start_y, start_z, yaw, is_lift, lift_obj, moving_over_slope, 154 | goal_x, goal_y, goal_z, verbose=False): 155 | 156 | assert self.grid is not None, \ 157 | 'Grid map not specified, please call set_grid() before planning' 158 | 159 | self.agent_id = agent_id 160 | 161 | start_node = self.Node( 162 | x=start_x, y=start_y, z=start_z, 163 | yaw=yaw, is_lift=is_lift, lift_obj=lift_obj, 164 | moving_over_slope=moving_over_slope) 165 | 166 | goal_node = self.Node(x=goal_x, y=goal_y, z=goal_z) 167 | 168 | return self._plan(start_node, goal_node, verbose) 169 | 170 | def _plan(self, start_node, goal_node, verbose): 171 | raise NotImplementedError 172 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/matrix_env.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import random 3 | import time 4 | 5 | import gym 6 | import numpy as np 7 | import pybullet 8 | from pybullet_utils import bullet_client 9 | 10 | from craft import utils 11 | 12 | from .action_mask import ActionMask 13 | from .blackboard import Blackboard 14 | from .goal import Goal 15 | from .matrix_to_bullet import MatrixToBullet 16 | from .planner import BreadthFirstSearch 17 | from .scene import Scene 18 | from .smartcar import Smartcar 19 | 20 | 21 | class MatrixEnv(gym.Env): 22 | 23 | def __init__(self, enable_render, init_blueprint_path, env_config): 24 | self.enable_render = enable_render 25 | self._blackboard = Blackboard(init_blueprint_path) 26 | search_depth = env_config.get('search_depth', 10) 27 | self.smartcars = [ 28 | Smartcar(self._blackboard, i, search_depth=search_depth) 29 | for i in range(self._blackboard.smartcar_num) 30 | ] 31 | self.scene = Scene(self._blackboard) 32 | 33 | self._blackboard.smartcars = self.smartcars 34 | self._blackboard.scene = self.scene 35 | self._blackboard.action_mask_proxy = ActionMask(self._blackboard) 36 | self._blackboard.bfs = BreadthFirstSearch(self._blackboard) 37 | 38 | self._blackboard.goal = Goal(self._blackboard) 39 | 40 | if self.enable_render: 41 | self._bullet_client = bullet_client.BulletClient( 42 | connection_mode=pybullet.GUI) 43 | else: 44 | self._bullet_client = bullet_client.BulletClient( 45 | connection_mode=pybullet.DIRECT) 46 | self._blackboard._bullet_client = self._bullet_client 47 | self.matrix_to_bullet = MatrixToBullet(self._bullet_client, 48 | self._blackboard) 49 | self._step = 0 50 | self.env_config = env_config 51 | if "work_mode" in self.env_config: 52 | # 0: train mode, 1: record mode, 2: play mode 53 | self.work_mode = self.env_config["work_mode"] 54 | else: 55 | self.work_mode = 0 56 | self._total_step = 0 57 | 58 | def predict(self, action: list, blackboard=None): 59 | """predict the observation after an action with specific blackboard.""" 60 | predict_blackboard = 
blackboard 61 | if predict_blackboard is None: 62 | predict_blackboard = copy.deepcopy(self._blackboard) 63 | 64 | reward = 0 65 | done = False 66 | action_order = np.arange(predict_blackboard.smartcar_num) 67 | random.shuffle(action_order) 68 | for i in action_order: 69 | predict_blackboard.smartcars[i].step(action[i]) 70 | if done: 71 | reward = 1 72 | break 73 | done = self._is_done(predict_blackboard) 74 | obs = self.get_obs(predict_blackboard) 75 | info = {predict_blackboard} 76 | return obs, reward, done, info 77 | 78 | def reset(self): 79 | blueprint = random.choice(self._blackboard.blueprint_path_list) 80 | self._blackboard.reset(blueprint) 81 | self._blackboard.goal.reset() 82 | self.scene.reset() 83 | for smartcar in self.smartcars: 84 | smartcar.reset() 85 | self.matrix_to_bullet.sync() 86 | if self.work_mode == 1: 87 | self.record_action = [] 88 | self.record_action_order = [] 89 | elif self.work_mode == 2: 90 | assert "yaml_save_path" in self.env_config, \ 91 | "yaml_save_path arg is needed in play mode" 92 | assert "action_yaml_path" in self.env_config, \ 93 | "action_yaml_path arg is needed in play mode" 94 | self.record_action, self.record_action_order = \ 95 | utils.load_action2yaml(self._blackboard.smartcar_num, 96 | self.env_config['action_yaml_path']) 97 | self._step = 0 98 | 99 | return self.get_obs() 100 | 101 | def step(self, action: list): 102 | done = False 103 | action_order = np.arange(self._blackboard.smartcar_num) 104 | random.shuffle(action_order) 105 | 106 | if self.work_mode == 2: 107 | try: 108 | action, action_order = self.record_action[ 109 | self._step], self.record_action_order[self._step] 110 | except IndexError: 111 | print("record actions reach limit.") 112 | return None, 0, False, {} 113 | 114 | for i in action_order: 115 | self.smartcars[i].step(action[i]) 116 | 117 | obs = self.get_obs() 118 | reward = self._compute_reward() 119 | done = self._is_done() 120 | info = {} 121 | 122 | self.matrix_to_bullet.sync() 123 | if self.work_mode == 1: 124 | assert "yaml_save_path" in self.env_config, \ 125 | "yaml_save_path arg is needed in record mode" 126 | timestamp = time.strftime("%b-%d-%H:%M:%S", time.localtime()) 127 | utils.save_scene2yaml(self._blackboard, self._step, timestamp, 128 | self.env_config["yaml_save_path"]) 129 | self.record_action.append(action) 130 | self.record_action_order.append(action_order) 131 | if self._step == 0: 132 | self.act_timestamp = timestamp 133 | utils.save_action2yaml(self.record_action, 134 | self.record_action_order, 135 | self.act_timestamp, 136 | self.env_config["yaml_save_path"]) 137 | 138 | self._step += 1 139 | self._total_step += 1 140 | return obs, reward, done, info 141 | 142 | def get_obs(self, blackboard=None): 143 | raise NotImplementedError 144 | 145 | def _is_done(self, blackboard=None): 146 | """Indicates whether or not the episode is done.""" 147 | return False 148 | 149 | def _compute_reward(self, blackboard=None): 150 | """Calculates the reward to give based on the observations given.""" 151 | return 1 152 | 153 | def get_flag_pos(self, blackboard=None): 154 | raise NotImplementedError 155 | -------------------------------------------------------------------------------- /PyMARL/src/learners/q_learner.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from components.episode_buffer import EpisodeBatch 3 | from modules.mixers.vdn import VDNMixer 4 | from modules.mixers.qmix import QMixer 5 | import torch as th 6 | from torch.optim import RMSprop 7 | 
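# The learner below fits the shared agent network (plus an optional VDN/QMIX mixer)
# with 1-step TD targets computed from a periodically refreshed target copy:
#
#     y_t = r_t + gamma * (1 - terminated_t) * mix_target(max_a' Q_target(s_{t+1}, a'), state_{t+1})
#
# With double_q enabled, the argmax action is chosen by the live network and then
# evaluated by the target network before mixing. Timesteps that only exist as
# padding are masked out of the squared TD error before it is averaged.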
8 | 9 | class QLearner: 10 | def __init__(self, mac, scheme, logger, args): 11 | self.args = args 12 | self.mac = mac 13 | self.logger = logger 14 | 15 | self.params = list(mac.parameters()) 16 | 17 | self.last_target_update_episode = 0 18 | 19 | self.mixer = None 20 | if args.mixer is not None: 21 | if args.mixer == "vdn": 22 | self.mixer = VDNMixer() 23 | elif args.mixer == "qmix": 24 | self.mixer = QMixer(args) 25 | else: 26 | raise ValueError("Mixer {} not recognised.".format(args.mixer)) 27 | self.params += list(self.mixer.parameters()) 28 | self.target_mixer = copy.deepcopy(self.mixer) 29 | 30 | self.optimiser = RMSprop(params=self.params, lr=args.lr, alpha=args.optim_alpha, eps=args.optim_eps) 31 | 32 | # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC 33 | self.target_mac = copy.deepcopy(mac) 34 | 35 | self.log_stats_t = -self.args.learner_log_interval - 1 36 | 37 | def train(self, batch: EpisodeBatch, t_env: int, episode_num: int): 38 | # Get the relevant quantities 39 | rewards = batch["reward"][:, :-1] 40 | actions = batch["actions"][:, :-1] 41 | terminated = batch["terminated"][:, :-1].float() 42 | mask = batch["filled"][:, :-1].float() 43 | mask[:, 1:] = mask[:, 1:] * (1 - terminated[:, :-1]) 44 | avail_actions = batch["avail_actions"] 45 | 46 | # Calculate estimated Q-Values 47 | mac_out = [] 48 | self.mac.init_hidden(batch.batch_size) 49 | for t in range(batch.max_seq_length): 50 | agent_outs = self.mac.forward(batch, t=t) 51 | mac_out.append(agent_outs) 52 | mac_out = th.stack(mac_out, dim=1) # Concat over time 53 | 54 | # Pick the Q-Values for the actions taken by each agent 55 | chosen_action_qvals = th.gather(mac_out[:, :-1], dim=3, index=actions).squeeze(3) # Remove the last dim 56 | 57 | # Calculate the Q-Values necessary for the target 58 | target_mac_out = [] 59 | self.target_mac.init_hidden(batch.batch_size) 60 | for t in range(batch.max_seq_length): 61 | target_agent_outs = self.target_mac.forward(batch, t=t) 62 | target_mac_out.append(target_agent_outs) 63 | 64 | # We don't need the first timesteps Q-Value estimate for calculating targets 65 | target_mac_out = th.stack(target_mac_out[1:], dim=1) # Concat across time 66 | 67 | # Mask out unavailable actions 68 | target_mac_out[avail_actions[:, 1:] == 0] = -9999999 69 | 70 | # Max over target Q-Values 71 | if self.args.double_q: 72 | # Get actions that maximise live Q (for double q-learning) 73 | mac_out_detach = mac_out.clone().detach() 74 | mac_out_detach[avail_actions == 0] = -9999999 75 | cur_max_actions = mac_out_detach[:, 1:].max(dim=3, keepdim=True)[1] 76 | target_max_qvals = th.gather(target_mac_out, 3, cur_max_actions).squeeze(3) 77 | else: 78 | target_max_qvals = target_mac_out.max(dim=3)[0] 79 | 80 | # Mix 81 | if self.mixer is not None: 82 | chosen_action_qvals = self.mixer(chosen_action_qvals, batch["state"][:, :-1]) 83 | target_max_qvals = self.target_mixer(target_max_qvals, batch["state"][:, 1:]) 84 | 85 | # Calculate 1-step Q-Learning targets 86 | targets = rewards + self.args.gamma * (1 - terminated) * target_max_qvals 87 | 88 | # Td-error 89 | td_error = (chosen_action_qvals - targets.detach()) 90 | 91 | mask = mask.expand_as(td_error) 92 | 93 | # 0-out the targets that came from padded data 94 | masked_td_error = td_error * mask 95 | 96 | # Normal L2 loss, take mean over actual data 97 | loss = (masked_td_error ** 2).sum() / mask.sum() 98 | 99 | # Optimise 100 | self.optimiser.zero_grad() 101 | loss.backward() 102 | grad_norm = 
th.nn.utils.clip_grad_norm_(self.params, self.args.grad_norm_clip) 103 | self.optimiser.step() 104 | 105 | if (episode_num - self.last_target_update_episode) / self.args.target_update_interval >= 1.0: 106 | self._update_targets() 107 | self.last_target_update_episode = episode_num 108 | 109 | if t_env - self.log_stats_t >= self.args.learner_log_interval: 110 | self.logger.log_stat("loss", loss.item(), t_env) 111 | self.logger.log_stat("grad_norm", grad_norm, t_env) 112 | mask_elems = mask.sum().item() 113 | self.logger.log_stat("td_error_abs", (masked_td_error.abs().sum().item()/mask_elems), t_env) 114 | self.logger.log_stat("q_taken_mean", (chosen_action_qvals * mask).sum().item()/(mask_elems * self.args.n_agents), t_env) 115 | self.logger.log_stat("target_mean", (targets * mask).sum().item()/(mask_elems * self.args.n_agents), t_env) 116 | self.log_stats_t = t_env 117 | 118 | def _update_targets(self): 119 | self.target_mac.load_state(self.mac) 120 | if self.mixer is not None: 121 | self.target_mixer.load_state_dict(self.mixer.state_dict()) 122 | self.logger.console_logger.info("Updated target network") 123 | 124 | def cuda(self): 125 | self.mac.cuda() 126 | self.target_mac.cuda() 127 | if self.mixer is not None: 128 | self.mixer.cuda() 129 | self.target_mixer.cuda() 130 | 131 | def save_models(self, path): 132 | self.mac.save_models(path) 133 | if self.mixer is not None: 134 | th.save(self.mixer.state_dict(), "{}/mixer.th".format(path)) 135 | th.save(self.optimiser.state_dict(), "{}/opt.th".format(path)) 136 | 137 | def load_models(self, path): 138 | self.mac.load_models(path) 139 | # Not quite right but I don't want to save target networks 140 | self.target_mac.load_models(path) 141 | if self.mixer is not None: 142 | self.mixer.load_state_dict(th.load("{}/mixer.th".format(path), map_location=lambda storage, loc: storage)) 143 | self.optimiser.load_state_dict(th.load("{}/opt.th".format(path), map_location=lambda storage, loc: storage)) 144 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/matrix_to_bullet.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy as np 3 | from scipy.spatial.transform import Rotation as R 4 | from .bullet_block import BulletBlocks 5 | from .bullet_wall import BulletWalls 6 | from .bullet_flag import BulletFlag 7 | from .bullet_goal import BulletGoal 8 | from .bullet_slope import BulletSlope 9 | from .bullet_smartcar import BulletSmartcar 10 | from .grid_objs import ObjType 11 | 12 | 13 | class MatrixToBullet: 14 | def __init__(self, bullet_client, blackboard): 15 | self._bullet_client = bullet_client 16 | self._blackboard = blackboard 17 | self.area_size = blackboard.area_size 18 | self.block_length = blackboard.BLOCK_LENGTH 19 | self.block_height = blackboard.BLOCK_HEIGHT 20 | 21 | bullet_client.resetDebugVisualizerCamera( 22 | cameraDistance=2, 23 | cameraYaw=0, 24 | cameraPitch=-30, 25 | cameraTargetPosition=[5, 0, 4], 26 | ) 27 | 28 | bullet_client.setPhysicsEngineParameter(collisionFilterMode=0) 29 | bullet_client.configureDebugVisualizer(bullet_client.COV_ENABLE_RENDERING, 0) 30 | 31 | self.blocks = BulletBlocks(bullet_client, blackboard) 32 | self.slopes = [ 33 | BulletSlope(bullet_client) for _ in range(self._blackboard.slope_num) 34 | ] 35 | self.smartcars = [ 36 | BulletSmartcar(bullet_client) for _ in range(self._blackboard.smartcar_num) 37 | ] 38 | self.flag = BulletFlag(bullet_client) 39 | self.goal = BulletGoal(bullet_client) 
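# Wall bodies are only instantiated when the blueprint declares a non-zero wall_num.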
40 | if self._blackboard.wall_num != 0: 41 | self.walls = BulletWalls(bullet_client, blackboard) 42 | 43 | bullet_client.configureDebugVisualizer(bullet_client.COV_ENABLE_RENDERING, 1) 44 | 45 | def yaw_to_quaternion(yaw): 46 | r = R.from_euler("z", yaw, degrees=False) 47 | return r.as_quat() 48 | 49 | self.yaw_to_quat = { 50 | 0: yaw_to_quaternion(0 * np.pi / 2), 51 | 1: yaw_to_quaternion(1 * np.pi / 2), 52 | 2: yaw_to_quaternion(2 * np.pi / 2), 53 | 3: yaw_to_quaternion(3 * np.pi / 2), 54 | } 55 | 56 | def get_bullet_position(self, i, j, k): 57 | x = i * self.block_length + 0.5 * self.block_length 58 | y = j * self.block_length + 0.5 * self.block_length 59 | z = k * self.block_height 60 | return [x, y, z] 61 | 62 | def sync(self): 63 | grid = self._blackboard.grid 64 | wall_id = 0 65 | block_id = 0 66 | slope_id = 0 67 | for i, j, k in itertools.product( 68 | range(self.area_size[0]), 69 | range(self.area_size[1]), 70 | range(1, self.area_size[2] + 1), 71 | ): 72 | obj = grid[i][j][k] 73 | 74 | if self._blackboard.wall_num != 0 and obj.type is ObjType.Wall: 75 | position = self.get_bullet_position(i, j, k) 76 | self._bullet_client.resetBasePositionAndOrientation( 77 | self.walls.ids[wall_id], position, self.yaw_to_quat[0] 78 | ) 79 | wall_id += 1 80 | 81 | if obj.type is ObjType.Block: 82 | position = self.get_bullet_position(i, j, k) 83 | self._bullet_client.resetBasePositionAndOrientation( 84 | self.blocks.ids[block_id], position, self.yaw_to_quat[0] 85 | ) 86 | block_id += 1 87 | 88 | if obj.type is ObjType.FoldedSlope: 89 | position = self.get_bullet_position(i, j, k - 1) 90 | self._bullet_client.resetBasePositionAndOrientation( 91 | self.slopes[slope_id].robot_id, position, self.yaw_to_quat[obj.yaw] 92 | ) 93 | self.slopes[slope_id].fold() 94 | slope_id += 1 95 | 96 | if obj.type is ObjType.UnfoldedSlopeBody: 97 | position = self.get_bullet_position(i, j, k - 1) 98 | self._bullet_client.resetBasePositionAndOrientation( 99 | self.slopes[slope_id].robot_id, position, self.yaw_to_quat[obj.yaw] 100 | ) 101 | self.slopes[slope_id].unfold() 102 | slope_id += 1 103 | 104 | if obj.type is ObjType.Flag: 105 | position = self.get_bullet_position(i, j, k - 1) 106 | self._bullet_client.resetBasePositionAndOrientation( 107 | self.flag.robot_id, position, self.yaw_to_quat[0] 108 | ) 109 | 110 | for smartcar_id, smartcar in enumerate(self._blackboard.smartcars): 111 | position = self.get_bullet_position(smartcar.x, smartcar.y, smartcar.z - 1) 112 | position[2] += 0.043 113 | self._bullet_client.resetBasePositionAndOrientation( 114 | self.smartcars[smartcar_id].robot_id, 115 | position, 116 | self.yaw_to_quat[smartcar.yaw], 117 | ) 118 | if smartcar.is_lift: 119 | if smartcar.lift_obj.type is ObjType.Block: 120 | position = self.get_bullet_position( 121 | smartcar.x, smartcar.y, smartcar.z + 1 122 | ) 123 | self._bullet_client.resetBasePositionAndOrientation( 124 | self.blocks.ids[block_id], position, self.yaw_to_quat[0] 125 | ) 126 | block_id += 1 127 | elif smartcar.lift_obj.type is ObjType.FoldedSlope: 128 | position = self.get_bullet_position( 129 | smartcar.x, smartcar.y, smartcar.z 130 | ) 131 | self._bullet_client.resetBasePositionAndOrientation( 132 | self.slopes[slope_id].robot_id, 133 | position, 134 | self.yaw_to_quat[smartcar.lift_obj.yaw], 135 | ) 136 | self.slopes[slope_id].fold() 137 | slope_id += 1 138 | elif smartcar.lift_obj.type is ObjType.Flag: 139 | position = self.get_bullet_position( 140 | smartcar.x, smartcar.y, smartcar.z 141 | ) 142 | 
self._bullet_client.resetBasePositionAndOrientation( 143 | self.flag.robot_id, position, self.yaw_to_quat[0] 144 | ) 145 | 146 | goal_coord = self.get_bullet_position( 147 | self._blackboard.goal.x, 148 | self._blackboard.goal.y, 149 | self._blackboard.goal.z - 1, 150 | ) 151 | self._bullet_client.resetBasePositionAndOrientation( 152 | self.goal.robot_id, goal_coord, self.yaw_to_quat[0] 153 | ) 154 | 155 | self._bullet_client.stepSimulation() 156 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from enum import IntEnum, auto, unique 3 | from math import cos, sin 4 | 5 | import numpy as np 6 | import yaml 7 | from scipy.spatial.transform import Rotation as R 8 | 9 | from .grid_objs import ObjType 10 | 11 | 12 | @unique 13 | class Direction(IntEnum): 14 | """ 15 | World coordinate system 16 | DIR_0: -- > 17 | yaw = 0 18 | 19 | DIR_1: ^ 20 | | 21 | yaw = np.pi * 0.5 22 | 23 | DIR_2: < -- 24 | yaw = np.pi * 1 25 | 26 | DIR_3: | 27 | v 28 | yaw = np.pi * 1.5 29 | """ 30 | 31 | DIR_0 = 0 32 | DIR_1 = auto() 33 | DIR_2 = auto() 34 | DIR_3 = auto() 35 | 36 | 37 | def next_step(x, y, theta): 38 | next_x = x + round(cos(theta)) 39 | next_y = y + round(sin(theta)) 40 | return next_x, next_y 41 | 42 | 43 | def world_to_local(move_dir, yaw): 44 | theta = yaw * np.pi / 2 45 | r = R.from_matrix( 46 | [[cos(theta), -sin(theta), 0], [sin(theta), cos(theta), 0], [0, 0, 1]] 47 | ) 48 | move_dir = np.append(move_dir, 0) 49 | a = r.inv().apply(move_dir) 50 | return a[:2].astype(np.int) 51 | 52 | 53 | def is_move_action(action): 54 | return 6 <= action <= 9 55 | 56 | 57 | def is_smartcar_on_slope(smartcar): 58 | return smartcar.moving_over_slope 59 | 60 | 61 | def save_scene2yaml(blackboard, step, timestamp, saved_path): 62 | for smartcar in blackboard.smartcars: 63 | if is_smartcar_on_slope(smartcar): 64 | return 65 | step = str(step).zfill(5) 66 | timestamp = f"{timestamp}-{step}" 67 | blackboard._bullet_client.addUserDebugText(text=timestamp, 68 | textPosition=[6, 12, 7], 69 | textColorRGB=[0, 0, 1], 70 | lifeTime=0.7, 71 | textSize=1.2, 72 | ) 73 | grid_items = { 74 | "yaml_generated_time": timestamp, 75 | "area_length": blackboard.area_size[0], 76 | "area_width": blackboard.area_size[1], 77 | "area_height": blackboard.area_size[2], 78 | "wall_num": 0, 79 | "block_num": 0, 80 | "slope_num": 0, 81 | "smartcar_num": 0, 82 | "legged_robot_num": 2, 83 | } 84 | wall_list = [] 85 | block_list = [] 86 | fold_slope_list = [] 87 | unfold_slope_list = [] 88 | flag_list = [] 89 | goal_list = [] 90 | smartcar_list = [] 91 | legged_robot_list = [ 92 | {"id": 0, "x": 1, "y": 1, "z": 1, "yaw": 0}, 93 | {"id": 1, "x": 1, "y": 2, "z": 1, "yaw": 0}, 94 | ] 95 | 96 | grid = blackboard.grid 97 | wall_id = 0 98 | block_id = 0 99 | slope_id = 0 100 | for i, j, k in itertools.product( 101 | range(blackboard.area_size[0]), 102 | range(blackboard.area_size[1]), 103 | range(1, blackboard.area_size[2] + 1), 104 | ): 105 | obj = grid[i][j][k] 106 | 107 | if obj.type is ObjType.Wall: 108 | wall_list.append({"id": wall_id, "x": i, "y": j, "z": k}) 109 | wall_id += 1 110 | 111 | if obj.type is ObjType.Block: 112 | block_list.append({"id": block_id, "x": i, "y": j, "z": k}) 113 | block_id += 1 114 | 115 | if obj.type is ObjType.FoldedSlope: 116 | fold_slope_list.append( 117 | {"id": slope_id, "x": i, "y": j, "z": k, "yaw": obj.yaw} 118 | ) 119 | slope_id += 1 120 | 121 | 
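        # Both folded and unfolded slopes advance the same slope_id counter, so the
        # slope_num written to the YAML header below counts every slope in the scene.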
if obj.type is ObjType.UnfoldedSlopeBody: 122 | unfold_slope_list.append( 123 | {"id": slope_id, "x": i, "y": j, "z": k, "yaw": obj.yaw} 124 | ) 125 | slope_id += 1 126 | 127 | if obj.type is ObjType.Flag: 128 | flag_list.append({"id": 0, "x": i, "y": j, "z": k}) 129 | 130 | for smartcar_id, smartcar in enumerate(blackboard.smartcars): 131 | smartcar_list.append( 132 | { 133 | "id": int(smartcar_id), 134 | "x": int(smartcar.x), 135 | "y": int(smartcar.y), 136 | "z": int(smartcar.z), 137 | "yaw": int(smartcar.yaw), 138 | } 139 | ) 140 | if smartcar.is_lift: 141 | if smartcar.lift_obj.type is ObjType.Block: 142 | block_list.append( 143 | {"id": int(block_id), "x": int(smartcar.x), "y": int(smartcar.y), "z": int(smartcar.z)} 144 | ) 145 | block_id += 1 146 | elif smartcar.lift_obj.type is ObjType.FoldedSlope: 147 | fold_slope_list.append( 148 | { 149 | "id": int(slope_id), 150 | "x": int(smartcar.x), 151 | "y": int(smartcar.y), 152 | "z": int(smartcar.z), 153 | "yaw": int(smartcar.lift_obj.yaw), 154 | } 155 | ) 156 | slope_id += 1 157 | elif smartcar.lift_obj.type is ObjType.Flag: 158 | flag_list.append( 159 | {"id": 0, "x": int(smartcar.x), "y": int(smartcar.y), "z": int(smartcar.z)} 160 | ) 161 | 162 | goal_list.append( 163 | { 164 | "id": 0, 165 | "x": int(blackboard.goal.x), 166 | "y": int(blackboard.goal.y), 167 | "z": int(blackboard.goal.z), 168 | } 169 | ) 170 | 171 | grid_items.update({"wall_num": wall_id}) 172 | grid_items.update({"block_num": block_id}) 173 | grid_items.update({"slope_num": slope_id}) 174 | grid_items.update({"smartcar_num": len(blackboard.smartcars)}) 175 | with open(f"{saved_path}/{timestamp}.yaml", "w") as f: 176 | yaml.dump_all( 177 | [ 178 | grid_items, 179 | {"block": block_list}, 180 | {"fold_slope": fold_slope_list}, 181 | {"unfold_slope": unfold_slope_list}, 182 | {"smartcar": smartcar_list}, 183 | {"flag": flag_list}, 184 | {"goal": goal_list}, 185 | {"legged_robot": legged_robot_list}, 186 | {"wall": wall_list}, 187 | ], 188 | f, 189 | sort_keys=False, 190 | ) 191 | 192 | 193 | def save_action2yaml(record_action, record_action_order, timestamp, saved_path): 194 | action = np.array(record_action) 195 | action_order = np.array(record_action_order) 196 | data = np.concatenate((action, action_order), axis=1) 197 | # saved format: action, action order 198 | with open(f"{saved_path}/{timestamp}_record_action.yaml", "w") as f: 199 | np.savetxt(f, data, fmt='%i') 200 | 201 | 202 | def load_action2yaml(robot_num, saved_path): 203 | with open(f"{saved_path}", "r") as f: 204 | data = np.loadtxt(f, dtype=int) 205 | action, action_order = data[:, :robot_num], data[:, robot_num:] 206 | return action, action_order 207 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/scene.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from craft import utils 4 | 5 | from .blackboard import Point 6 | from .grid_objs import (Block, Flag, FoldedSlope, FoldedSlopeGear, ObjType, 7 | UnfoldedSlopeBody, UnfoldedSlopeFoot, Wall) 8 | 9 | 10 | class Scene: 11 | 12 | def __init__(self, blackboard): 13 | self._blackboard = blackboard 14 | self.area_size = self._blackboard.area_size 15 | 16 | def checke_coord_legal(self, obj): 17 | if obj["x"] >= self.area_size[0] or \ 18 | obj["y"] >= self.area_size[1] or \ 19 | obj["z"] > self.area_size[2]: 20 | raise ValueError("object coordinate out of bounds") 21 | 22 | def random_place_block(self, max_id): 23 | i = 0 24 | while i < max_id: 
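            # Ask the blackboard for a random free spawn cell, place a Block there,
            # and record the point in the spawn point set.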
25 | name = "block" 26 | p, _ = self._blackboard.random_spawn_obj(name) 27 | self._blackboard.grid[p.x][p.y][p.z] = Block() 28 | self._blackboard.spawn_point_set.add(p) 29 | i += 1 30 | 31 | def random_place_slope(self, max_id): 32 | i = 0 33 | while i < max_id: 34 | name = "slope" 35 | while True: 36 | p, direction = self._blackboard.random_spawn_obj(name) 37 | # +2 to consider the Gear 38 | pre_x, pre_y = utils.next_step(p.x, p.y, 39 | (direction + 2) * np.pi / 2) 40 | next_x, next_y = utils.next_step(p.x, p.y, 41 | direction * np.pi / 2) 42 | if self._blackboard.grid[pre_x][pre_y][p.z].type is not \ 43 | ObjType.Wall and \ 44 | self._blackboard.grid[next_x][next_y][p.z].type \ 45 | is ObjType.Air: 46 | break 47 | self._blackboard.grid[p.x][p.y][p.z] = FoldedSlope(direction) 48 | self._blackboard.grid[next_x][next_y][p.z] = FoldedSlopeGear( 49 | direction) 50 | self._blackboard.spawn_point_set.add(p) 51 | self._blackboard.spawn_point_set.add(Point(next_x, next_y, p.z)) 52 | i += 1 53 | 54 | def z_axis(self, elem): 55 | return elem["z"] 56 | 57 | def reset(self): 58 | self.template = self._blackboard.template 59 | 60 | # must process block first! 61 | try: 62 | self.template["block"].sort(key=self.z_axis) 63 | for i in self.template["block"]: 64 | self.checke_coord_legal(i) 65 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Block() 66 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 67 | 1].obj_on_it = -2 68 | self._blackboard.spawn_point_set.add( 69 | Point(i["x"], i["y"], i["z"])) 70 | # place_block = i["id"] + 1 71 | except KeyError as e: 72 | print("KeyError, ", e) 73 | pass 74 | try: 75 | self.template["fold_slope"].sort(key=self.z_axis) 76 | for i in self.template["fold_slope"]: 77 | self.checke_coord_legal(i) 78 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = FoldedSlope( 79 | i["yaw"]) 80 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 81 | 1].obj_on_it = -2 82 | n_x, n_y = utils.next_step(i["x"], i["y"], 83 | i["yaw"] * np.pi / 2) 84 | self._blackboard.grid[n_x][n_y][i["z"]] = FoldedSlopeGear( 85 | i["yaw"]) 86 | self._blackboard.spawn_point_set.add( 87 | Point(i["x"], i["y"], i["z"])) 88 | # place_slope = i["id"] + 1 89 | except KeyError as e: 90 | print("KeyError, ", e) 91 | pass 92 | try: 93 | self.template["unfold_slope"].sort(key=self.z_axis) 94 | for i in self.template["unfold_slope"]: 95 | self.checke_coord_legal(i) 96 | self._blackboard.grid[i["x"]][i["y"]][ 97 | i["z"]] = UnfoldedSlopeBody(i["yaw"]) 98 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 99 | 1].obj_on_it = -2 100 | pre_x, pre_y = utils.next_step(i["x"], i["y"], 101 | (i["yaw"] + 2) * np.pi / 2) 102 | pre_obj = self._blackboard.grid[pre_x][pre_y][i["z"]] 103 | if isinstance(pre_obj, Block): 104 | pre_obj.near_unfold_slope_body = True 105 | self._blackboard.spawn_point_set.add( 106 | Point(i["x"], i["y"], i["z"])) 107 | n_x, n_y = utils.next_step(i["x"], i["y"], 108 | i["yaw"] * np.pi / 2) 109 | self._blackboard.grid[n_x][n_y][i["z"]] = UnfoldedSlopeFoot( 110 | i["yaw"]) 111 | self._blackboard.grid[n_x][n_y][i["z"] - 1].obj_on_it = -2 112 | front_x, front_y = utils.next_step(n_x, n_y, 113 | i["yaw"] * np.pi / 2) 114 | front_blow_obj = self._blackboard.grid[front_x][front_y][i["z"] 115 | - 1] 116 | if isinstance(front_blow_obj, Block): 117 | front_blow_obj.near_blow_unfold_slope_foot = True 118 | self._blackboard.spawn_point_set.add(Point(n_x, n_y, i["z"])) 119 | # place_slope = i["id"] + 1 120 | except KeyError as e: 121 | print("KeyError, ", e) 122 | pass 123 | try: 124 | for i in self.template["flag"]: 
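                # Flags are placed like blocks: validate the coordinate, fill the grid
                # cell, and mark the cell below as occupied (obj_on_it = -2).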
125 | self.checke_coord_legal(i) 126 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Flag() 127 | self._blackboard.grid[i["x"]][i["y"]][i["z"] - 128 | 1].obj_on_it = -2 129 | self._blackboard.spawn_point_set.add( 130 | Point(i["x"], i["y"], i["z"])) 131 | except KeyError as e: 132 | print("KeyError, ", e) 133 | pass 134 | try: 135 | for i in self.template["wall"]: 136 | self.checke_coord_legal(i) 137 | self._blackboard.grid[i["x"]][i["y"]][i["z"]] = Wall() 138 | except KeyError as e: 139 | print("KeyError, ", e) 140 | pass 141 | place_block = len( 142 | self.template["block"]) if "block" in self.template else 0 143 | place_slope = 0 144 | if "fold_slope" in self.template: 145 | place_slope = len(self.template["fold_slope"]) 146 | if "unfold_slope" in self.template: 147 | place_slope += len(self.template["unfold_slope"]) 148 | 149 | self.random_place_slope(self._blackboard.slope_num - place_slope) 150 | self.random_place_block(self._blackboard.block_num - place_block) 151 | -------------------------------------------------------------------------------- /CraftEnv/src/craft/planner/breadth_first_search.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import numpy as np 3 | from .smartcar_planner import SmartCarPlanner 4 | from ..grid_objs import ObjType 5 | 6 | 7 | class BreadthFirstSearch(SmartCarPlanner): 8 | def __init__(self, blackboard): 9 | super().__init__(blackboard) 10 | 11 | def is_inbound(self, x, y, z): 12 | if x < self.x_min or x > self.x_max: 13 | return False 14 | if y < self.y_min or y > self.y_max: 15 | return False 16 | if z < self.z_min or z > self.z_max: 17 | return False 18 | return True 19 | 20 | def set_bound(self, size, start_x, start_y): 21 | if size is None: 22 | self.x_min, self.x_max = 0, self.length - 1 23 | self.y_min, self.y_max = 0, self.width - 1 24 | self.z_min, self.z_max = 0, self.height - 1 25 | return 26 | 27 | if isinstance(size, int): 28 | l, w, h = size, size, size 29 | elif isinstance(size, (tuple, list)): 30 | l, w, h = size 31 | else: 32 | raise TypeError 33 | 34 | self.x_min = max(0, start_x - l / 2) 35 | self.x_max = min(self.length - 1, start_x + l / 2) 36 | self.y_min = max(0, start_y - w / 2) 37 | self.y_max = min(self.width - 1, start_y + w / 2) 38 | self.z_min = 0 39 | self.z_max = min(self.height - 1, h) 40 | 41 | def can_move(self, node, move_dir): 42 | x, y, z, yaw, moving_over_slope = node 43 | kw = dict( 44 | x=x, 45 | y=y, 46 | z=z, 47 | yaw=yaw, 48 | is_lift=False, 49 | lift_obj=None, 50 | moving_over_slope=moving_over_slope 51 | ) 52 | return self.action_mask_proxy.move_action_mask(move_dir, self.agent_id, kw, ignore_cars=self.ignore_cars) 53 | 54 | def can_rotate(self, node, rotate_dir): 55 | x, y, z, yaw, moving_over_slope = node 56 | kw = dict( 57 | x=x, 58 | y=y, 59 | z=z, 60 | yaw=yaw, 61 | is_lift=False, 62 | lift_obj=None, 63 | moving_over_slope=moving_over_slope 64 | ) 65 | return self.action_mask_proxy.rotate_mask(rotate_dir, self.agent_id, kw, ignore_cars=self.ignore_cars) 66 | 67 | def get_moved_node(self, curr_node, move_dir): 68 | x, y, z, yaw, moving_over_slope = curr_node 69 | x += move_dir[0] 70 | y += move_dir[1] 71 | 72 | obj = self.grid[x][y][z] 73 | blow_obj = self.grid[x][y][z - 1] 74 | if obj.type is ObjType.UnfoldedSlopeFoot and moving_over_slope == 0: 75 | moving_over_slope = 1 76 | elif obj.type is ObjType.UnfoldedSlopeBody and moving_over_slope == 1: 77 | moving_over_slope = 2 78 | z += 1 79 | elif blow_obj.type is 
ObjType.UnfoldedSlopeBody and moving_over_slope == 0: 80 | moving_over_slope = 2 81 | elif blow_obj.type is ObjType.UnfoldedSlopeFoot and moving_over_slope == 2: 82 | moving_over_slope = 1 83 | z -= 1 84 | else: 85 | moving_over_slope = 0 86 | return (x, y, z, yaw, moving_over_slope) 87 | 88 | def get_rotated_node(self, curr_node, rotate_dir): 89 | x, y, z, yaw, moving_over_slope = curr_node 90 | yaw = (yaw + rotate_dir) % 4 91 | return (x, y, z, yaw, moving_over_slope) 92 | 93 | def get_successors(self, curr_node): 94 | successors = [] 95 | 96 | # move action 97 | for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]: 98 | if not self.can_move(curr_node, (dx, dy)): 99 | continue 100 | node = self.get_moved_node(curr_node, (dx, dy)) 101 | if not self.is_inbound(*node[:3]): 102 | continue 103 | successors.append(node) 104 | 105 | # rotate action 106 | for d_yaw in [-1, 1]: 107 | if not self.can_rotate(curr_node, d_yaw): 108 | continue 109 | node = self.get_rotated_node(curr_node, d_yaw) 110 | successors.append(node) 111 | 112 | return successors 113 | 114 | def search(self, agent_id, start_x, start_y, start_z, yaw, 115 | moving_over_slope=0, visualize=False, view_size=None, ignore_cars=False): 116 | """ 117 | :param size: int or tuple, specify bounded search space. If None, bfs searches the entire space. 118 | """ 119 | self.agent_id = agent_id 120 | self.reset() 121 | self.set_bound(view_size, start_x, start_y) 122 | self.ignore_cars = ignore_cars 123 | 124 | visited = np.zeros((self.length, self.width, self.height)) 125 | close_set = np.zeros((self.length, self.width, self.height, 4)) 126 | open_set = np.zeros((self.length, self.width, self.height, 4)) 127 | 128 | q = deque() 129 | start_node = (start_x, start_y, start_z, yaw, moving_over_slope) 130 | q.append(start_node) 131 | open_set[start_node[:4]] = 1 132 | 133 | block_length = self.blackboard.BLOCK_LENGTH 134 | block_height = self.blackboard.BLOCK_HEIGHT 135 | if visualize: 136 | import pybullet_data 137 | self.blackboard._bullet_client.setAdditionalSearchPath( 138 | pybullet_data.getDataPath()) 139 | vis_obj_list = [] 140 | 141 | self.blackboard._bullet_client.configureDebugVisualizer( 142 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 0) 143 | 144 | while q: 145 | curr_node = q.popleft() 146 | open_set[curr_node[:4]] = 0 147 | close_set[curr_node[:4]] = 1 148 | visited[curr_node[:3]] = 1 149 | 150 | if visualize: 151 | x, y, z = curr_node[:3] 152 | vis_x = x * block_length + block_length / 2 153 | vis_y = y * block_length + block_length / 2 154 | vis_z = (z - 1) * block_height + block_height / 2 155 | handle = self.blackboard._bullet_client.loadURDF( 156 | 'cube.urdf', (vis_x, vis_y, vis_z), [1, 0, 0, 1], globalScaling=0.05) 157 | for iii in range(self.blackboard._bullet_client.getNumJoints(handle)): 158 | self.blackboard._bullet_client.changeVisualShape( 159 | handle, iii, rgbaColor=[1, 0, 0, 1] 160 | ) 161 | vis_obj_list.append(handle) 162 | 163 | successed_nodes = self.get_successors(curr_node) 164 | for node in successed_nodes: 165 | if close_set[node[:4]] or open_set[node[:4]]: 166 | continue 167 | 168 | q.append(node) 169 | open_set[node[:4]] = 1 170 | 171 | if visualize: 172 | self.blackboard._bullet_client.configureDebugVisualizer( 173 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 1) 174 | input('===== Enter to remove BFS blocks =====\n') 175 | self.blackboard._bullet_client.configureDebugVisualizer( 176 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 0) 177 | for i in vis_obj_list: 178 | 
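                # Remove the temporary debug cubes that marked visited cells before
                # rendering is switched back on.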
self.blackboard._bullet_client.removeBody(i) 179 | self.blackboard._bullet_client.configureDebugVisualizer( 180 | self.blackboard._bullet_client.COV_ENABLE_RENDERING, 1) 181 | return visited 182 | --------------------------------------------------------------------------------
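Usage sketch (not part of the repository): one way BreadthFirstSearch.search() might be called to obtain the reachability mask for a single smartcar. The import path, the blackboard handle, and the car pose are assumptions inferred from the source layout above; in practice they come from an already-initialised environment rather than being constructed here.

    from craft.planner.breadth_first_search import BreadthFirstSearch

    planner = BreadthFirstSearch(blackboard)   # blackboard: an initialised Blackboard instance (assumed)
    visited = planner.search(
        agent_id=0,
        start_x=car.x, start_y=car.y, start_z=car.z, yaw=car.yaw,  # car: one of blackboard.smartcars (assumed)
        moving_over_slope=0,
        view_size=5,          # bound the search to roughly a 5-cell box around the start cell
        ignore_cars=False,
    )
    # visited[x, y, z] == 1 marks grid cells reachable through legal move/rotate actions.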