├── .idea ├── .gitignore ├── MADDPG_torch.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── README.md ├── __pycache__ ├── arguments.cpython-36.pyc ├── model.cpython-36.pyc └── replay_buffer.cpython-36.pyc ├── arguments.py ├── enjoy_split.py ├── learning_curves ├── exp_torch_4 │ ├── agrewards.pkl │ └── rewards.pkl ├── exp_torch_5 │ ├── agrewards.pkl │ └── rewards.pkl ├── exp_torch_6 │ ├── agrewards.pkl │ └── rewards.pkl ├── exp_torch_7 │ ├── agrewards.pkl │ └── rewards.pkl └── exp_torch_8 │ ├── agrewards.pkl │ └── rewards.pkl ├── main_openai.py ├── main_openai_backup.py ├── model.py ├── multiagent ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── core.cpython-36.pyc │ ├── core_uav.cpython-36.pyc │ ├── environment.cpython-36.pyc │ ├── environment_uav.cpython-36.pyc │ ├── multi_discrete.cpython-36.pyc │ ├── rendering.cpython-36.pyc │ └── scenario.cpython-36.pyc ├── core.py ├── core_uav.py ├── environment.py ├── environment_uav.py ├── multi_discrete.py ├── policy.py ├── rendering.py ├── scenario.py └── scenarios │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── simple_adversary.cpython-36.pyc │ └── simple_collaborative_comm.cpython-36.pyc │ ├── simple.py │ ├── simple_adversary.py │ ├── simple_collaborative_comm.py │ ├── simple_crypto.py │ ├── simple_push.py │ ├── simple_reference.py │ ├── simple_speaker_listener.py │ ├── simple_spread.py │ ├── simple_tag.py │ └── simple_world_comm.py ├── obs_img.py ├── plot_reward.py ├── replay_buffer.py └── scenario-options ├── band-loc ├── core_uav.py ├── environment_uav.py └── simple_collaborative_comm.py ├── pow-band-loc.7z └── pow-band-loc ├── core_uav.py ├── environment_uav.py └── simple_collaborative_comm.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/MADDPG_torch.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 14 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time: 2019-11-05 2 | ## Desciption: 3 | This project is created for MADDPG, which is already 
popular in multi-agent reinforcement learning. With the growing popularity of PyTorch, I think a PyTorch version of this project will be useful for learners in the multi-agent field (not for profit). 4 | 5 | ## Installation 6 | Known dependencies: Python (3.6.8), OpenAI Gym (0.10.5), PyTorch (1.1.0), NumPy (1.17.3) 7 | Step 1: Install the MPE (Multi-Agent Particle Environments) following the OpenAI README (or my blog). 8 | Step 2: Download the project and cd into it. Make sure you have a '\logs' and a '\models' folder in your project. 9 | Step 3: Run 'main_openai.py' with 'python main_openai.py --scenario_name simple_tag' 10 | 11 | ## Structure 12 | ./main_openai.py: Main function. 13 | ./arguments.py: Initialize the parameters for the game, training and saving. 14 | ./model.py: Initialize the models for the agents. 15 | ./replay_buffer.py: Store the replay memory for all the agents. 16 | ./enjoy_split.py: A template for testing the model trained by 'main_openai.py'. 17 | 18 | ## Command line options 19 | ### Environment options 20 | --scenario_name: defines which environment in the MPE is to be used (default: "simple") 21 | --per_episode_max_len: maximum length of each episode for the environment (default: 45) 22 | --max_episode: total number of training episodes (default: 150000) 23 | 24 | ### Training options 25 | --lr_a: learning rate for the actor (default: 1e-2) 26 | --lr_c: learning rate for the critic (default: 1e-2) 27 | --gamma: discount factor (default: 0.95) 28 | --batch_size: batch size (default: 1024) 29 | --num_units_openai: number of units in the MLP (default: 64) 30 | 31 | ### Checkpointing 32 | --save_dir: directory where intermediate training results and models are saved (default: "/tmp/policy/") 33 | --fre4save_model: the model is saved every time this number of game steps has been completed (default: 1000) 34 | --start_save_model: the episode at which we start saving the model (default: 400) 35 | 36 | ## Links 37 | OpenAI MPE: https://github.com/openai/multiagent-particle-envs 38 | The explorer of Game AI: https://zhuanlan.zhihu.com/c_186658689 39 |
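## Example
The options above can be combined in a single run. A minimal sketch (the flag names follow ./arguments.py in this repo; adjust the scenario and paths to your own setup):
'python main_openai.py --scenario_name simple_spread --max_episode 150000 --batch_size 1024 --gamma 0.95 --save_dir ./models/'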
-------------------------------------------------------------------------------- /__pycache__/arguments.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/arguments.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/replay_buffer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/replay_buffer.cpython-36.pyc -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | # Time: 2019-11-05 2 | # Author: Zachary 3 | # Name: MADDPG_torch 4 | 5 | import time 6 | import torch 7 | import argparse 8 | 9 | device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu') 10 | time_now = time.strftime('%y%m_%d%H%M') 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser("reinforcement learning experiments for multiagent environments") 14 | 15 | # environment simple_adversary simple_collaborative_comm 16 | parser.add_argument("--scenario_name", type=str, default="simple_collaborative_comm", help="name of the scenario script") 17 | parser.add_argument("--start_time", type=str, default=time_now, help="the time when the game starts") 18 | parser.add_argument("--per_episode_max_len", type=int, default=100, help="maximum episode length") 19 | parser.add_argument("--max_episode", type=int, default=150000, help="maximum number of training episodes") 20 | parser.add_argument("--num-adversaries", type=int, default=0, help="number of adversaries") 21 | 22 | # core training parameters 23 | parser.add_argument("--exp_name", type=str, default="exp_torch_8", help="name of the experiment") 24 | parser.add_argument("--device", default=device, help="torch device") 25 | parser.add_argument("--learning_start_step", type=int, default=0, help="learning start steps") 26 | parser.add_argument("--max_grad_norm", type=float, default=0.5, help="max gradient norm for clip") 27 | parser.add_argument("--learning_fre", type=int, default=100, help="learning frequency") 28 | parser.add_argument("--tao", type=float, default=0.01, help="soft-update coefficient for the target networks") 29 | parser.add_argument("--lr_a", type=float, default=1e-2, help="learning rate of the actor's Adam optimizer") 30 | parser.add_argument("--lr_c", type=float, default=1e-2, help="learning rate of the critic's Adam optimizer") 31 | parser.add_argument("--gamma", type=float, default=0.8, help="discount factor") 32 | parser.add_argument("--batch_size", type=int, default=1024, help="number of samples to optimize at the same time") 33 | parser.add_argument("--memory_size", type=int, default=int(1e6), help="maximum number of transitions stored in the replay memory") 34 | parser.add_argument("--num_units_1", type=int, default=128, help="number of units in the mlp") 35 | parser.add_argument("--num_units_2", type=int, default=64, help="number of units in the mlp") 36 | parser.add_argument("--num_units_openai", type=int, default=64, help="number of units in the mlp") 37 | 38 | # checkpointing 39 | parser.add_argument("--fre4save_model", type=int, default=100, help="save the model every this many episodes") 40 | parser.add_argument("--start_save_model", type=int, default=400, help="the episode at which model saving starts") 41 | parser.add_argument("--save_dir", type=str, default="./models/", \ 42 | help="directory in which training state and model should be saved") 43 | parser.add_argument("--old_model_name", type=str, default="models/exp_torch_8/21_0406_0824_12120000/", \ 44 | help="directory in which training state and model are loaded") 45 | # evaluation 46 | parser.add_argument("--restore", action="store_true", default=True) 47 | parser.add_argument("--display", action="store_true", default=True) 48 | parser.add_argument("--test", action="store_true", default=True) 49 | parser.add_argument("--restore_idxs", type=list, default=[0,1,2]) 50 | parser.add_argument("--benchmark", action="store_true", default=False) 51 | parser.add_argument("--benchmark-iters", type=int, default=100000, help="number of iterations run for benchmarking") 52 | parser.add_argument("--benchmark-dir", type=str, default="./benchmark_files/", \ 53 | help="directory where benchmark data is saved") 54 | parser.add_argument("--plots-dir", type=str, default="./learning_curves/", \ 55 | help="directory where plot data is saved") 56 | return parser.parse_args() 57 |
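A hedged aside (main_openai.py is not shown in this section, so this is a sketch rather than the repo's exact code): the --tao argument above is the soft-update coefficient used to track the MADDPG target networks, typically applied parameter-wise like this:

    import torch.nn as nn

    def soft_update(online_net: nn.Module, target_net: nn.Module, tao: float) -> None:
        # target <- tao * online + (1 - tao) * target
        for p_online, p_target in zip(online_net.parameters(), target_net.parameters()):
            p_target.data.copy_(tao * p_online.data + (1.0 - tao) * p_target.data)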
-------------------------------------------------------------------------------- /enjoy_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import torch 5 | import multiagent.scenarios as scenarios 6 | from multiagent.environment_uav import MultiAgentEnv 7 | 8 | from model import actor_agent, critic_agent 9 | from arguments import parse_args 10 | 11 | def make_env(scenario_name, arglist, benchmark=False): 12 | """ 13 | create the environment from script 14 | """ 15 | scenario = scenarios.load(scenario_name + ".py").Scenario() 16 | world = scenario.make_world() 17 | if benchmark: 18 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data) 19 | else: 20 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation) 21 | return env 22 | 23 | def get_trainers(env, arglist): 24 | trainers_cur = [] 25 | trainers_tar = [] 26 | optimizers = [] 27 | input_size = [8, 10, 10] # the obs size 28 | input_size_global = [23, 25, 25] # cal by README 29 | 30 | """ load the model """ 31 | actors_tar = [torch.load(arglist.old_model_name+'a_t_{}.pt'.format(agent_idx), map_location=arglist.device) \ 32 | for agent_idx in range(env.n)] 33 | 34 | return actors_tar 35 | 36 | def enjoy(arglist): 37 | """ 38 | This func is used for testing the model 39 | """ 40 | 41 | episode_step = 0 42 | """ init the env """ 43 | env = make_env(arglist.scenario_name, arglist, arglist.benchmark) 44 | 45 | """ init the agents """ 46 | obs_shape_n = [env.observation_space[i].shape for i in range(env.n)] 47 | actors_tar = get_trainers(env, arglist) 48 | 49 | """ interact with the env """ 50 | obs_n = env.reset() 51 | while(True): 52 | 53 | # update the episode step number 54 | episode_step += 1 55 | # print(episode_step) 56 | 57 | # get action 58 | try: 59 | action_n = [] 60 | # action_n = [agent.actor(torch.from_numpy(obs).to(arglist.device, torch.float)).numpy() \ 61 | # for agent, obs in zip(trainers_cur, obs_n)] 62 | for actor, obs in zip(actors_tar, obs_n): 63 | action = torch.clamp(actor(torch.from_numpy(obs).to(arglist.device, torch.float)), -1, 1) 64 | action_n.append(action) 65 | 66 | 67 | except: 68 | print(obs_n) 69 | 70 | # interact with env 71 | new_obs_n, rew_n, done_n, info_n = env.step(action_n) 72 | 73 | print('*********************Bandwidht assignment**********************') 74 | for i in env.world.bandwidth: 75 | for j in i: 76 | print('%10.2e' % j, end='') 77 | print('\n', end='') 78 | 79 | print('*************************SNR*****************************') 80 | for i in env.world.SNR: 81 | for j in i: 82 | print('%10.2f' % j, end='') 83 | print('\n', end='') 84 | print('*************************Rate*****************************') 85 | for i in env.world.Rate: 86 | for j in i: 87 | print('%10.2e' % j, end='') 88 | print('\n', end=' ') 89 | print('\n') 90 | 91 | # update the flag 92 | done = all(done_n) 93 | terminal = (episode_step >= arglist.per_episode_max_len) 94 | 95 | # reset the env 96 | if done or terminal: 97 | episode_step = 0 98 | obs_n = env.reset() 99 | 100 | # render the env 101 | # print(rew_n) 102 | env.render() 103 | 104 | if __name__ == '__main__': 105 | arglist = parse_args() 106 | enjoy(arglist) 107 | -------------------------------------------------------------------------------- /learning_curves/exp_torch_4/agrewards.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_4/agrewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_4/rewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_4/rewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_5/agrewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_5/agrewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_5/rewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_5/rewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_6/agrewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_6/agrewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_6/rewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_6/rewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_7/agrewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_7/agrewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_7/rewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_7/rewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_8/agrewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_8/agrewards.pkl -------------------------------------------------------------------------------- /learning_curves/exp_torch_8/rewards.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_8/rewards.pkl -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # Time: 2019-11-05 2 | # Author: Zachary 3 | # Name: MADDPG_torch 4 | import torch 5 | import 
torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | class abstract_agent(nn.Module): 9 | def __init__(self): 10 | super(abstract_agent, self).__init__() 11 | 12 | def act(self, input): 13 | policy, value = self.forward(input) # flow the input through the nn 14 | return policy, value 15 | 16 | class actor_agent(abstract_agent): 17 | def __init__(self, num_inputs, action_size, args): 18 | super(actor_agent, self).__init__() 19 | self.linear_a1 = nn.Linear(num_inputs, args.num_units_1) 20 | self.linear_a2 = nn.Linear(args.num_units_1, args.num_units_2) 21 | self.linear_a = nn.Linear(args.num_units_2, action_size) 22 | self.reset_parameters() 23 | # Activation func init 24 | self.LReLU = nn.LeakyReLU(0.01) 25 | self.tanh= nn.Tanh() 26 | self.train() 27 | 28 | def reset_parameters(self): 29 | gain = nn.init.calculate_gain('leaky_relu') 30 | gain_tanh = nn.init.calculate_gain('tanh') 31 | self.linear_a1.weight.data.mul_(gain) 32 | self.linear_a2.weight.data.mul_(gain) 33 | self.linear_a.weight.data.mul_(gain_tanh) 34 | 35 | def forward(self, input): 36 | """ 37 | The forward func defines how the data flows through the graph(layers) 38 | """ 39 | x = self.LReLU(self.linear_a1(input)) 40 | x = self.LReLU(self.linear_a2(x)) 41 | policy = self.tanh(self.linear_a(x)) 42 | return policy 43 | 44 | class critic_agent(abstract_agent): 45 | def __init__(self, obs_shape_n, action_shape_n, args): 46 | super(critic_agent, self).__init__() 47 | self.linear_o_c1 = nn.Linear(obs_shape_n, args.num_units_1) 48 | self.linear_a_c1 = nn.Linear(action_shape_n, args.num_units_1) 49 | self.linear_c2 = nn.Linear(args.num_units_1*2, args.num_units_2) 50 | self.linear_c = nn.Linear(args.num_units_2, 1) 51 | self.reset_parameters() 52 | 53 | self.LReLU = nn.LeakyReLU(0.01) 54 | self.tanh= nn.Tanh() 55 | self.train() 56 | 57 | def reset_parameters(self): 58 | gain = nn.init.calculate_gain('leaky_relu') 59 | gain_tanh = nn.init.calculate_gain('tanh') 60 | self.linear_o_c1.weight.data.mul_(gain) 61 | self.linear_a_c1.weight.data.mul_(gain) 62 | self.linear_c2.weight.data.mul_(gain) 63 | self.linear_c.weight.data.mul_(gain) 64 | 65 | def forward(self, obs_input, action_input): 66 | """ 67 | input_g: input_global, input features of all agents 68 | """ 69 | x_o = self.LReLU(self.linear_o_c1(obs_input)) 70 | x_a = self.LReLU(self.linear_a_c1(action_input)) 71 | x_cat = torch.cat([x_o, x_a], dim=1) 72 | x = self.LReLU(self.linear_c2(x_cat)) 73 | value = self.linear_c(x) 74 | return value 75 | 76 | class openai_critic(abstract_agent): 77 | def __init__(self, obs_shape_n, action_shape_n, args): 78 | super(openai_critic, self).__init__() 79 | # self.LReLU = nn.LeakyReLU(0.01) 80 | self.LReLU = nn.ReLU() 81 | self.linear_c1 = nn.Linear(action_shape_n+obs_shape_n, args.num_units_openai) 82 | self.linear_c2 = nn.Linear(args.num_units_openai, args.num_units_openai) 83 | self.linear_c = nn.Linear(args.num_units_openai, 1) 84 | 85 | self.reset_parameters() 86 | self.train() 87 | 88 | def reset_parameters(self): 89 | # gain = nn.init.calculate_gain('leaky_relu') 90 | # nn.init.xavier_uniform_(self.linear_c1.weight, gain=nn.init.calculate_gain('leaky_relu')) 91 | # nn.init.xavier_uniform_(self.linear_c2.weight, gain=nn.init.calculate_gain('leaky_relu')) 92 | # nn.init.xavier_uniform_(self.linear_c.weight, gain=nn.init.calculate_gain('leaky_relu')) 93 | gain = nn.init.calculate_gain('relu') 94 | nn.init.xavier_uniform_(self.linear_c1.weight, gain=nn.init.calculate_gain('relu')) 95 | nn.init.xavier_uniform_(self.linear_c2.weight, 
gain=nn.init.calculate_gain('relu')) 96 | nn.init.xavier_uniform_(self.linear_c.weight, gain=nn.init.calculate_gain('relu')) 97 | 98 | 99 | def forward(self, obs_input, action_input): 100 | """ 101 | input_g: input_global, input features of all agents 102 | """ 103 | x_cat = self.LReLU(self.linear_c1(torch.cat([obs_input, action_input], dim=1))) 104 | x = self.LReLU(self.linear_c2(x_cat)) 105 | value = self.linear_c(x) 106 | return value 107 | 108 | class openai_actor(abstract_agent): 109 | def __init__(self, num_inputs, action_size, args): 110 | super(openai_actor, self).__init__() 111 | self.tanh= nn.Tanh() 112 | # self.LReLU = nn.LeakyReLU(0.01) 113 | self.LReLU = nn.ReLU() 114 | self.linear_a1 = nn.Linear(num_inputs, args.num_units_openai) 115 | self.linear_a2 = nn.Linear(args.num_units_openai, args.num_units_openai) 116 | self.linear_a = nn.Linear(args.num_units_openai, action_size) 117 | 118 | self.reset_parameters() 119 | self.train() 120 | 121 | def reset_parameters(self): 122 | # gain = nn.init.calculate_gain('leaky_relu') 123 | # nn.init.xavier_uniform_(self.linear_a1.weight, gain=nn.init.calculate_gain('leaky_relu')) 124 | # nn.init.xavier_uniform_(self.linear_a2.weight, gain=nn.init.calculate_gain('leaky_relu')) 125 | # nn.init.xavier_uniform_(self.linear_a.weight, gain=nn.init.calculate_gain('leaky_relu')) 126 | gain_tanh = nn.init.calculate_gain('tanh') 127 | gain = nn.init.calculate_gain('relu') 128 | nn.init.xavier_uniform_(self.linear_a1.weight, gain=nn.init.calculate_gain('relu')) 129 | nn.init.xavier_uniform_(self.linear_a2.weight, gain=nn.init.calculate_gain('relu')) 130 | nn.init.xavier_uniform_(self.linear_a.weight, gain=nn.init.calculate_gain('relu')) 131 | 132 | def forward(self, input, model_original_out=False): 133 | """ 134 | The forward func defines how the data flows through the graph(layers) 135 | flag: 0 sigle input 1 batch input 136 | """ 137 | x = self.LReLU(self.linear_a1(input)) 138 | x = self.LReLU(self.linear_a2(x)) 139 | model_out = self.linear_a(x) 140 | u = torch.rand_like(model_out) 141 | policy = F.softmax(model_out - torch.log(-torch.log(u)), dim=-1) 142 | if model_original_out == True: return model_out, policy # for model_out criterion 143 | return policy 144 | -------------------------------------------------------------------------------- /multiagent/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | # Multiagent envs 4 | # ---------------------------------------- 5 | 6 | register( 7 | id='MultiagentSimple-v0', 8 | entry_point='multiagent.envs:SimpleEnv', 9 | # FIXME(cathywu) currently has to be exactly max_path_length parameters in 10 | # rllab run script 11 | max_episode_steps=100, 12 | ) 13 | 14 | register( 15 | id='MultiagentSimpleSpeakerListener-v0', 16 | entry_point='multiagent.envs:SimpleSpeakerListenerEnv', 17 | max_episode_steps=100, 18 | ) 19 | -------------------------------------------------------------------------------- /multiagent/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/core.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/core.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/core_uav.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/core_uav.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/environment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/environment.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/environment_uav.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/environment_uav.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/multi_discrete.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/multi_discrete.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/rendering.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/rendering.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/__pycache__/scenario.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/scenario.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # physical/external base state of all entites 4 | class EntityState(object): 5 | def __init__(self): 6 | # physical position 7 | self.p_pos = None 8 | # physical velocity 9 | self.p_vel = None 10 | 11 | # state of agents (including communication and internal/mental state) 12 | class AgentState(EntityState): 13 | def __init__(self): 14 | super(AgentState, self).__init__() 15 | # communication allocation 16 | self.c = None 17 | 18 | # action of the agent 19 | class Action(object): 20 | def __init__(self): 21 | # physical action 22 | self.u = None 23 | # communication action 24 | self.c = None 25 | 26 | # properties and state of physical world entity 27 | class Entity(object): 28 | def __init__(self): 29 | # name 30 | self.name = '' 31 | # properties: 32 | self.size = 0.050 33 | # entity can move / be pushed 34 | self.movable = False 35 | # entity collides with others 36 | self.collide = True 37 | # material density (affects mass) 38 | self.density = 25.0 39 | # color 40 | 
self.color = None 41 | # max speed and accel 42 | self.max_speed = None 43 | self.accel = None 44 | # state 45 | self.state = EntityState() 46 | # mass 47 | self.initial_mass = 1 48 | 49 | @property 50 | def mass(self): 51 | return self.initial_mass 52 | 53 | # properties of landmark entities 54 | class Landmark(Entity): 55 | def __init__(self): 56 | super(Landmark, self).__init__() 57 | 58 | # properties of agent entities 59 | class Agent(Entity): 60 | def __init__(self): 61 | super(Agent, self).__init__() 62 | # agents are movable by default 63 | self.movable = True 64 | # cannot send communication signals 65 | self.silent = False 66 | # cannot observe the world 67 | self.blind = False 68 | # physical motor noise amount 69 | self.u_noise = None 70 | # communication noise amount 71 | self.c_noise = None 72 | # control range 73 | self.u_range = 1.0 74 | # state 75 | self.state = AgentState() 76 | # action 77 | self.action = Action() 78 | # script behavior to execute 79 | self.action_callback = None 80 | 81 | # multi-agent world 82 | class World(object): 83 | def __init__(self): 84 | # list of agents and entities (can change at execution-time!) 85 | self.agents = [] 86 | self.landmarks = [] 87 | # communication channel dimensionality 88 | self.dim_c = 0 89 | # position dimensionality 90 | self.dim_p = 2 91 | # color dimensionality 92 | self.dim_color = 3 93 | # simulation timestep 94 | self.dt = 0.1 95 | # physical damping 96 | self.damping = 0.25 97 | # contact response parameters 98 | self.contact_force = 1e+2 99 | self.contact_margin = 1e-3 100 | 101 | # return all entities in the world 102 | @property 103 | def entities(self): 104 | return self.agents + self.landmarks 105 | 106 | # return all agents controllable by external policies 107 | @property 108 | def policy_agents(self): 109 | return [agent for agent in self.agents if agent.action_callback is None] 110 | 111 | # return all agents controlled by world scripts 112 | @property 113 | def scripted_agents(self): 114 | return [agent for agent in self.agents if agent.action_callback is not None] 115 | 116 | # update state of the world 117 | def step(self): 118 | # set actions for scripted agents 119 | for agent in self.scripted_agents: 120 | agent.action = agent.action_callback(agent, self) 121 | # gather forces applied to entities 122 | p_force = [None] * len(self.entities) 123 | # apply agent physical controls 124 | p_force = self.apply_action_force(p_force) 125 | # apply environment forces 126 | p_force = self.apply_environment_force(p_force) 127 | # integrate physical state 128 | self.integrate_state(p_force) 129 | # update agent state 130 | for agent in self.agents: 131 | self.update_agent_state(agent) 132 | 133 | # gather agent action forces 134 | def apply_action_force(self, p_force): 135 | # set applied forces 136 | for i,agent in enumerate(self.agents): 137 | if agent.movable: 138 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 139 | p_force[i] = agent.action.u + noise 140 | return p_force 141 | 142 | # gather physical forces acting on entities 143 | def apply_environment_force(self, p_force): 144 | # simple (but inefficient) collision response 145 | for a,entity_a in enumerate(self.entities): 146 | for b,entity_b in enumerate(self.entities): 147 | if(b <= a): continue 148 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b) 149 | if(f_a is not None): 150 | if(p_force[a] is None): p_force[a] = 0.0 151 | p_force[a] = f_a + p_force[a] 152 | if(f_b is not None): 153 | if(p_force[b] 
is None): p_force[b] = 0.0 154 | p_force[b] = f_b + p_force[b] 155 | return p_force 156 | 157 | # integrate physical state 158 | def integrate_state(self, p_force): 159 | for i,entity in enumerate(self.entities): 160 | if not entity.movable: continue 161 | entity.state.p_vel = entity.state.p_vel * (1 - self.damping) 162 | if (p_force[i] is not None): 163 | entity.state.p_vel += (p_force[i] / entity.mass) * self.dt 164 | if entity.max_speed is not None: 165 | speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1])) 166 | if speed > entity.max_speed: 167 | entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) + 168 | np.square(entity.state.p_vel[1])) * entity.max_speed 169 | entity.state.p_pos += entity.state.p_vel * self.dt 170 | 171 | def update_agent_state(self, agent): 172 | # set communication state (directly for now) 173 | if agent.silent: 174 | agent.state.c = np.zeros(self.dim_c) 175 | else: 176 | noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 177 | agent.state.c = agent.action.c + noise 178 | 179 | # get collision forces for any contact between two entities 180 | def get_collision_force(self, entity_a, entity_b): 181 | if (not entity_a.collide) or (not entity_b.collide): 182 | return [None, None] # not a collider 183 | if (entity_a is entity_b): 184 | return [None, None] # don't collide against itself 185 | # compute actual distance between entities 186 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos 187 | dist = np.sqrt(np.sum(np.square(delta_pos))) 188 | # minimum allowable distance 189 | dist_min = entity_a.size + entity_b.size 190 | # softmax penetration 191 | k = self.contact_margin 192 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k 193 | force = self.contact_force * delta_pos / dist * penetration 194 | force_a = +force if entity_a.movable else None 195 | force_b = -force if entity_b.movable else None 196 | return [force_a, force_b] -------------------------------------------------------------------------------- /multiagent/core_uav.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # physical/external base state of all entites 4 | class EntityState(object): 5 | def __init__(self): 6 | # physical position 7 | self.p_pos = None 8 | # physical velocity 9 | self.p_vel = None 10 | self.hold_action = 0 11 | 12 | # state of agents (including communication and internal/mental state) 13 | class AgentState(EntityState): 14 | def __init__(self): 15 | super(AgentState, self).__init__() 16 | # communication allocation 17 | self.c = None 18 | self.UAV_Rate = None 19 | self.assign = None 20 | self.SNRforUser = None 21 | 22 | # action of the agent 23 | class Action(object): 24 | def __init__(self): 25 | # physical action 26 | self.u = None 27 | # communication action 28 | self.c = None 29 | 30 | # properties and state of physical world entity 31 | class Entity(object): 32 | def __init__(self): 33 | # name 34 | self.name = '' 35 | # properties: 36 | self.size = 0.0250 37 | # entity can move / be pushed 38 | self.movable = False 39 | # entity collides with others 40 | self.collide = True 41 | # material density (affects mass) 42 | self.density = 25.0 43 | # color 44 | self.color = None 45 | # max speed and accel 46 | self.max_speed = 10 47 | self.accel = None 48 | # state 49 | self.state = EntityState() 50 | # mass 51 | self.initial_mass = 1 52 | 53 | @property 54 | def mass(self): 55 | return self.initial_mass 56 
| 57 | # properties of landmark entities 58 | class Landmark(Entity): 59 | def __init__(self): 60 | super(Landmark, self).__init__() 61 | self.assign_index = None 62 | self.switch_cout = 0 63 | 64 | # properties of agent entities 65 | class Agent(Entity): 66 | def __init__(self): 67 | super(Agent, self).__init__() 68 | # agents are movable by default 69 | self.movable = True 70 | # cannot send communication signals 71 | self.silent = False 72 | # cannot observe the world 73 | self.blind = False 74 | # physical motor noise amount 75 | self.u_noise = None 76 | # communication noise amount 77 | self.c_noise = None 78 | # control range 79 | self.u_range = 1.0 80 | # state 81 | self.state = AgentState() 82 | # action 83 | self.action = Action() 84 | # script behavior to execute 85 | self.action_callback = None 86 | 87 | # multi-agent world 88 | class World(object): 89 | def __init__(self): 90 | # list of agents and entities (can change at execution-time!) 91 | self.agents = [] 92 | self.landmarks = [] 93 | # communication channel dimensionality 94 | self.dim_c = 0 95 | # position dimensionality 96 | self.dim_p = 2 97 | # color dimensionality 98 | self.dim_color = 3 99 | # simulation timestep 100 | self.dt = 0.1 101 | # physical damping 102 | self.damping = 0.25 103 | # contact response parameters 104 | self.contact_force = 1e+2 105 | self.contact_margin = 1e-3 106 | self.collaborative = False 107 | 108 | self.pow_UAVs = 100 # UAV transmit power 109 | self.pow_GBS = None # ground base station (GBS) transmit power 110 | self.gain_UAV = 10**(-40/10) # channel gain of the UAV link at unit distance (in dB) 111 | self.gain_GBS = 60 # channel gain of the GBS link at unit distance (in dB) 112 | self.los_exp_UAV = 2 # path-loss exponent of the UAV link 113 | self.los_exp_GBS = 3 # path-loss exponent of the GBS link 114 | self.max_band = 10e6 # maximum available bandwidth 115 | self.noise_pdf = 10**(-167/10) # noise power spectral density (dBm) 116 | 117 | self.bandwidth = None # allocated bandwidth 118 | self.Rate = None 119 | self.SNR = None 120 | 121 | # return all entities in the world 122 | @property 123 | def entities(self): 124 | return self.agents + self.landmarks 125 | 126 | # return all agents controllable by external policies 127 | @property 128 | def policy_agents(self): 129 | return [agent for agent in self.agents if agent.action_callback is None] 130 | 131 | # return all agents controlled by world scripts 132 | @property 133 | def scripted_agents(self): 134 | return [agent for agent in self.agents if agent.action_callback is not None] 135 | 136 | # update state of the world 137 | def step(self): 138 | # set actions for scripted agents 139 | for agent in self.scripted_agents: 140 | agent.action = agent.action_callback(agent, self) 141 | 142 | # gather forces applied to entities 143 | p_force = [None] * len(self.entities) 144 | # apply agent physical controls 145 | p_force = self.apply_action_force(p_force) 146 | # apply environment forces 147 | # p_force = self.apply_environment_force(p_force) 148 | # integrate physical state 149 | self.integrate_state(p_force) 150 | 151 | self.update_landmark_pos() 152 | 153 | # self.update_agent_pos() 154 | 155 | # update agent state 156 | for agent in self.agents: 157 | self.update_agent_state(agent) 158 | self.update_world_state() 159 | 160 | # dict_state = [] 161 | # for i, landmark in enumerate(self.landmarks): 162 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 163 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos}) 164 | # 165 | # for i, agent in enumerate(self.agents): 166 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 167 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign':
agent.state.c}) 168 | # with open('./state_results/states.txt','a') as f: 169 | # f.write(str(dict_state)+'\n') 170 | 171 | # gather agent action forces 172 | def apply_action_force(self, p_force): 173 | # set applied forces 174 | for i,agent in enumerate(self.agents): 175 | if agent.movable: 176 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 177 | p_force[i] = agent.action.u + noise 178 | return p_force 179 | 180 | # gather physical forces acting on entities 181 | def apply_environment_force(self, p_force): 182 | # simple (but inefficient) collision response 183 | for a,entity_a in enumerate(self.entities): 184 | for b,entity_b in enumerate(self.entities): 185 | if(b <= a): continue 186 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b) 187 | if(f_a is not None): 188 | if(p_force[a] is None): p_force[a] = 0.0 189 | p_force[a] = f_a + p_force[a] 190 | if(f_b is not None): 191 | if(p_force[b] is None): p_force[b] = 0.0 192 | p_force[b] = f_b + p_force[b] 193 | return p_force 194 | 195 | # integrate physical state 196 | def integrate_state(self, p_force): 197 | for i, agent in enumerate(self.agents): 198 | if not agent.movable: continue 199 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 200 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping) 201 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping) 202 | if (p_force[i] is not None): 203 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt 204 | if agent.max_speed is not None: 205 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 206 | if speed > agent.max_speed: 207 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 208 | np.square(agent.state.p_vel[1])) * agent.max_speed 209 | agent.state.p_pos += agent.state.p_vel * self.dt 210 | 211 | def update_landmark_pos(self): 212 | # for landmark in self.landmarks: 213 | # if not landmark.movable: continue 214 | # if landmark.state.hold_action > 0: 215 | # landmark.state.hold_action -= 1 216 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 217 | # else: 218 | # landmark.state.hold_action = np.random.randint(0,10) 219 | # 220 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 221 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 223 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 224 | # # print(temp_pos) 225 | # landmark.state.p_pos = temp_pos 226 | 227 | for landmark in self.landmarks: 228 | if not landmark.movable: continue 229 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 230 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 231 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 232 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 233 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 234 | # print(temp_pos) 235 | landmark.state.p_pos = temp_pos 236 | 237 | 238 | def update_agent_state(self, agent): 239 | # set communication state (directly f or now) 240 | if agent.silent: 241 | agent.state.c = np.zeros(self.dim_c) 242 | else: 243 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 244 | # agent.state.c = agent.action.c + noise 245 | agent.state.c = agent.action.c 246 | 247 | # def update_agent_pos(self): 248 | # # set 
communication state (directly for now) 249 | # for i, agent in enumerate(self.agents): 250 | # if not agent.movable: continue 251 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 252 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping) 253 | # print(agent.state.p_vel) 254 | # if agent.max_speed is not None: 255 | # speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 256 | # if speed > agent.max_speed: 257 | # print('speed exceeds the maximum') 258 | # agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 259 | # np.square(agent.state.p_vel[1])) * agent.max_speed 260 | # agent.state.p_pos += agent.state.p_vel * self.dt 261 | 262 | def update_world_state(self): 263 | # users make their selection according to the scheme provided by the base stations 264 | 265 | bw = np.zeros((len(self.agents), self.dim_c)) 266 | disVec = np.zeros((len(self.agents), self.dim_c)) 267 | height = np.ones((len(self.agents), self.dim_c)) * 200 # UAV flight altitude 268 | for index, agent in enumerate(self.agents): 269 | bw[index] = np.array(list(self.max_band * i for i in agent.action.c)) 270 | # compute the Euclidean distance from the UAV to every user 271 | for i, landmark in enumerate(self.landmarks): 272 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000 273 | # when the distance exceeds a threshold, communication is impossible and the allocated bandwidth is set to 0 274 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i] 275 | # received signal power 276 | signal_pow = self.pow_UAVs * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2) 277 | 278 | # noise power at the receiver 279 | noise_pow = bw * self.noise_pdf + 1e-9 280 | # SNR at the receiver 281 | SNR = signal_pow / noise_pow 282 | 283 | SNR[SNR < np.sqrt(10)] = 0 # when the SNR is below 5 dB, communication is impossible, so force it to zero 284 | 285 | 286 | # for i in range(self.dim_c): 287 | # for j in range(len(self.agents)): 288 | # if 10*np.log10(SNR[j][i]) < 15: 289 | # bw[j][i] = 0 290 | 291 | # data transmission rate 292 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR) 293 | 294 | ## ensure each user establishes a communication link with only one UAV 295 | mask = np.zeros((len(self.agents), self.dim_c)) # masked-out positions are 0 296 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) # masked-out positions are negative 297 | 298 | for i, landmark in enumerate(self.landmarks): 299 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])): 300 | landmark.switch_cout = 1 301 | landmark.assign_index = np.argmax(Rate[:, i]) 302 | else: 303 | landmark.assign_index = np.argmax(Rate[:, i]) 304 | 305 | mask[landmark.assign_index][i] = 1 306 | 307 | 308 | self.bandwidth = mask * bw 309 | self.Rate = mask * Rate 310 | self.SNR = 10*np.log10(mask * SNR + 1e-10) 311 | 312 | for i, agent in enumerate(self.agents): 313 | agent.state.assign = self.bandwidth[i, :]/self.max_band 314 | agent.state.UAV_Rate = self.Rate[i, :] 315 | agent.state.SNRforUser = SNR[i,:] 316 | 317 | # for i, landmark in enumerate(self.landmarks): 318 | # band = self.bandwidth.transpose() 319 | # landmark.assign_index = np.nonzero(band[i]) 320 | 321 | # get collision forces for any contact between two entities 322 | def get_collision_force(self, entity_a, entity_b): 323 | if (not entity_a.collide) or (not entity_b.collide): 324 | return [None, None] # not a collider 325 | if (entity_a is entity_b): 326 | return [None, None] # don't collide against itself 327 | # compute actual distance between entities 328 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos 329 | dist = np.sqrt(np.sum(np.square(delta_pos))) 330 | # minimum allowable distance 331 | dist_min = entity_a.size + entity_b.size 332 | # softmax penetration 333 | k = self.contact_margin 334 | penetration
= np.logaddexp(0, -(dist - dist_min)/k)*k 335 | force = self.contact_force * delta_pos / dist * penetration 336 | force_a = +force if entity_a.movable else None 337 | force_b = -force if entity_b.movable else None 338 | return [force_a, force_b] -------------------------------------------------------------------------------- /multiagent/environment_uav.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.envs.registration import EnvSpec 4 | import numpy as np 5 | from multiagent.multi_discrete import MultiDiscrete 6 | 7 | # environment for all agents in the multiagent world 8 | # currently code assumes that no agents will be created/destroyed at runtime! 9 | class MultiAgentEnv(gym.Env): 10 | metadata = { 11 | 'render.modes' : ['human', 'rgb_array'] 12 | } 13 | 14 | def __init__(self, world, reset_callback=None, reward_callback=None, 15 | observation_callback=None, info_callback=None, 16 | done_callback=None, shared_viewer=True): 17 | 18 | self.world = world 19 | self.agents = self.world.policy_agents 20 | # set required vectorized gym env property 21 | self.n = len(world.policy_agents) 22 | # scenario callbacks 23 | self.reset_callback = reset_callback 24 | self.reward_callback = reward_callback 25 | self.observation_callback = observation_callback 26 | self.info_callback = info_callback 27 | self.done_callback = done_callback 28 | # environment parameters 29 | self.discrete_action_space = True 30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector 31 | self.discrete_action_input = False 32 | # if true, even the action is continuous, action will be performed discretely 33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False 34 | # if true, every agent has the same reward 35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | for agent in self.agents: 42 | total_action_space = [] 43 | # physical action space 44 | if self.discrete_action_space: 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | else: 47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) 48 | if agent.movable: 49 | total_action_space.append(u_action_space) 50 | # communication action space 51 | if self.discrete_action_space: 52 | c_action_space = spaces.Discrete(world.dim_c) 53 | else: 54 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) 55 | if not agent.silent: 56 | total_action_space.append(c_action_space) 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | # observation space 68 | obs_dim = len(observation_callback(agent, self.world)) 69 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) 70 | # agent.action.c = np.zeros(self.world.dim_c) 71 | 72 | # rendering 73 | self.shared_viewer = shared_viewer 74 
| if self.shared_viewer: 75 | self.viewers = [None] 76 | else: 77 | self.viewers = [None] * self.n 78 | self._reset_render() 79 | 80 | def step(self, action_n): 81 | obs_n = [] 82 | reward_n = [] 83 | done_n = [] 84 | info_n = {'n': []} 85 | self.agents = self.world.policy_agents 86 | # set action for each agent 87 | for i, agent in enumerate(self.agents): 88 | self._set_action(action_n[i], agent, self.action_space[i]) 89 | 90 | # advance world state 91 | self.world.step() 92 | # record observation for each agent 93 | for agent in self.agents: 94 | obs_n.append(self._get_obs(agent)) 95 | reward_n.append(self._get_reward(agent)) 96 | done_n.append(self._get_done(agent)) 97 | 98 | info_n['n'].append(self._get_info(agent)) 99 | 100 | # all agents get total reward in cooperative case 101 | reward = np.sum(reward_n) 102 | if self.shared_reward: 103 | reward_n = [reward] * self.n 104 | 105 | return obs_n, reward_n, done_n, info_n 106 | 107 | def reset(self): 108 | # reset world 109 | self.reset_callback(self.world) 110 | # reset renderer 111 | self._reset_render() 112 | # record observations for each agent 113 | obs_n = [] 114 | self.agents = self.world.policy_agents 115 | for agent in self.agents: 116 | obs_n.append(self._get_obs(agent)) 117 | return obs_n 118 | 119 | # get info used for benchmarking 120 | def _get_info(self, agent): 121 | if self.info_callback is None: 122 | return {} 123 | return self.info_callback(agent, self.world) 124 | 125 | # get observation for a particular agent 126 | def _get_obs(self, agent): 127 | if self.observation_callback is None: 128 | return np.zeros(0) 129 | return self.observation_callback(agent, self.world) 130 | 131 | # get dones for a particular agent 132 | # unused right now -- agents are allowed to go beyond the viewing screen 133 | def _get_done(self, agent): 134 | if self.done_callback is None: 135 | return False 136 | return self.done_callback(agent, self.world) 137 | 138 | # get reward for a particular agent 139 | def _get_reward(self, agent): 140 | if self.reward_callback is None: 141 | return 0.0 142 | return self.reward_callback(agent, self.world) 143 | 144 | # set env action for a particular agent 145 | def _set_action(self, action, agent, action_space, time=None): 146 | agent.action.u = np.zeros(self.world.dim_p) 147 | agent.action.c = np.zeros(self.world.dim_c) 148 | # process action 149 | if isinstance(action_space, MultiDiscrete): 150 | act = [] 151 | size = action_space.high - action_space.low + 1 152 | index = 0 153 | for s in size: 154 | act.append(action[index:(index+s)]) 155 | index += s 156 | action = act 157 | else: 158 | action = [action] 159 | 160 | 161 | if agent.movable: 162 | # physical action 163 | if self.discrete_action_input: 164 | agent.action.u = np.zeros(self.world.dim_p) 165 | # process discrete action 166 | if action[0] == 1: agent.action.u[0] = -1.0 167 | if action[0] == 2: agent.action.u[0] = +1.0 168 | if action[0] == 3: agent.action.u[1] = -1.0 169 | if action[0] == 4: agent.action.u[1] = +1.0 170 | else: 171 | if self.force_discrete_action: 172 | d = np.argmax(action[0]) 173 | action[0][:] = 0.0 174 | action[0][d] = 1.0 175 | if self.discrete_action_space: 176 | agent.action.u[0] = action[0][1] - action[0][2] 177 | agent.action.u[1] = action[0][3] - action[0][4] 178 | else: 179 | agent.action.u = action[0] 180 | sensitivity = 5 181 | if agent.accel is not None: 182 | sensitivity = agent.accel 183 | agent.action.u *= sensitivity 184 | action = action[1:] 185 | if not agent.silent: 186 | # communication action 187 | 
if self.discrete_action_input: 188 | agent.action.c = np.zeros(self.world.dim_c) 189 | agent.action.c[action[0]] = 1.0 190 | else: 191 | agent.action.c = action[0] 192 | action = action[1:] 193 | # make sure we used all elements of action 194 | assert len(action) == 0 195 | 196 | # reset rendering assets 197 | def _reset_render(self): 198 | self.render_geoms = None 199 | self.render_geoms_xform = None 200 | # self.render_geoms_uav = None 201 | # self.render_geoms_xform_uav = None 202 | 203 | # render environment 204 | def render(self, mode='human'): 205 | # if mode == 'human': 206 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 207 | # message = '' 208 | # for agent in self.world.agents: 209 | # comm = [] 210 | # for other in self.world.agents: 211 | # if other is agent: continue 212 | # if np.all(other.state.c == 0): 213 | # word = '_' 214 | # else: 215 | # word = alphabet[np.argmax(other.state.c)] 216 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ') 217 | # print(message) 218 | 219 | for i in range(len(self.viewers)): 220 | # create viewers (if necessary) 221 | if self.viewers[i] is None: 222 | # import rendering only if we need it (and don't import for headless machines) 223 | #from gym.envs.classic_control import rendering 224 | from multiagent import rendering 225 | self.viewers[i] = rendering.Viewer(700,700) 226 | 227 | # create rendering geometry 228 | if self.render_geoms is None: 229 | # import rendering only if we need it (and don't import for headless machines) 230 | #from gym.envs.classic_control import rendering 231 | from multiagent import rendering 232 | self.render_geoms = [] 233 | self.render_geoms_xform = [] 234 | self.render_geoms_uav = [] 235 | self.render_geoms_xform_uav = [] 236 | self.render_geoms_line = [] 237 | 238 | 239 | range_color = [0.25, 0.25, 0.75] 240 | for entity in self.world.entities: 241 | geom = rendering.make_circle(entity.size) 242 | xform = rendering.Transform() 243 | 244 | if 'agent' in entity.name: 245 | geom.set_color(*entity.color, alpha=0.5) 246 | 247 | geom_uav = rendering.make_circle(entity.size*4) 248 | xform_uav = rendering.Transform() 249 | geom_uav.set_color(*range_color, alpha=0.25) 250 | geom_uav.add_attr(xform_uav) 251 | 252 | self.render_geoms_uav.append(geom_uav) 253 | self.render_geoms_xform_uav.append(xform_uav) 254 | 255 | else: 256 | geom.set_color(*entity.color) 257 | 258 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos) 259 | self.render_geoms_line.append(geom_line) 260 | 261 | geom.add_attr(xform) 262 | self.render_geoms.append(geom) 263 | self.render_geoms_xform.append(xform) 264 | 265 | # add geoms to viewer 266 | for viewer in self.viewers: 267 | viewer.geoms = [] 268 | for geom in self.render_geoms: 269 | viewer.add_geom(geom) 270 | for geom_uav in self.render_geoms_uav: 271 | viewer.add_geom(geom_uav) 272 | for geom_line in self.render_geoms_line: 273 | viewer.add_geom(geom_line) 274 | 275 | results = [] 276 | for i in range(len(self.viewers)): 277 | from multiagent import rendering 278 | # update bounds to center around agent 279 | cam_range = 1 280 | if self.shared_viewer: 281 | pos = np.zeros(self.world.dim_p) 282 | else: 283 | pos = self.agents[i].state.p_pos 284 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range) 285 | # update geometry positions 286 | for e, entity in enumerate(self.world.entities): 287 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos) 288 | for a, agent in 
enumerate(self.world.policy_agents): 289 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos) 290 | for l, landmark in enumerate(self.world.landmarks): 291 | self.render_geoms_line[l].start = landmark.state.p_pos 292 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos 293 | 294 | # render to display or array 295 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) 296 | 297 | return results 298 | 299 | # create receptor field locations in local coordinate frame 300 | def _make_receptor_locations(self, agent): 301 | receptor_type = 'polar' 302 | range_min = 0.05 * 2.0 303 | range_max = 1.00 304 | dx = [] 305 | # circular receptive field 306 | if receptor_type == 'polar': 307 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): 308 | for distance in np.linspace(range_min, range_max, 3): 309 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) 310 | # add origin 311 | dx.append(np.array([0.0, 0.0])) 312 | # grid receptive field 313 | if receptor_type == 'grid': 314 | for x in np.linspace(-range_max, +range_max, 5): 315 | for y in np.linspace(-range_max, +range_max, 5): 316 | dx.append(np.array([x,y])) 317 | return dx -------------------------------------------------------------------------------- /multiagent/multi_discrete.py: -------------------------------------------------------------------------------- 1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates) 2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py) 3 | 4 | import numpy as np 5 | 6 | import gym 7 | from gym.spaces import prng 8 | 9 | class MultiDiscrete(gym.Space): 10 | """ 11 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters 12 | - It can be adapted to both a Discrete action space or a continuous (Box) action space 13 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space 14 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space 15 | where the discrete action space can take any integers from `min` to `max` (both inclusive) 16 | Note: A value of 0 always need to represent the NOOP action. 17 | e.g. 
Nintendo Game Controller 18 | - Can be conceptualized as 3 discrete action spaces: 19 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 20 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 21 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 22 | - Can be initialized as 23 | MultiDiscrete([ [0,4], [0,1], [0,1] ]) 24 | """ 25 | def __init__(self, array_of_param_array): 26 | self.low = np.array([x[0] for x in array_of_param_array]) 27 | self.high = np.array([x[1] for x in array_of_param_array]) 28 | self.num_discrete_space = self.low.shape[0] 29 | 30 | def sample(self): 31 | """ Returns a array with one sample from each discrete action space """ 32 | # For each row: round(random .* (max - min) + min, 0) 33 | random_array = prng.np_random.rand(self.num_discrete_space) 34 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] 35 | def contains(self, x): 36 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() 37 | 38 | @property 39 | def shape(self): 40 | return self.num_discrete_space 41 | def __repr__(self): 42 | return "MultiDiscrete" + str(self.num_discrete_space) 43 | def __eq__(self, other): 44 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) -------------------------------------------------------------------------------- /multiagent/policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyglet.window import key 3 | 4 | # individual agent policy 5 | class Policy(object): 6 | def __init__(self): 7 | pass 8 | def action(self, obs): 9 | raise NotImplementedError() 10 | 11 | # interactive policy based on keyboard input 12 | # hard-coded to deal only with movement, not communication 13 | class InteractivePolicy(Policy): 14 | def __init__(self, env, agent_index): 15 | super(InteractivePolicy, self).__init__() 16 | self.env = env 17 | # hard-coded keyboard events 18 | self.move = [False for i in range(4)] 19 | self.comm = [False for i in range(env.world.dim_c)] 20 | # register keyboard events with this environment's window 21 | env.viewers[agent_index].window.on_key_press = self.key_press 22 | env.viewers[agent_index].window.on_key_release = self.key_release 23 | 24 | def action(self, obs): 25 | # ignore observation and just act based on keyboard events 26 | if self.env.discrete_action_input: 27 | u = 0 28 | if self.move[0]: u = 1 29 | if self.move[1]: u = 2 30 | if self.move[2]: u = 4 31 | if self.move[3]: u = 3 32 | else: 33 | u = np.zeros(5) # 5-d because of no-move action 34 | if self.move[0]: u[1] += 1.0 35 | if self.move[1]: u[2] += 1.0 36 | if self.move[3]: u[3] += 1.0 37 | if self.move[2]: u[4] += 1.0 38 | if True not in self.move: 39 | u[0] += 1.0 40 | return np.concatenate([u, np.zeros(self.env.world.dim_c)]) 41 | 42 | # keyboard event callbacks 43 | def key_press(self, k, mod): 44 | if k==key.LEFT: self.move[0] = True 45 | if k==key.RIGHT: self.move[1] = True 46 | if k==key.UP: self.move[2] = True 47 | if k==key.DOWN: self.move[3] = True 48 | def key_release(self, k, mod): 49 | if k==key.LEFT: self.move[0] = False 50 | if k==key.RIGHT: self.move[1] = False 51 | if k==key.UP: self.move[2] = False 52 | if k==key.DOWN: self.move[3] = False 53 | -------------------------------------------------------------------------------- /multiagent/rendering.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | 2D rendering framework 3 | """ 4 | from __future__ import division 5 | import os 6 | import six 7 | import sys 8 | 9 | if "Apple" in sys.version: 10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ: 11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' 12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite 13 | 14 | from gym.utils import reraise 15 | from gym import error 16 | 17 | try: 18 | import pyglet 19 | except ImportError as e: 20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") 21 | 22 | try: 23 | from pyglet.gl import * 24 | except ImportError as e: 25 | reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") 26 | 27 | import math 28 | import numpy as np 29 | 30 | RAD2DEG = 57.29577951308232 31 | 32 | def get_display(spec): 33 | """Convert a display specification (such as :0) into an actual Display 34 | object. 35 | 36 | Pyglet only supports multiple Displays on Linux. 37 | """ 38 | if spec is None: 39 | return None 40 | elif isinstance(spec, six.string_types): 41 | return pyglet.canvas.Display(spec) 42 | else: 43 | raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec)) 44 | 45 | class Viewer(object): 46 | def __init__(self, width, height, display=None): 47 | display = get_display(display) 48 | 49 | self.width = width 50 | self.height = height 51 | 52 | self.window = pyglet.window.Window(width=width, height=height, display=display) 53 | self.window.on_close = self.window_closed_by_user 54 | self.geoms = [] 55 | self.onetime_geoms = [] 56 | self.transform = Transform() 57 | 58 | glEnable(GL_BLEND) 59 | # glEnable(GL_MULTISAMPLE) 60 | glEnable(GL_LINE_SMOOTH) 61 | # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE) 62 | glHint(GL_LINE_SMOOTH_HINT, GL_NICEST) 63 | glLineWidth(2.0) 64 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) 65 | 66 | def close(self): 67 | self.window.close() 68 | 69 | def window_closed_by_user(self): 70 | self.close() 71 | 72 | def set_bounds(self, left, right, bottom, top): 73 | assert right > left and top > bottom 74 | scalex = self.width/(right-left) 75 | scaley = self.height/(top-bottom) 76 | self.transform = Transform( 77 | translation=(-left*scalex, -bottom*scaley), 78 | scale=(scalex, scaley)) 79 | 80 | def add_geom(self, geom): 81 | self.geoms.append(geom) 82 | 83 | def add_onetime(self, geom): 84 | self.onetime_geoms.append(geom) 85 | 86 | def render(self, return_rgb_array=False): 87 | glClearColor(1,1,1,1) 88 | self.window.clear() 89 | self.window.switch_to() 90 | self.window.dispatch_events() 91 | self.transform.enable() 92 | for geom in self.geoms: 93 | geom.render() 94 | for geom in self.onetime_geoms: 95 | geom.render() 96 | self.transform.disable() 97 | arr = None 98 | if return_rgb_array: 99 | buffer = pyglet.image.get_buffer_manager().get_color_buffer() 100 | image_data = buffer.get_image_data() 101 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') 102 | # In https://github.com/openai/gym-http-api/issues/2, we 103 | # discovered 
that someone using Xmonad on Arch was having 104 | # a window of size 598 x 398, though a 600 x 400 window 105 | # was requested. (Guess Xmonad was preserving a pixel for 106 | # the boundary.) So we use the buffer height/width rather 107 | # than the requested one. 108 | arr = arr.reshape(buffer.height, buffer.width, 4) 109 | arr = arr[::-1,:,0:3] 110 | self.window.flip() 111 | self.onetime_geoms = [] 112 | return arr 113 | 114 | # Convenience 115 | def draw_circle(self, radius=10, res=30, filled=True, **attrs): 116 | geom = make_circle(radius=radius, res=res, filled=filled) 117 | _add_attrs(geom, attrs) 118 | self.add_onetime(geom) 119 | return geom 120 | 121 | def draw_polygon(self, v, filled=True, **attrs): 122 | geom = make_polygon(v=v, filled=filled) 123 | _add_attrs(geom, attrs) 124 | self.add_onetime(geom) 125 | return geom 126 | 127 | def draw_polyline(self, v, **attrs): 128 | geom = make_polyline(v=v) 129 | _add_attrs(geom, attrs) 130 | self.add_onetime(geom) 131 | return geom 132 | 133 | def draw_line(self, start, end, **attrs): 134 | geom = Line(start, end) 135 | _add_attrs(geom, attrs) 136 | self.add_onetime(geom) 137 | return geom 138 | 139 | def get_array(self): 140 | self.window.flip() 141 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() 142 | self.window.flip() 143 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') 144 | arr = arr.reshape(self.height, self.width, 4) 145 | return arr[::-1,:,0:3] 146 | 147 | def _add_attrs(geom, attrs): 148 | if "color" in attrs: 149 | geom.set_color(*attrs["color"]) 150 | if "linewidth" in attrs: 151 | geom.set_linewidth(attrs["linewidth"]) 152 | 153 | class Geom(object): 154 | def __init__(self): 155 | self._color=Color((0, 0, 0, 1.0)) 156 | self.attrs = [self._color] 157 | def render(self): 158 | for attr in reversed(self.attrs): 159 | attr.enable() 160 | self.render1() 161 | for attr in self.attrs: 162 | attr.disable() 163 | def render1(self): 164 | raise NotImplementedError 165 | def add_attr(self, attr): 166 | self.attrs.append(attr) 167 | def set_color(self, r, g, b, alpha=1): 168 | self._color.vec4 = (r, g, b, alpha) 169 | 170 | class Attr(object): 171 | def enable(self): 172 | raise NotImplementedError 173 | def disable(self): 174 | pass 175 | 176 | class Transform(Attr): 177 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)): 178 | self.set_translation(*translation) 179 | self.set_rotation(rotation) 180 | self.set_scale(*scale) 181 | def enable(self): 182 | glPushMatrix() 183 | glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint 184 | glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0) 185 | glScalef(self.scale[0], self.scale[1], 1) 186 | def disable(self): 187 | glPopMatrix() 188 | def set_translation(self, newx, newy): 189 | self.translation = (float(newx), float(newy)) 190 | def set_rotation(self, new): 191 | self.rotation = float(new) 192 | def set_scale(self, newx, newy): 193 | self.scale = (float(newx), float(newy)) 194 | 195 | class Color(Attr): 196 | def __init__(self, vec4): 197 | self.vec4 = vec4 198 | def enable(self): 199 | glColor4f(*self.vec4) 200 | 201 | class LineStyle(Attr): 202 | def __init__(self, style): 203 | self.style = style 204 | def enable(self): 205 | glEnable(GL_LINE_STIPPLE) 206 | glLineStipple(1, self.style) 207 | def disable(self): 208 | glDisable(GL_LINE_STIPPLE) 209 | 210 | class LineWidth(Attr): 211 | def __init__(self, stroke): 212 | self.stroke = stroke 213 | def enable(self): 214 | 
glLineWidth(self.stroke) 215 | 216 | class Point(Geom): 217 | def __init__(self): 218 | Geom.__init__(self) 219 | def render1(self): 220 | glBegin(GL_POINTS) # draw point 221 | glVertex3f(0.0, 0.0, 0.0) 222 | glEnd() 223 | 224 | class FilledPolygon(Geom): 225 | def __init__(self, v): 226 | Geom.__init__(self) 227 | self.v = v 228 | def render1(self): 229 | if len(self.v) == 4 : glBegin(GL_QUADS) 230 | elif len(self.v) > 4 : glBegin(GL_POLYGON) 231 | else: glBegin(GL_TRIANGLES) 232 | for p in self.v: 233 | glVertex3f(p[0], p[1],0) # draw each vertex 234 | glEnd() 235 | 236 | color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5) 237 | glColor4f(*color) 238 | glBegin(GL_LINE_LOOP) 239 | for p in self.v: 240 | glVertex3f(p[0], p[1],0) # draw each vertex 241 | glEnd() 242 | 243 | def make_circle(radius=10, res=30, filled=True): 244 | points = [] 245 | for i in range(res): 246 | ang = 2*math.pi*i / res 247 | points.append((math.cos(ang)*radius, math.sin(ang)*radius)) 248 | if filled: 249 | return FilledPolygon(points) 250 | else: 251 | return PolyLine(points, True) 252 | 253 | def make_line(start,end): 254 | return Line(start, end) 255 | 256 | 257 | def make_polygon(v, filled=True): 258 | if filled: return FilledPolygon(v) 259 | else: return PolyLine(v, True) 260 | 261 | def make_polyline(v): 262 | return PolyLine(v, False) 263 | 264 | def make_capsule(length, width): 265 | l, r, t, b = 0, length, width/2, -width/2 266 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)]) 267 | circ0 = make_circle(width/2) 268 | circ1 = make_circle(width/2) 269 | circ1.add_attr(Transform(translation=(length, 0))) 270 | geom = Compound([box, circ0, circ1]) 271 | return geom 272 | 273 | class Compound(Geom): 274 | def __init__(self, gs): 275 | Geom.__init__(self) 276 | self.gs = gs 277 | for g in self.gs: 278 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)] 279 | def render1(self): 280 | for g in self.gs: 281 | g.render() 282 | 283 | class PolyLine(Geom): 284 | def __init__(self, v, close): 285 | Geom.__init__(self) 286 | self.v = v 287 | self.close = close 288 | self.linewidth = LineWidth(1) 289 | self.add_attr(self.linewidth) 290 | def render1(self): 291 | glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP) 292 | for p in self.v: 293 | glVertex3f(p[0], p[1],0) # draw each vertex 294 | glEnd() 295 | def set_linewidth(self, x): 296 | self.linewidth.stroke = x 297 | 298 | class Line(Geom): 299 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)): 300 | Geom.__init__(self) 301 | self.start = start 302 | self.end = end 303 | self.linewidth = LineWidth(1) 304 | self.add_attr(self.linewidth) 305 | 306 | def render1(self): 307 | glBegin(GL_LINES) 308 | glVertex2f(*self.start) 309 | glVertex2f(*self.end) 310 | glEnd() 311 | 312 | class Image(Geom): 313 | def __init__(self, fname, width, height): 314 | Geom.__init__(self) 315 | self.width = width 316 | self.height = height 317 | img = pyglet.image.load(fname) 318 | self.img = img 319 | self.flip = False 320 | def render1(self): 321 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height) 322 | 323 | # ================================================================ 324 | 325 | class SimpleImageViewer(object): 326 | def __init__(self, display=None): 327 | self.window = None 328 | self.isopen = False 329 | self.display = display 330 | def imshow(self, arr): 331 | if self.window is None: 332 | height, width, channels = arr.shape 333 | self.window = 
pyglet.window.Window(width=width, height=height, display=self.display) 334 | self.width = width 335 | self.height = height 336 | self.isopen = True 337 | assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape" 338 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3) 339 | self.window.clear() 340 | self.window.switch_to() 341 | self.window.dispatch_events() 342 | image.blit(0,0) 343 | self.window.flip() 344 | def close(self): 345 | if self.isopen: 346 | self.window.close() 347 | self.isopen = False 348 | def __del__(self): 349 | self.close() -------------------------------------------------------------------------------- /multiagent/scenario.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world): 10 | raise NotImplementedError() 11 | -------------------------------------------------------------------------------- /multiagent/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | import imp 2 | import os.path as osp 3 | 4 | 5 | def load(name): 6 | pathname = osp.join(osp.dirname(__file__), name) 7 | return imp.load_source('', pathname) 8 | -------------------------------------------------------------------------------- /multiagent/scenarios/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/scenarios/__pycache__/simple_adversary.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/simple_adversary.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/scenarios/__pycache__/simple_collaborative_comm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/simple_collaborative_comm.cpython-36.pyc -------------------------------------------------------------------------------- /multiagent/scenarios/simple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | class Scenario(BaseScenario): 5 | def make_world(self): 6 | world = World() 7 | # add agents 8 | world.agents = [Agent() for i in range(1)] 9 | for i, agent in enumerate(world.agents): 10 | agent.name = 'agent %d' % i 11 | agent.collide = False 12 | agent.silent = True 13 | # add landmarks 14 | world.landmarks = [Landmark() for i in range(1)] 15 | for i, landmark in enumerate(world.landmarks): 16 | landmark.name = 'landmark %d' % i 17 | landmark.collide = False 18 | landmark.movable = False 19 | # make initial 
conditions 20 | self.reset_world(world) 21 | return world 22 | 23 | def reset_world(self, world): 24 | # random properties for agents 25 | for i, agent in enumerate(world.agents): 26 | agent.color = np.array([0.25,0.25,0.25]) 27 | # random properties for landmarks 28 | for i, landmark in enumerate(world.landmarks): 29 | landmark.color = np.array([0.75,0.75,0.75]) 30 | world.landmarks[0].color = np.array([0.75,0.25,0.25]) 31 | # set random initial states 32 | for agent in world.agents: 33 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 34 | agent.state.p_vel = np.zeros(world.dim_p) 35 | agent.state.c = np.zeros(world.dim_c) 36 | for i, landmark in enumerate(world.landmarks): 37 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 38 | landmark.state.p_vel = np.zeros(world.dim_p) 39 | 40 | def reward(self, agent, world): 41 | dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) 42 | return -dist2 43 | 44 | def observation(self, agent, world): 45 | # get positions of all entities in this agent's reference frame 46 | entity_pos = [] 47 | for entity in world.landmarks: 48 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 49 | return np.concatenate([agent.state.p_vel] + entity_pos) 50 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_adversary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | 6 | class Scenario(BaseScenario): 7 | 8 | def make_world(self): 9 | world = World() 10 | # set any world properties first 11 | world.dim_c = 2 12 | num_agents = 3 13 | world.num_agents = num_agents 14 | num_adversaries = 1 15 | num_landmarks = num_agents - 1 16 | # add agents 17 | world.agents = [Agent() for i in range(num_agents)] 18 | for i, agent in enumerate(world.agents): 19 | agent.name = 'agent %d' % i 20 | agent.collide = False 21 | agent.silent = True 22 | agent.adversary = True if i < num_adversaries else False 23 | agent.size = 0.15 24 | # add landmarks 25 | world.landmarks = [Landmark() for i in range(num_landmarks)] 26 | for i, landmark in enumerate(world.landmarks): 27 | landmark.name = 'landmark %d' % i 28 | landmark.collide = False 29 | landmark.movable = False 30 | landmark.size = 0.08 31 | # make initial conditions 32 | self.reset_world(world) 33 | return world 34 | 35 | def reset_world(self, world): 36 | # random properties for agents 37 | world.agents[0].color = np.array([0.85, 0.35, 0.35]) 38 | for i in range(1, world.num_agents): 39 | world.agents[i].color = np.array([0.35, 0.35, 0.85]) 40 | # random properties for landmarks 41 | for i, landmark in enumerate(world.landmarks): 42 | landmark.color = np.array([0.15, 0.15, 0.15]) 43 | # set goal landmark 44 | goal = np.random.choice(world.landmarks) 45 | goal.color = np.array([0.15, 0.65, 0.15]) 46 | for agent in world.agents: 47 | agent.goal_a = goal 48 | # set random initial states 49 | for agent in world.agents: 50 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 51 | agent.state.p_vel = np.zeros(world.dim_p) 52 | agent.state.c = np.zeros(world.dim_c) 53 | for i, landmark in enumerate(world.landmarks): 54 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 55 | landmark.state.p_vel = np.zeros(world.dim_p) 56 | 57 | def benchmark_data(self, agent, world): 58 | # returns data for benchmarking purposes 59 | if agent.adversary: 60 | 
return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) 61 | else: 62 | dists = [] 63 | for l in world.landmarks: 64 | dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos))) 65 | dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) 66 | return tuple(dists) 67 | 68 | # return all agents that are not adversaries 69 | def good_agents(self, world): 70 | return [agent for agent in world.agents if not agent.adversary] 71 | 72 | # return all adversarial agents 73 | def adversaries(self, world): 74 | return [agent for agent in world.agents if agent.adversary] 75 | 76 | def reward(self, agent, world): 77 | # Agents are rewarded based on minimum agent distance to each landmark 78 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) 79 | 80 | def agent_reward(self, agent, world): 81 | # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it 82 | shaped_reward = True 83 | shaped_adv_reward = True 84 | 85 | # Calculate negative reward for adversary 86 | adversary_agents = self.adversaries(world) 87 | if shaped_adv_reward: # distance-based adversary reward 88 | adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) 89 | else: # proximity-based adversary reward (binary) 90 | adv_rew = 0 91 | for a in adversary_agents: 92 | if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: 93 | adv_rew -= 5 94 | 95 | # Calculate positive reward for agents 96 | good_agents = self.good_agents(world) 97 | if shaped_reward: # distance-based agent reward 98 | pos_rew = -min( 99 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) 100 | else: # proximity-based agent reward (binary) 101 | pos_rew = 0 102 | if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ 103 | < 2 * agent.goal_a.size: 104 | pos_rew += 5 105 | pos_rew -= min( 106 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) 107 | return pos_rew + adv_rew 108 | 109 | def adversary_reward(self, agent, world): 110 | # Rewarded based on proximity to the goal landmark 111 | shaped_reward = True 112 | if shaped_reward: # distance-based reward 113 | return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) 114 | else: # proximity-based reward (binary) 115 | adv_rew = 0 116 | if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: 117 | adv_rew += 5 118 | return adv_rew 119 | 120 | 121 | def observation(self, agent, world): 122 | # get positions of all entities in this agent's reference frame 123 | entity_pos = [] 124 | for entity in world.landmarks: 125 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 126 | # entity colors 127 | entity_color = [] 128 | for entity in world.landmarks: 129 | entity_color.append(entity.color) 130 | # communication of all other agents 131 | other_pos = [] 132 | for other in world.agents: 133 | if other is agent: continue 134 | other_pos.append(other.state.p_pos - agent.state.p_pos) 135 | 136 | if not agent.adversary: 137 | return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) 138 | else: 139 | return np.concatenate(entity_pos + other_pos) 140 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_collaborative_comm.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core_uav import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 10 10 | num_uav = 3 11 | world.collaborative = True # whether agents share rewards 12 | # add agents 13 | world.agents = [Agent() for i in range(num_uav)] 14 | for i, agent in enumerate(world.agents): 15 | agent.name = 'agent %d' % i 16 | agent.collide = True 17 | # add landmarks 18 | world.landmarks = [Landmark() for i in range(world.dim_c)] 19 | for i, landmark in enumerate(world.landmarks): 20 | landmark.name = 'landmark %d' % i 21 | landmark.collide = False 22 | landmark.movable = True 23 | 24 | # make initial conditions 25 | self.reset_world(world) 26 | return world 27 | 28 | def reset_world(self, world): 29 | # set color for landmarks 30 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75]) 31 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75]) 32 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75]) 33 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75]) 34 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75]) 35 | 36 | # set random initial states 37 | # for i, agent in enumerate(world.agents): 38 | # agent.color = np.array([0.35, 0.35, 0.85]) 39 | # # random properties for landmarks 40 | # for i, landmark in enumerate(world.landmarks): 41 | # landmark.color = np.array([0.25, 0.25, 0.25]) 42 | # set random initial states 43 | for i, agent in enumerate(world.agents): 44 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i] 45 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i] 46 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i] 47 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 48 | agent.state.p_vel = np.zeros(world.dim_p) 49 | agent.state.c = np.zeros(world.dim_c) 50 | agent.state.assign = np.zeros(world.dim_c) 51 | agent.state.SNRforUser = np.zeros(world.dim_c) 52 | 53 | for i, landmark in enumerate(world.landmarks): 54 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 55 | 56 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629], 57 | # [-0.324932, 0.60295496], [0.62360916, -0.52245444], 58 | # [0.65862443, 0.23426809]])[i] 59 | 60 | landmark.state.p_pos = np.array([[-0.8154612, -0.24502182], [0.89699076, -0.14581629], 61 | [-0.3249325, 0.60295496], [0.62360916, -0.52245444], 62 | [0.65862443, 0.23426809], [0.29590076, 0.54571629], 63 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649], 64 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i] 65 | 66 | # landmark.state.p_pos = np.array([[0.89699076, -0.14581629], [-0.8154612, -0.24502182], 67 | # [0.65862443, 0.23426809], [-0.3249325, 0.60295496], 68 | # [0.62360916, -0.52245444], [0.29590076, 0.54571629], 69 | # [-0.89699076, -0.74682629], [-0.89699076, 0.14581629], 70 | # [0.84629571, -0.98582649], [-0.14699076, -0.75584629]])[i] 71 | 72 | landmark.state.p_vel = np.zeros(world.dim_p) 73 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p) 74 | landmark.color = np.array([0.25, 0.25, 0.25]) 75 | landmark.assign_index = None 76 | landmark.switch_cout = 0 77 | 78 | def benchmark_data(self, agent, world): 79 | rew = 0 80 | collisions = 0 81 | occupied_landmarks = 0 82 | min_dists = 0 83 
| for l in world.landmarks: 84 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] 85 | min_dists += min(dists) 86 | rew -= min(dists) 87 | if min(dists) < 0.1: 88 | occupied_landmarks += 1 89 | if agent.collide: 90 | for a in world.agents: 91 | if self.is_collision(a, agent): 92 | rew -= 1 93 | collisions += 1 94 | return rew, collisions, min_dists, occupied_landmarks 95 | 96 | def is_collision(self, agent1, agent2): 97 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 98 | dist = np.sqrt(np.sum(np.square(delta_pos))) 99 | dist_min = agent1.size + agent2.size 100 | return True if dist < dist_min else False 101 | 102 | def reward(self, agent, world): 103 | p = 0.7 104 | try: 105 | 106 | # reward = 10*np.log(p*np.min(world.Rate[np.nonzero(world.Rate)])+(1-p)*np.average(world.Rate[np.nonzero(world.Rate)])) 107 | # reward = 10 * np.log(np.min(world.Rate[np.nonzero(world.Rate)])) 108 | 109 | # Finding the minimum-rate user this way is safer: it avoids the error the formulas above raise when all rates are zero 110 | min_rate = None 111 | for i in range(world.dim_c): 112 | min_rate = np.max(world.Rate[:,i]) if min_rate is None else min(min_rate, np.max(world.Rate[:,i])) 113 | reward = 10*np.log(min_rate+1e-9) 114 | 115 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)])) 116 | # reward = np.log(np.sum(world.Rate)) 117 | except ValueError: 118 | reward = 0 119 | 120 | # reward = np.log(np.sum(world.Rate)) 121 | 122 | for i in range(world.dim_c): 123 | if np.max(world.SNR[:, i]) < 15: 124 | reward -= 10 125 | #print('SNR not satisfied') 126 | 127 | for a in world.agents: 128 | if not (-1 <= a.state.p_pos[0] <= 1 and -1 <= a.state.p_pos[1] <= 1): 129 | reward -= 100 130 | # print(a.name) 131 | # print(a.state.p_pos) 132 | # raise Exception('out of bounds') 133 | 134 | # # Use the number of user handovers as a negative reward 135 | # for landmark in world.landmarks: 136 | # reward -= landmark.switch_cout 137 | # landmark.switch_cout = 0 138 | 139 | if agent.collide: 140 | for a in world.agents: 141 | if a.name != agent.name: 142 | if self.is_collision(a, agent): 143 | reward -= 100 144 | 145 | return reward 146 | 147 | def observation(self, agent, world): 148 | # get positions of all entities in this agent's reference frame 149 | entity_pos = [] 150 | for entity in world.landmarks: # world.entities: 151 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 152 | # entity colors 153 | entity_color = [] 154 | for entity in world.landmarks: # world.entities: 155 | entity_color.append(entity.color) 156 | # communication of all other agents 157 | other_comm = [] 158 | other_pos = [] 159 | assign = [] 160 | SNRforUser = [] 161 | for other in world.agents: 162 | SNRforUser.append(other.state.SNRforUser) 163 | assign.append(other.state.assign) 164 | if other is agent: continue 165 | other_comm.append(other.state.c) 166 | other_pos.append(other.state.p_pos - agent.state.p_pos) 167 | 168 | maxSNR = np.max(SNRforUser) 169 | normalized_SNR = SNRforUser.copy() 170 | for i, item in enumerate(SNRforUser): 171 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR 172 | # return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_comm) # original code 173 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR) -------------------------------------------------------------------------------- /multiagent/scenarios/simple_crypto.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scenario: 3 | 1 
speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from 4 | adversary to goal. Adversary is rewarded for its distance to the goal. 5 | """ 6 | 7 | 8 | import numpy as np 9 | from multiagent.core import World, Agent, Landmark 10 | from multiagent.scenario import BaseScenario 11 | import random 12 | 13 | 14 | class CryptoAgent(Agent): 15 | def __init__(self): 16 | super(CryptoAgent, self).__init__() 17 | self.key = None 18 | 19 | class Scenario(BaseScenario): 20 | 21 | def make_world(self): 22 | world = World() 23 | # set any world properties first 24 | num_agents = 3 25 | num_adversaries = 1 26 | num_landmarks = 2 27 | world.dim_c = 4 28 | # add agents 29 | world.agents = [CryptoAgent() for i in range(num_agents)] 30 | for i, agent in enumerate(world.agents): 31 | agent.name = 'agent %d' % i 32 | agent.collide = False 33 | agent.adversary = True if i < num_adversaries else False 34 | agent.speaker = True if i == 2 else False 35 | agent.movable = False 36 | # add landmarks 37 | world.landmarks = [Landmark() for i in range(num_landmarks)] 38 | for i, landmark in enumerate(world.landmarks): 39 | landmark.name = 'landmark %d' % i 40 | landmark.collide = False 41 | landmark.movable = False 42 | # make initial conditions 43 | self.reset_world(world) 44 | return world 45 | 46 | 47 | def reset_world(self, world): 48 | # random properties for agents 49 | for i, agent in enumerate(world.agents): 50 | agent.color = np.array([0.25, 0.25, 0.25]) 51 | if agent.adversary: 52 | agent.color = np.array([0.75, 0.25, 0.25]) 53 | agent.key = None 54 | # random properties for landmarks 55 | color_list = [np.zeros(world.dim_c) for i in world.landmarks] 56 | for i, color in enumerate(color_list): 57 | color[i] += 1 58 | for color, landmark in zip(color_list, world.landmarks): 59 | landmark.color = color 60 | # set goal landmark 61 | goal = np.random.choice(world.landmarks) 62 | world.agents[1].color = goal.color 63 | world.agents[2].key = np.random.choice(world.landmarks).color 64 | 65 | for agent in world.agents: 66 | agent.goal_a = goal 67 | 68 | # set random initial states 69 | for agent in world.agents: 70 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 71 | agent.state.p_vel = np.zeros(world.dim_p) 72 | agent.state.c = np.zeros(world.dim_c) 73 | for i, landmark in enumerate(world.landmarks): 74 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 75 | landmark.state.p_vel = np.zeros(world.dim_p) 76 | 77 | 78 | def benchmark_data(self, agent, world): 79 | # returns data for benchmarking purposes 80 | return (agent.state.c, agent.goal_a.color) 81 | 82 | # return all agents that are not adversaries 83 | def good_listeners(self, world): 84 | return [agent for agent in world.agents if not agent.adversary and not agent.speaker] 85 | 86 | # return all agents that are not adversaries 87 | def good_agents(self, world): 88 | return [agent for agent in world.agents if not agent.adversary] 89 | 90 | # return all adversarial agents 91 | def adversaries(self, world): 92 | return [agent for agent in world.agents if agent.adversary] 93 | 94 | def reward(self, agent, world): 95 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) 96 | 97 | def agent_reward(self, agent, world): 98 | # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot 99 | good_listeners = self.good_listeners(world) 100 | adversaries = self.adversaries(world) 101 | good_rew = 0 102 | adv_rew = 0 103 | for a 
in good_listeners: 104 | if (a.state.c == np.zeros(world.dim_c)).all(): 105 | continue 106 | else: 107 | good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color)) 108 | for a in adversaries: 109 | if (a.state.c == np.zeros(world.dim_c)).all(): 110 | continue 111 | else: 112 | adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color)) 113 | adv_rew += adv_l1 114 | return adv_rew + good_rew 115 | 116 | def adversary_reward(self, agent, world): 117 | # Adversary (Eve) is rewarded if it can reconstruct original goal 118 | rew = 0 119 | if not (agent.state.c == np.zeros(world.dim_c)).all(): 120 | rew -= np.sum(np.square(agent.state.c - agent.goal_a.color)) 121 | return rew 122 | 123 | 124 | def observation(self, agent, world): 125 | # goal color 126 | goal_color = np.zeros(world.dim_color) 127 | if agent.goal_a is not None: 128 | goal_color = agent.goal_a.color 129 | 130 | # get positions of all entities in this agent's reference frame 131 | entity_pos = [] 132 | for entity in world.landmarks: 133 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 134 | # communication of all other agents 135 | comm = [] 136 | for other in world.agents: 137 | if other is agent or (other.state.c is None) or not other.speaker: continue 138 | comm.append(other.state.c) 139 | 140 | confer = np.array([0]) 141 | 142 | if world.agents[2].key is None: 143 | confer = np.array([1]) 144 | key = np.zeros(world.dim_c) 145 | goal_color = np.zeros(world.dim_c) 146 | else: 147 | key = world.agents[2].key 148 | 149 | prnt = False 150 | # speaker 151 | if agent.speaker: 152 | if prnt: 153 | print('speaker') 154 | print(agent.state.c) 155 | print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)])) 156 | return np.concatenate([goal_color] + [key]) 157 | # listener 158 | if not agent.speaker and not agent.adversary: 159 | if prnt: 160 | print('listener') 161 | print(agent.state.c) 162 | print(np.concatenate([key] + comm + [confer])) 163 | return np.concatenate([key] + comm) 164 | if not agent.speaker and agent.adversary: 165 | if prnt: 166 | print('adversary') 167 | print(agent.state.c) 168 | print(np.concatenate(comm + [confer])) 169 | return np.concatenate(comm) 170 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_push.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 2 10 | num_agents = 2 11 | num_adversaries = 1 12 | num_landmarks = 2 13 | # add agents 14 | world.agents = [Agent() for i in range(num_agents)] 15 | for i, agent in enumerate(world.agents): 16 | agent.name = 'agent %d' % i 17 | agent.collide = True 18 | agent.silent = True 19 | if i < num_adversaries: 20 | agent.adversary = True 21 | else: 22 | agent.adversary = False 23 | # add landmarks 24 | world.landmarks = [Landmark() for i in range(num_landmarks)] 25 | for i, landmark in enumerate(world.landmarks): 26 | landmark.name = 'landmark %d' % i 27 | landmark.collide = False 28 | landmark.movable = False 29 | # make initial conditions 30 | self.reset_world(world) 31 | return world 32 | 33 | def reset_world(self, world): 34 | # random properties for landmarks 35 | for i, landmark in enumerate(world.landmarks): 36 | landmark.color = np.array([0.1, 0.1, 0.1]) 37 | landmark.color[i 
+ 1] += 0.8 38 | landmark.index = i 39 | # set goal landmark 40 | goal = np.random.choice(world.landmarks) 41 | for i, agent in enumerate(world.agents): 42 | agent.goal_a = goal 43 | agent.color = np.array([0.25, 0.25, 0.25]) 44 | if agent.adversary: 45 | agent.color = np.array([0.75, 0.25, 0.25]) 46 | else: 47 | j = goal.index 48 | agent.color[j + 1] += 0.5 49 | # set random initial states 50 | for agent in world.agents: 51 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 52 | agent.state.p_vel = np.zeros(world.dim_p) 53 | agent.state.c = np.zeros(world.dim_c) 54 | for i, landmark in enumerate(world.landmarks): 55 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 56 | landmark.state.p_vel = np.zeros(world.dim_p) 57 | 58 | def reward(self, agent, world): 59 | # Agents are rewarded based on minimum agent distance to each landmark 60 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) 61 | 62 | def agent_reward(self, agent, world): 63 | # the distance to the goal 64 | return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) 65 | 66 | def adversary_reward(self, agent, world): 67 | # keep the nearest good agents away from the goal 68 | agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] 69 | pos_rew = min(agent_dist) 70 | #nearest_agent = world.good_agents[np.argmin(agent_dist)] 71 | #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) 72 | neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) 73 | #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) 74 | return pos_rew - neg_rew 75 | 76 | def observation(self, agent, world): 77 | # get positions of all entities in this agent's reference frame 78 | entity_pos = [] 79 | for entity in world.landmarks: # world.entities: 80 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 81 | # entity colors 82 | entity_color = [] 83 | for entity in world.landmarks: # world.entities: 84 | entity_color.append(entity.color) 85 | # communication of all other agents 86 | comm = [] 87 | other_pos = [] 88 | for other in world.agents: 89 | if other is agent: continue 90 | comm.append(other.state.c) 91 | other_pos.append(other.state.p_pos - agent.state.p_pos) 92 | if not agent.adversary: 93 | return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) 94 | else: 95 | #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network 96 | return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) 97 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_reference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 10 10 | world.collaborative = True # whether agents share rewards 11 | # add agents 12 | world.agents = [Agent() for i in range(2)] 13 | for i, agent in enumerate(world.agents): 14 | agent.name = 'agent %d' % i 15 | agent.collide = False 16 | # add 
landmarks 17 | world.landmarks = [Landmark() for i in range(3)] 18 | for i, landmark in enumerate(world.landmarks): 19 | landmark.name = 'landmark %d' % i 20 | landmark.collide = False 21 | landmark.movable = False 22 | # make initial conditions 23 | self.reset_world(world) 24 | return world 25 | 26 | def reset_world(self, world): 27 | # assign goals to agents 28 | for agent in world.agents: 29 | agent.goal_a = None 30 | agent.goal_b = None 31 | # want other agent to go to the goal landmark 32 | world.agents[0].goal_a = world.agents[1] 33 | world.agents[0].goal_b = np.random.choice(world.landmarks) 34 | world.agents[1].goal_a = world.agents[0] 35 | world.agents[1].goal_b = np.random.choice(world.landmarks) 36 | # random properties for agents 37 | for i, agent in enumerate(world.agents): 38 | agent.color = np.array([0.25,0.25,0.25]) 39 | # random properties for landmarks 40 | world.landmarks[0].color = np.array([0.75,0.25,0.25]) 41 | world.landmarks[1].color = np.array([0.25,0.75,0.25]) 42 | world.landmarks[2].color = np.array([0.25,0.25,0.75]) 43 | # special colors for goals 44 | world.agents[0].goal_a.color = world.agents[0].goal_b.color 45 | world.agents[1].goal_a.color = world.agents[1].goal_b.color 46 | # set random initial states 47 | for agent in world.agents: 48 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 49 | agent.state.p_vel = np.zeros(world.dim_p) 50 | agent.state.c = np.zeros(world.dim_c) 51 | for i, landmark in enumerate(world.landmarks): 52 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 53 | landmark.state.p_vel = np.zeros(world.dim_p) 54 | 55 | def reward(self, agent, world): 56 | if agent.goal_a is None or agent.goal_b is None: 57 | return 0.0 58 | dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos)) 59 | return -dist2 60 | 61 | def observation(self, agent, world): 62 | # goal color 63 | goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)] 64 | if agent.goal_b is not None: 65 | goal_color[1] = agent.goal_b.color 66 | 67 | # get positions of all entities in this agent's reference frame 68 | entity_pos = [] 69 | for entity in world.landmarks: 70 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 71 | # entity colors 72 | entity_color = [] 73 | for entity in world.landmarks: 74 | entity_color.append(entity.color) 75 | # communication of all other agents 76 | comm = [] 77 | for other in world.agents: 78 | if other is agent: continue 79 | comm.append(other.state.c) 80 | return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm) 81 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_speaker_listener.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 3 10 | num_landmarks = 3 11 | world.collaborative = True 12 | # add agents 13 | world.agents = [Agent() for i in range(2)] 14 | for i, agent in enumerate(world.agents): 15 | agent.name = 'agent %d' % i 16 | agent.collide = False 17 | agent.size = 0.075 18 | # speaker 19 | world.agents[0].movable = False 20 | # listener 21 | world.agents[1].silent = True 22 | # add landmarks 23 | world.landmarks = [Landmark() for i in range(num_landmarks)] 24 | for i, landmark in 
enumerate(world.landmarks): 25 | landmark.name = 'landmark %d' % i 26 | landmark.collide = False 27 | landmark.movable = False 28 | landmark.size = 0.04 29 | # make initial conditions 30 | self.reset_world(world) 31 | return world 32 | 33 | def reset_world(self, world): 34 | # assign goals to agents 35 | for agent in world.agents: 36 | agent.goal_a = None 37 | agent.goal_b = None 38 | # want listener to go to the goal landmark 39 | world.agents[0].goal_a = world.agents[1] 40 | world.agents[0].goal_b = np.random.choice(world.landmarks) 41 | # random properties for agents 42 | for i, agent in enumerate(world.agents): 43 | agent.color = np.array([0.25,0.25,0.25]) 44 | # random properties for landmarks 45 | world.landmarks[0].color = np.array([0.65,0.15,0.15]) 46 | world.landmarks[1].color = np.array([0.15,0.65,0.15]) 47 | world.landmarks[2].color = np.array([0.15,0.15,0.65]) 48 | # special colors for goals 49 | world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45]) 50 | # set random initial states 51 | for agent in world.agents: 52 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 53 | agent.state.p_vel = np.zeros(world.dim_p) 54 | agent.state.c = np.zeros(world.dim_c) 55 | for i, landmark in enumerate(world.landmarks): 56 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) 57 | landmark.state.p_vel = np.zeros(world.dim_p) 58 | 59 | def benchmark_data(self, agent, world): 60 | # returns data for benchmarking purposes 61 | return self.reward(agent, world) 62 | 63 | def reward(self, agent, world): 64 | # squared distance from listener to landmark 65 | a = world.agents[0] 66 | dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos)) 67 | return -dist2 68 | 69 | def observation(self, agent, world): 70 | # goal color 71 | goal_color = np.zeros(world.dim_color) 72 | if agent.goal_b is not None: 73 | goal_color = agent.goal_b.color 74 | 75 | # get positions of all entities in this agent's reference frame 76 | entity_pos = [] 77 | for entity in world.landmarks: 78 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 79 | 80 | # communication of all other agents 81 | comm = [] 82 | for other in world.agents: 83 | if other is agent or (other.state.c is None): continue 84 | comm.append(other.state.c) 85 | 86 | # speaker 87 | if not agent.movable: 88 | return np.concatenate([goal_color]) 89 | # listener 90 | if agent.silent: 91 | return np.concatenate([agent.state.p_vel] + entity_pos + comm) 92 | 93 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_spread.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | 6 | class Scenario(BaseScenario): 7 | def make_world(self): 8 | world = World() 9 | # set any world properties first 10 | world.dim_c = 2 11 | num_agents = 2 12 | num_landmarks = 3 13 | world.collaborative = True 14 | # add agents 15 | world.agents = [Agent() for i in range(num_agents)] 16 | for i, agent in enumerate(world.agents): 17 | agent.name = 'agent %d' % i 18 | agent.collide = True 19 | agent.silent = True 20 | agent.size = 0.15 21 | # add landmarks 22 | world.landmarks = [Landmark() for i in range(num_landmarks)] 23 | for i, landmark in enumerate(world.landmarks): 24 | landmark.name = 'landmark %d' % i 25 | landmark.collide = False 26 | landmark.movable = False 27 | # make initial 
conditions 28 | self.reset_world(world) 29 | return world 30 | 31 | def reset_world(self, world): 32 | # random properties for agents 33 | for i, agent in enumerate(world.agents): 34 | agent.color = np.array([0.35, 0.35, 0.85]) 35 | # random properties for landmarks 36 | for i, landmark in enumerate(world.landmarks): 37 | landmark.color = np.array([0.25, 0.25, 0.25]) 38 | # set random initial states 39 | for agent in world.agents: 40 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 41 | agent.state.p_vel = np.zeros(world.dim_p) 42 | agent.state.c = np.zeros(world.dim_c) 43 | for i, landmark in enumerate(world.landmarks): 44 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 45 | landmark.state.p_vel = np.zeros(world.dim_p) 46 | 47 | def benchmark_data(self, agent, world): 48 | rew = 0 49 | collisions = 0 50 | occupied_landmarks = 0 51 | min_dists = 0 52 | for l in world.landmarks: 53 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] 54 | min_dists += min(dists) 55 | rew -= min(dists) 56 | if min(dists) < 0.1: 57 | occupied_landmarks += 1 58 | if agent.collide: 59 | for a in world.agents: 60 | if self.is_collision(a, agent): 61 | rew -= 1 62 | collisions += 1 63 | return (rew, collisions, min_dists, occupied_landmarks) 64 | 65 | 66 | def is_collision(self, agent1, agent2): 67 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 68 | dist = np.sqrt(np.sum(np.square(delta_pos))) 69 | dist_min = agent1.size + agent2.size 70 | return True if dist < dist_min else False 71 | 72 | def reward(self, agent, world): 73 | # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions 74 | rew = 0 75 | for l in world.landmarks: 76 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] 77 | rew -= min(dists) 78 | if agent.collide: 79 | for a in world.agents: 80 | if self.is_collision(a, agent): 81 | rew -= 1 82 | return rew 83 | 84 | def observation(self, agent, world): 85 | # get positions of all entities in this agent's reference frame 86 | entity_pos = [] 87 | for entity in world.landmarks: # world.entities: 88 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 89 | # entity colors 90 | entity_color = [] 91 | for entity in world.landmarks: # world.entities: 92 | entity_color.append(entity.color) 93 | # communication of all other agents 94 | comm = [] 95 | other_pos = [] 96 | for other in world.agents: 97 | if other is agent: continue 98 | comm.append(other.state.c) 99 | other_pos.append(other.state.p_pos - agent.state.p_pos) 100 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm) 101 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_tag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | 6 | class Scenario(BaseScenario): 7 | def make_world(self): 8 | world = World() 9 | # set any world properties first 10 | world.dim_c = 2 11 | num_good_agents = 1 12 | num_adversaries = 3 13 | num_agents = num_adversaries + num_good_agents 14 | num_landmarks = 2 15 | # add agents 16 | world.agents = [Agent() for i in range(num_agents)] 17 | for i, agent in enumerate(world.agents): 18 | agent.name = 'agent %d' % i 19 | agent.collide = True 20 | agent.silent = True 21 | agent.adversary = True if i < 
num_adversaries else False 22 | agent.size = 0.075 if agent.adversary else 0.05 23 | agent.accel = 3.0 if agent.adversary else 4.0 24 | #agent.accel = 20.0 if agent.adversary else 25.0 25 | agent.max_speed = 1.0 if agent.adversary else 1.3 26 | # add landmarks 27 | world.landmarks = [Landmark() for i in range(num_landmarks)] 28 | for i, landmark in enumerate(world.landmarks): 29 | landmark.name = 'landmark %d' % i 30 | landmark.collide = True 31 | landmark.movable = False 32 | landmark.size = 0.2 33 | landmark.boundary = False 34 | # make initial conditions 35 | self.reset_world(world) 36 | return world 37 | 38 | 39 | def reset_world(self, world): 40 | # random properties for agents 41 | for i, agent in enumerate(world.agents): 42 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35]) 43 | # random properties for landmarks 44 | for i, landmark in enumerate(world.landmarks): 45 | landmark.color = np.array([0.25, 0.25, 0.25]) 46 | # set random initial states 47 | for agent in world.agents: 48 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 49 | agent.state.p_vel = np.zeros(world.dim_p) 50 | agent.state.c = np.zeros(world.dim_c) 51 | for i, landmark in enumerate(world.landmarks): 52 | if not landmark.boundary: 53 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) 54 | landmark.state.p_vel = np.zeros(world.dim_p) 55 | 56 | 57 | def benchmark_data(self, agent, world): 58 | # returns data for benchmarking purposes 59 | if agent.adversary: 60 | collisions = 0 61 | for a in self.good_agents(world): 62 | if self.is_collision(a, agent): 63 | collisions += 1 64 | return collisions 65 | else: 66 | return 0 67 | 68 | 69 | def is_collision(self, agent1, agent2): 70 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 71 | dist = np.sqrt(np.sum(np.square(delta_pos))) 72 | dist_min = agent1.size + agent2.size 73 | return True if dist < dist_min else False 74 | 75 | # return all agents that are not adversaries 76 | def good_agents(self, world): 77 | return [agent for agent in world.agents if not agent.adversary] 78 | 79 | # return all adversarial agents 80 | def adversaries(self, world): 81 | return [agent for agent in world.agents if agent.adversary] 82 | 83 | 84 | def reward(self, agent, world): 85 | # Agents are rewarded based on minimum agent distance to each landmark 86 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) 87 | return main_reward 88 | 89 | def agent_reward(self, agent, world): 90 | # Agents are negatively rewarded if caught by adversaries 91 | rew = 0 92 | shape = False 93 | adversaries = self.adversaries(world) 94 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary) 95 | for adv in adversaries: 96 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) 97 | if agent.collide: 98 | for a in adversaries: 99 | if self.is_collision(a, agent): 100 | rew -= 10 101 | 102 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries 103 | def bound(x): 104 | if x < 0.9: 105 | return 0 106 | if x < 1.0: 107 | return (x - 0.9) * 10 108 | return min(np.exp(2 * x - 2), 10) 109 | for p in range(world.dim_p): 110 | x = abs(agent.state.p_pos[p]) 111 | rew -= bound(x) 112 | 113 | return rew 114 | 115 | def adversary_reward(self, agent, world): 116 | # Adversaries are rewarded for collisions with agents 117 | rew = 0 118 | shape = False 119 | agents = 
self.good_agents(world) 120 | adversaries = self.adversaries(world) 121 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents) 122 | for adv in adversaries: 123 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents]) 124 | if agent.collide: 125 | for ag in agents: 126 | for adv in adversaries: 127 | if self.is_collision(ag, adv): 128 | rew += 10 129 | return rew 130 | 131 | def observation(self, agent, world): 132 | # get positions of all entities in this agent's reference frame 133 | entity_pos = [] 134 | for entity in world.landmarks: 135 | if not entity.boundary: 136 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 137 | # communication of all other agents 138 | comm = [] 139 | other_pos = [] 140 | other_vel = [] 141 | for other in world.agents: 142 | if other is agent: continue 143 | comm.append(other.state.c) 144 | other_pos.append(other.state.p_pos - agent.state.p_pos) 145 | if not other.adversary: 146 | other_vel.append(other.state.p_vel) 147 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) 148 | -------------------------------------------------------------------------------- /multiagent/scenarios/simple_world_comm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | 6 | class Scenario(BaseScenario): 7 | def make_world(self): 8 | world = World() 9 | # set any world properties first 10 | world.dim_c = 4 11 | #world.damping = 1 12 | num_good_agents = 2 13 | num_adversaries = 4 14 | num_agents = num_adversaries + num_good_agents 15 | num_landmarks = 1 16 | num_food = 2 17 | num_forests = 2 18 | # add agents 19 | world.agents = [Agent() for i in range(num_agents)] 20 | for i, agent in enumerate(world.agents): 21 | agent.name = 'agent %d' % i 22 | agent.collide = True 23 | agent.leader = True if i == 0 else False 24 | agent.silent = True if i > 0 else False 25 | agent.adversary = True if i < num_adversaries else False 26 | agent.size = 0.075 if agent.adversary else 0.045 27 | agent.accel = 3.0 if agent.adversary else 4.0 28 | #agent.accel = 20.0 if agent.adversary else 25.0 29 | agent.max_speed = 1.0 if agent.adversary else 1.3 30 | # add landmarks 31 | world.landmarks = [Landmark() for i in range(num_landmarks)] 32 | for i, landmark in enumerate(world.landmarks): 33 | landmark.name = 'landmark %d' % i 34 | landmark.collide = True 35 | landmark.movable = False 36 | landmark.size = 0.2 37 | landmark.boundary = False 38 | world.food = [Landmark() for i in range(num_food)] 39 | for i, landmark in enumerate(world.food): 40 | landmark.name = 'food %d' % i 41 | landmark.collide = False 42 | landmark.movable = False 43 | landmark.size = 0.03 44 | landmark.boundary = False 45 | world.forests = [Landmark() for i in range(num_forests)] 46 | for i, landmark in enumerate(world.forests): 47 | landmark.name = 'forest %d' % i 48 | landmark.collide = False 49 | landmark.movable = False 50 | landmark.size = 0.3 51 | landmark.boundary = False 52 | world.landmarks += world.food 53 | world.landmarks += world.forests 54 | #world.landmarks += self.set_boundaries(world) # world boundaries now penalized with negative reward 55 | # make initial conditions 56 | self.reset_world(world) 57 | return world 58 | 59 | def set_boundaries(self, world): 60 | boundary_list = [] 61 | landmark_size = 1 62 | 
edge = 1 + landmark_size 63 | num_landmarks = int(edge * 2 / landmark_size) 64 | for x_pos in [-edge, edge]: 65 | for i in range(num_landmarks): 66 | l = Landmark() 67 | l.state.p_pos = np.array([x_pos, -1 + i * landmark_size]) 68 | boundary_list.append(l) 69 | 70 | for y_pos in [-edge, edge]: 71 | for i in range(num_landmarks): 72 | l = Landmark() 73 | l.state.p_pos = np.array([-1 + i * landmark_size, y_pos]) 74 | boundary_list.append(l) 75 | 76 | for i, l in enumerate(boundary_list): 77 | l.name = 'boundary %d' % i 78 | l.collide = True 79 | l.movable = False 80 | l.boundary = True 81 | l.color = np.array([0.75, 0.75, 0.75]) 82 | l.size = landmark_size 83 | l.state.p_vel = np.zeros(world.dim_p) 84 | 85 | return boundary_list 86 | 87 | 88 | def reset_world(self, world): 89 | # random properties for agents 90 | for i, agent in enumerate(world.agents): 91 | agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45]) 92 | agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0]) 93 | # random properties for landmarks 94 | for i, landmark in enumerate(world.landmarks): 95 | landmark.color = np.array([0.25, 0.25, 0.25]) 96 | for i, landmark in enumerate(world.food): 97 | landmark.color = np.array([0.15, 0.15, 0.65]) 98 | for i, landmark in enumerate(world.forests): 99 | landmark.color = np.array([0.6, 0.9, 0.6]) 100 | # set random initial states 101 | for agent in world.agents: 102 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 103 | agent.state.p_vel = np.zeros(world.dim_p) 104 | agent.state.c = np.zeros(world.dim_c) 105 | for i, landmark in enumerate(world.landmarks): 106 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) 107 | landmark.state.p_vel = np.zeros(world.dim_p) 108 | for i, landmark in enumerate(world.food): 109 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) 110 | landmark.state.p_vel = np.zeros(world.dim_p) 111 | for i, landmark in enumerate(world.forests): 112 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p) 113 | landmark.state.p_vel = np.zeros(world.dim_p) 114 | 115 | def benchmark_data(self, agent, world): 116 | if agent.adversary: 117 | collisions = 0 118 | for a in self.good_agents(world): 119 | if self.is_collision(a, agent): 120 | collisions += 1 121 | return collisions 122 | else: 123 | return 0 124 | 125 | 126 | def is_collision(self, agent1, agent2): 127 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 128 | dist = np.sqrt(np.sum(np.square(delta_pos))) 129 | dist_min = agent1.size + agent2.size 130 | return True if dist < dist_min else False 131 | 132 | 133 | # return all agents that are not adversaries 134 | def good_agents(self, world): 135 | return [agent for agent in world.agents if not agent.adversary] 136 | 137 | # return all adversarial agents 138 | def adversaries(self, world): 139 | return [agent for agent in world.agents if agent.adversary] 140 | 141 | 142 | def reward(self, agent, world): 143 | # Agents are rewarded based on minimum agent distance to each landmark 144 | #boundary_reward = -10 if self.outside_boundary(agent) else 0 145 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) 146 | return main_reward 147 | 148 | def outside_boundary(self, agent): 149 | if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1: 150 | return True 151 | else: 152 | return False 153 | 154 | 155 | def agent_reward(self, 
agent, world): 156 | # Agents are rewarded based on minimum agent distance to each landmark 157 | rew = 0 158 | shape = False 159 | adversaries = self.adversaries(world) 160 | if shape: 161 | for adv in adversaries: 162 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos))) 163 | if agent.collide: 164 | for a in adversaries: 165 | if self.is_collision(a, agent): 166 | rew -= 5 167 | def bound(x): 168 | if x < 0.9: 169 | return 0 170 | if x < 1.0: 171 | return (x - 0.9) * 10 172 | return min(np.exp(2 * x - 2), 10) # 1 + (x - 1) * (x - 1) 173 | 174 | for p in range(world.dim_p): 175 | x = abs(agent.state.p_pos[p]) 176 | rew -= 2 * bound(x) 177 | 178 | for food in world.food: 179 | if self.is_collision(agent, food): 180 | rew += 2 181 | rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food]) 182 | 183 | return rew 184 | 185 | def adversary_reward(self, agent, world): 186 | # Agents are rewarded based on minimum agent distance to each landmark 187 | rew = 0 188 | shape = True 189 | agents = self.good_agents(world) 190 | adversaries = self.adversaries(world) 191 | if shape: 192 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents]) 193 | if agent.collide: 194 | for ag in agents: 195 | for adv in adversaries: 196 | if self.is_collision(ag, adv): 197 | rew += 5 198 | return rew 199 | 200 | 201 | def observation2(self, agent, world): 202 | # get positions of all entities in this agent's reference frame 203 | entity_pos = [] 204 | for entity in world.landmarks: 205 | if not entity.boundary: 206 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 207 | 208 | food_pos = [] 209 | for entity in world.food: 210 | if not entity.boundary: 211 | food_pos.append(entity.state.p_pos - agent.state.p_pos) 212 | # communication of all other agents 213 | comm = [] 214 | other_pos = [] 215 | other_vel = [] 216 | for other in world.agents: 217 | if other is agent: continue 218 | comm.append(other.state.c) 219 | other_pos.append(other.state.p_pos - agent.state.p_pos) 220 | if not other.adversary: 221 | other_vel.append(other.state.p_vel) 222 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel) 223 | 224 | def observation(self, agent, world): 225 | # get positions of all entities in this agent's reference frame 226 | entity_pos = [] 227 | for entity in world.landmarks: 228 | if not entity.boundary: 229 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 230 | 231 | in_forest = [np.array([-1]), np.array([-1])] 232 | inf1 = False 233 | inf2 = False 234 | if self.is_collision(agent, world.forests[0]): 235 | in_forest[0] = np.array([1]) 236 | inf1= True 237 | if self.is_collision(agent, world.forests[1]): 238 | in_forest[1] = np.array([1]) 239 | inf2 = True 240 | 241 | food_pos = [] 242 | for entity in world.food: 243 | if not entity.boundary: 244 | food_pos.append(entity.state.p_pos - agent.state.p_pos) 245 | # communication of all other agents 246 | comm = [] 247 | other_pos = [] 248 | other_vel = [] 249 | for other in world.agents: 250 | if other is agent: continue 251 | comm.append(other.state.c) 252 | oth_f1 = self.is_collision(other, world.forests[0]) 253 | oth_f2 = self.is_collision(other, world.forests[1]) 254 | if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader: #without forest vis 255 | other_pos.append(other.state.p_pos - agent.state.p_pos) 256 | if not 
other.adversary: 257 | other_vel.append(other.state.p_vel) 258 | else: 259 | other_pos.append([0, 0]) 260 | if not other.adversary: 261 | other_vel.append([0, 0]) 262 | 263 | # to tell the pred when the prey are in the forest 264 | prey_forest = [] 265 | ga = self.good_agents(world) 266 | for a in ga: 267 | if any([self.is_collision(a, f) for f in world.forests]): 268 | prey_forest.append(np.array([1])) 269 | else: 270 | prey_forest.append(np.array([-1])) 271 | # to tell leader when pred are in forest 272 | prey_forest_lead = [] 273 | for f in world.forests: 274 | if any([self.is_collision(a, f) for a in ga]): 275 | prey_forest_lead.append(np.array([1])) 276 | else: 277 | prey_forest_lead.append(np.array([-1])) 278 | 279 | comm = [world.agents[0].state.c] 280 | 281 | if agent.adversary and not agent.leader: 282 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) 283 | if agent.leader: 284 | return np.concatenate( 285 | [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm) 286 | else: 287 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel) 288 | 289 | 290 | -------------------------------------------------------------------------------- /obs_img.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import cv2 4 | 5 | def set_value_in_img(img, pos_x, pos_y, value): 6 | """ 7 | draw 3*3 rectangle in img 8 | :param img: 9 | :param pos_x: 10 | :param pos_y: 11 | :param value: 12 | :return: 13 | """ 14 | img_obs_size_x = 300 15 | img_obs_size_y = 300 16 | 17 | img[pos_x, pos_y] = value 18 | 19 | 20 | if __name__ == '__main__': 21 | img_obs_size_x = 300 22 | img_obs_size_y = 300 23 | # 个体img:所有己方单位位置 24 | obs_img = np.full((img_obs_size_x, img_obs_size_y, 3), 255, dtype=np.int32) 25 | # print(obs_img) 26 | a = plt.imshow(obs_img) 27 | # plt.show(a) 28 | 29 | for i in range(300): 30 | set_value_in_img(obs_img, 150, 150, [48, 158, 67]) 31 | # print(obs_img) 32 | b = plt.imshow(obs_img, filternorm=1, filterrad=4, interpolation='lanczos') 33 | plt.show(b) 34 | print(obs_img[150][150]) -------------------------------------------------------------------------------- /plot_reward.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | 5 | f = open('./learning_curves/exp_torch_4/rewards.pkl','rb') 6 | f1 = open('./learning_curves/exp_torch_5/rewards.pkl','rb') 7 | f2 = open('./learning_curves/exp_torch_8/rewards.pkl','rb') 8 | 9 | rewards = pickle.load(f) 10 | rewards1 = pickle.load(f1) 11 | rewards2 = pickle.load(f2) 12 | plt.plot(rewards) 13 | plt.plot(rewards1) 14 | plt.plot(rewards2) 15 | plt.grid() 16 | plt.xlabel('epoch') 17 | plt.ylabel('reward') 18 | plt.title('Rewards') 19 | plt.legend(['4', '5', '6']) 20 | plt.show() 21 | -------------------------------------------------------------------------------- /replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | class ReplayBuffer(object): 5 | def __init__(self, size): 6 | """Create Prioritized Replay buffer. 7 | 8 | Parameters 9 | ---------- 10 | size: int 11 | Max number of transitions to store in the buffer. When the buffer 12 | overflows the old memories are dropped. 
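Example
-------
A minimal usage sketch (illustrative only; obs_n, action_n, rew_n, new_obs_n
and done_n are placeholder names for the per-step lists with one entry per
agent, which is how _encode_sample below consumes them):

    buffer = ReplayBuffer(1e6)
    # obs_n, action_n, rew_n, new_obs_n, done_n: hypothetical per-step lists, one entry per agent
    buffer.add(obs_n, action_n, rew_n, new_obs_n, done_n)
    if len(buffer) >= 1024:
        obs, act, rew, obs_next, done = buffer.sample(1024, agent_idx=0)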
13 | """ 14 | self._storage = [] 15 | self._maxsize = int(size) 16 | self._next_idx = 0 17 | 18 | def __len__(self): 19 | return len(self._storage) 20 | 21 | def clear(self): 22 | self._storage = [] 23 | self._next_idx = 0 24 | 25 | def add(self, obs_t, action, reward, obs_tp1, done): 26 | data = (obs_t, action, reward, obs_tp1, done) 27 | 28 | if self._next_idx >= len(self._storage): 29 | self._storage.append(data) 30 | else: 31 | self._storage[self._next_idx] = data 32 | self._next_idx = (self._next_idx + 1) % self._maxsize 33 | 34 | def _encode_sample(self, idxes, agent_idx): 35 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], [] 36 | for i in idxes: 37 | data = self._storage[i] 38 | obs_t, action, reward, obs_tp1, done = data 39 | obses_t.append(np.concatenate(obs_t[:])) 40 | actions.append(action) 41 | rewards.append(reward[agent_idx]) 42 | obses_tp1.append(np.concatenate(obs_tp1[:])) 43 | dones.append(done[agent_idx]) 44 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones) 45 | 46 | def make_index(self, batch_size): 47 | return [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)] 48 | 49 | def make_latest_index(self, batch_size): 50 | idx = [(self._next_idx - 1 - i) % self._maxsize for i in range(batch_size)] 51 | np.random.shuffle(idx) 52 | return idx 53 | 54 | def sample_index(self, idxes): 55 | return self._encode_sample(idxes) 56 | 57 | def sample(self, batch_size, agent_idx): 58 | """Sample a batch of experiences. 59 | 60 | Parameters 61 | ---------- 62 | batch_size: int 63 | How many transitions to sample. 64 | 65 | Returns 66 | ------- 67 | obs_batch: np.array 68 | batch of observations 69 | act_batch: np.array 70 | batch of actions executed given obs_batch 71 | rew_batch: np.array 72 | rewards received as results of executing act_batch 73 | next_obs_batch: np.array 74 | next set of observations seen after executing act_batch 75 | done_mask: np.array 76 | done_mask[i] = 1 if executing act_batch[i] resulted in 77 | the end of an episode and 0 otherwise. 
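Notes
-----
If batch_size <= 0 the whole buffer is returned instead of a random draw;
otherwise batch_size indices are drawn uniformly at random, with
replacement, via make_index.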
78 | """ 79 | if batch_size > 0: 80 | idxes = self.make_index(batch_size) 81 | else: 82 | idxes = range(0, len(self._storage)) 83 | return self._encode_sample(idxes, agent_idx) 84 | 85 | def collect(self): 86 | return self.sample(-1) 87 | -------------------------------------------------------------------------------- /scenario-options/band-loc/core_uav.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # physical/external base state of all entites 4 | class EntityState(object): 5 | def __init__(self): 6 | # physical position 7 | self.p_pos = None 8 | # physical velocity 9 | self.p_vel = None 10 | self.hold_action = 0 11 | 12 | # state of agents (including communication and internal/mental state) 13 | class AgentState(EntityState): 14 | def __init__(self): 15 | super(AgentState, self).__init__() 16 | # communication allocation 17 | self.c = None 18 | self.UAV_Rate = None 19 | self.assign = None 20 | self.SNRforUser = None 21 | 22 | # action of the agent 23 | class Action(object): 24 | def __init__(self): 25 | # physical action 26 | self.u = None 27 | # communication action 28 | self.c = None 29 | 30 | # properties and state of physical world entity 31 | class Entity(object): 32 | def __init__(self): 33 | # name 34 | self.name = '' 35 | # properties: 36 | self.size = 0.0250 37 | # entity can move / be pushed 38 | self.movable = False 39 | # entity collides with others 40 | self.collide = True 41 | # material density (affects mass) 42 | self.density = 25.0 43 | # color 44 | self.color = None 45 | # max speed and accel 46 | self.max_speed = 10 47 | self.accel = None 48 | # state 49 | self.state = EntityState() 50 | # mass 51 | self.initial_mass = 1 52 | 53 | @property 54 | def mass(self): 55 | return self.initial_mass 56 | 57 | # properties of landmark entities 58 | class Landmark(Entity): 59 | def __init__(self): 60 | super(Landmark, self).__init__() 61 | self.assign_index = None 62 | self.switch_cout = 0 63 | 64 | # properties of agent entities 65 | class Agent(Entity): 66 | def __init__(self): 67 | super(Agent, self).__init__() 68 | # agents are movable by default 69 | self.movable = True 70 | # cannot send communication signals 71 | self.silent = False 72 | # cannot observe the world 73 | self.blind = False 74 | # physical motor noise amount 75 | self.u_noise = None 76 | # communication noise amount 77 | self.c_noise = None 78 | # control range 79 | self.u_range = 1.0 80 | # state 81 | self.state = AgentState() 82 | # action 83 | self.action = Action() 84 | # script behavior to execute 85 | self.action_callback = None 86 | 87 | # multi-agent world 88 | class World(object): 89 | def __init__(self): 90 | # list of agents and entities (can change at execution-time!) 
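# Link-budget summary for the channel attributes initialised below, as they
# are used in update_world_state() further down: for UAV j serving user i at
# horizontal distance d_ij (the world distance scaled by 1000) and flight
# altitude h = 200,
#   signal_pow = pow_UAVs * gain_UAV / (d_ij**los_exp_UAV + h**2)
#   noise_pow  = bandwidth_ij * noise_pdf
#   SNR_ij     = signal_pow / noise_pow      (forced to 0 below 5 dB)
#   Rate_ij    = bandwidth_ij * log2(1 + SNR_ij)
# and each user is then assigned to the single UAV offering the highest rate.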
91 | self.agents = [] 92 | self.landmarks = [] 93 | # communication channel dimensionality 94 | self.dim_c = 0 95 | # position dimensionality 96 | self.dim_p = 2 97 | # color dimensionality 98 | self.dim_color = 3 99 | # simulation timestep 100 | self.dt = 0.1 101 | # physical damping 102 | self.damping = 0.25 103 | # contact response parameters 104 | self.contact_force = 1e+2 105 | self.contact_margin = 1e-3 106 | self.collaborative = False 107 | 108 | self.pow_UAVs = 100 # 无人机发射功率 109 | self.pow_GBS = None # 地面基站发射功率 110 | self.gain_UAV = 10**(-40/10) # 单位db 无人机链路单位距离下的信道增益 111 | self.gain_GBS = 60 # 单位db 地面基站链路单位距离下的信道增益 112 | self.los_exp_UAV = 2 # 无人机链路衰减指数 113 | self.los_exp_GBS = 3 # 地面基站链路衰减指数 114 | self.max_band = 10e6 # 最大可用带宽 115 | self.noise_pdf = 10**(-167/10) # 噪声功率谱密度 dBm 116 | 117 | self.bandwidth = None # 带宽 118 | self.Rate = None 119 | self.SNR = None 120 | 121 | # return all entities in the world 122 | @property 123 | def entities(self): 124 | return self.agents + self.landmarks 125 | 126 | # return all agents controllable by external policies 127 | @property 128 | def policy_agents(self): 129 | return [agent for agent in self.agents if agent.action_callback is None] 130 | 131 | # return all agents controlled by world scripts 132 | @property 133 | def scripted_agents(self): 134 | return [agent for agent in self.agents if agent.action_callback is not None] 135 | 136 | # update state of the world 137 | def step(self): 138 | # set actions for scripted agents 139 | for agent in self.scripted_agents: 140 | agent.action = agent.action_callback(agent, self) 141 | 142 | # gather forces applied to entities 143 | p_force = [None] * len(self.entities) 144 | # apply agent physical controls 145 | p_force = self.apply_action_force(p_force) 146 | # apply environment forces 147 | # p_force = self.apply_environment_force(p_force) 148 | # integrate physical state 149 | self.integrate_state(p_force) 150 | 151 | self.update_landmark_pos() 152 | 153 | # self.update_agent_pos() 154 | 155 | # update agent state 156 | for agent in self.agents: 157 | self.update_agent_state(agent) 158 | self.update_world_state() 159 | 160 | # dict_state = [] 161 | # for i, landmark in enumerate(self.landmarks): 162 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 163 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos}) 164 | # 165 | # for i, agent in enumerate(self.agents): 166 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 167 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign': agent.state.c}) 168 | # with open('./state_results/states.txt','a') as f: 169 | # f.write(str(dict_state)+'\n') 170 | 171 | # gather agent action forces 172 | def apply_action_force(self, p_force): 173 | # set applied forces 174 | for i,agent in enumerate(self.agents): 175 | if agent.movable: 176 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 177 | p_force[i] = agent.action.u + noise 178 | return p_force 179 | 180 | # gather physical forces acting on entities 181 | def apply_environment_force(self, p_force): 182 | # simple (but inefficient) collision response 183 | for a,entity_a in enumerate(self.entities): 184 | for b,entity_b in enumerate(self.entities): 185 | if(b <= a): continue 186 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b) 187 | if(f_a is not None): 188 | if(p_force[a] is None): p_force[a] = 0.0 189 | p_force[a] = f_a + p_force[a] 190 | if(f_b is not None): 191 | if(p_force[b] is None): 
p_force[b] = 0.0 192 | p_force[b] = f_b + p_force[b] 193 | return p_force 194 | 195 | # integrate physical state 196 | def integrate_state(self, p_force): 197 | for i, agent in enumerate(self.agents): 198 | if not agent.movable: continue 199 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 200 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping) 201 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping) 202 | if (p_force[i] is not None): 203 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt 204 | if agent.max_speed is not None: 205 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 206 | if speed > agent.max_speed: 207 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 208 | np.square(agent.state.p_vel[1])) * agent.max_speed 209 | agent.state.p_pos += agent.state.p_vel * self.dt 210 | 211 | def update_landmark_pos(self): 212 | # for landmark in self.landmarks: 213 | # if not landmark.movable: continue 214 | # if landmark.state.hold_action > 0: 215 | # landmark.state.hold_action -= 1 216 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 217 | # else: 218 | # landmark.state.hold_action = np.random.randint(0,10) 219 | # 220 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 221 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 223 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 224 | # # print(temp_pos) 225 | # landmark.state.p_pos = temp_pos 226 | 227 | for landmark in self.landmarks: 228 | if not landmark.movable: continue 229 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 230 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 231 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 232 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 233 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 234 | # print(temp_pos) 235 | landmark.state.p_pos = temp_pos 236 | 237 | 238 | def update_agent_state(self, agent): 239 | # set communication state (directly f or now) 240 | if agent.silent: 241 | agent.state.c = np.zeros(self.dim_c) 242 | else: 243 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 244 | # agent.state.c = agent.action.c + noise 245 | agent.state.c = agent.action.c 246 | 247 | # def update_agent_pos(self): 248 | # # set communication state (directly for now) 249 | # for i, agent in enumerate(self.agents): 250 | # if not agent.movable: continue 251 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 252 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping) 253 | # print(agent.state.p_vel) 254 | # if agent.max_speed is not None: 255 | # speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 256 | # if speed > agent.max_speed: 257 | # print('高于最大速度') 258 | # agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 259 | # np.square(agent.state.p_vel[1])) * agent.max_speed 260 | # agent.state.p_pos += agent.state.p_vel * self.dt 261 | 262 | def update_world_state(self): 263 | # 根据基站提供的方案,用户进行选择 264 | 265 | bw = np.zeros((len(self.agents), self.dim_c)) 266 | disVec = np.zeros((len(self.agents), self.dim_c)) 267 | height = np.ones((len(self.agents), self.dim_c)) * 200 # 
无人机的飞行高度 268 | for index, agent in enumerate(self.agents): 269 | bw[index] = np.array(list(self.max_band * i for i in agent.action.c)) 270 | # 计算无人机到各个用户的欧式距离 271 | for i, landmark in enumerate(self.landmarks): 272 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000 273 | # 距离超过定值时,无法进行通信,将带宽分配为0 274 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i] 275 | # 接收到的信号功率 276 | signal_pow = self.pow_UAVs * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2) 277 | 278 | # 接收端噪声功率 279 | noise_pow = bw * self.noise_pdf + 1e-9 280 | # 接收端信噪比 281 | SNR = signal_pow / noise_pow 282 | 283 | SNR[SNR < np.sqrt(10)] = 0 #SNR小于5dB时,无法通信,将其强制置零 284 | 285 | 286 | # for i in range(self.dim_c): 287 | # for j in range(len(self.agents)): 288 | # if 10*np.log10(SNR[j][i]) < 15: 289 | # bw[j][i] = 0 290 | 291 | # 信息传输速率 292 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR) 293 | 294 | ## 保证每个用户只与一个无人机建立通信链路 295 | mask = np.zeros((len(self.agents), self.dim_c)) #被遮掉的位置为0 296 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) #被遮掉的位置为负数 297 | 298 | for i, landmark in enumerate(self.landmarks): 299 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])): 300 | landmark.switch_cout = 1 301 | landmark.assign_index = np.argmax(Rate[:, i]) 302 | else: 303 | landmark.assign_index = np.argmax(Rate[:, i]) 304 | 305 | mask[landmark.assign_index][i] = 1 306 | 307 | 308 | self.bandwidth = mask * bw 309 | self.Rate = mask * Rate 310 | self.SNR = 10*np.log10(mask * SNR + 1e-10) 311 | 312 | for i, agent in enumerate(self.agents): 313 | agent.state.assign = self.bandwidth[i, :]/self.max_band 314 | agent.state.UAV_Rate = self.Rate[i, :] 315 | agent.state.SNRforUser = SNR[i,:] 316 | 317 | # for i, landmark in enumerate(self.landmarks): 318 | # band = self.bandwidth.transpose() 319 | # landmark.assign_index = np.nonzero(band[i]) 320 | 321 | # get collision forces for any contact between two entities 322 | def get_collision_force(self, entity_a, entity_b): 323 | if (not entity_a.collide) or (not entity_b.collide): 324 | return [None, None] # not a collider 325 | if (entity_a is entity_b): 326 | return [None, None] # don't collide against itself 327 | # compute actual distance between entities 328 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos 329 | dist = np.sqrt(np.sum(np.square(delta_pos))) 330 | # minimum allowable distance 331 | dist_min = entity_a.size + entity_b.size 332 | # softmax penetration 333 | k = self.contact_margin 334 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k 335 | force = self.contact_force * delta_pos / dist * penetration 336 | force_a = +force if entity_a.movable else None 337 | force_b = -force if entity_b.movable else None 338 | return [force_a, force_b] -------------------------------------------------------------------------------- /scenario-options/band-loc/environment_uav.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.envs.registration import EnvSpec 4 | import numpy as np 5 | from multiagent.multi_discrete import MultiDiscrete 6 | 7 | # environment for all agents in the multiagent world 8 | # currently code assumes that no agents will be created/destroyed at runtime! 
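# Typical construction (a minimal sketch, assuming the stock MPE scenario
# loader multiagent.scenarios.load; the callbacks map one-to-one onto the
# __init__ arguments below):
#
#   import multiagent.scenarios as scenarios
#   scenario = scenarios.load('simple_collaborative_comm.py').Scenario()
#   world = scenario.make_world()
#   env = MultiAgentEnv(world, reset_callback=scenario.reset_world,
#                       reward_callback=scenario.reward,
#                       observation_callback=scenario.observation)
#   obs_n = env.reset()
#   obs_n, reward_n, done_n, info_n = env.step(action_n)  # action_n: one action per UAV (placeholder)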
9 | class MultiAgentEnv(gym.Env): 10 | metadata = { 11 | 'render.modes' : ['human', 'rgb_array'] 12 | } 13 | 14 | def __init__(self, world, reset_callback=None, reward_callback=None, 15 | observation_callback=None, info_callback=None, 16 | done_callback=None, shared_viewer=True): 17 | 18 | self.world = world 19 | self.agents = self.world.policy_agents 20 | # set required vectorized gym env property 21 | self.n = len(world.policy_agents) 22 | # scenario callbacks 23 | self.reset_callback = reset_callback 24 | self.reward_callback = reward_callback 25 | self.observation_callback = observation_callback 26 | self.info_callback = info_callback 27 | self.done_callback = done_callback 28 | # environment parameters 29 | self.discrete_action_space = True 30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector 31 | self.discrete_action_input = False 32 | # if true, even the action is continuous, action will be performed discretely 33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False 34 | # if true, every agent has the same reward 35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | for agent in self.agents: 42 | total_action_space = [] 43 | # physical action space 44 | if self.discrete_action_space: 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | else: 47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) 48 | if agent.movable: 49 | total_action_space.append(u_action_space) 50 | # communication action space 51 | if self.discrete_action_space: 52 | c_action_space = spaces.Discrete(world.dim_c) 53 | else: 54 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) 55 | if not agent.silent: 56 | total_action_space.append(c_action_space) 57 | # total action space 58 | if len(total_action_space) > 1: 59 | # all action spaces are discrete, so simplify to MultiDiscrete action space 60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 61 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) 62 | else: 63 | act_space = spaces.Tuple(total_action_space) 64 | self.action_space.append(act_space) 65 | else: 66 | self.action_space.append(total_action_space[0]) 67 | # observation space 68 | obs_dim = len(observation_callback(agent, self.world)) 69 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) 70 | # agent.action.c = np.zeros(self.world.dim_c) 71 | 72 | # rendering 73 | self.shared_viewer = shared_viewer 74 | if self.shared_viewer: 75 | self.viewers = [None] 76 | else: 77 | self.viewers = [None] * self.n 78 | self._reset_render() 79 | 80 | def step(self, action_n): 81 | obs_n = [] 82 | reward_n = [] 83 | done_n = [] 84 | info_n = {'n': []} 85 | self.agents = self.world.policy_agents 86 | # set action for each agent 87 | for i, agent in enumerate(self.agents): 88 | self._set_action(action_n[i], agent, self.action_space[i]) 89 | 90 | # advance world state 91 | self.world.step() 92 | # record observation for each agent 93 | for agent in self.agents: 94 | obs_n.append(self._get_obs(agent)) 95 | reward_n.append(self._get_reward(agent)) 96 | done_n.append(self._get_done(agent)) 97 | 98 | info_n['n'].append(self._get_info(agent)) 99 | 100 | # all 
agents get total reward in cooperative case 101 | reward = np.sum(reward_n) 102 | if self.shared_reward: 103 | reward_n = [reward] * self.n 104 | 105 | return obs_n, reward_n, done_n, info_n 106 | 107 | def reset(self): 108 | # reset world 109 | self.reset_callback(self.world) 110 | # reset renderer 111 | self._reset_render() 112 | # record observations for each agent 113 | obs_n = [] 114 | self.agents = self.world.policy_agents 115 | for agent in self.agents: 116 | obs_n.append(self._get_obs(agent)) 117 | return obs_n 118 | 119 | # get info used for benchmarking 120 | def _get_info(self, agent): 121 | if self.info_callback is None: 122 | return {} 123 | return self.info_callback(agent, self.world) 124 | 125 | # get observation for a particular agent 126 | def _get_obs(self, agent): 127 | if self.observation_callback is None: 128 | return np.zeros(0) 129 | return self.observation_callback(agent, self.world) 130 | 131 | # get dones for a particular agent 132 | # unused right now -- agents are allowed to go beyond the viewing screen 133 | def _get_done(self, agent): 134 | if self.done_callback is None: 135 | return False 136 | return self.done_callback(agent, self.world) 137 | 138 | # get reward for a particular agent 139 | def _get_reward(self, agent): 140 | if self.reward_callback is None: 141 | return 0.0 142 | return self.reward_callback(agent, self.world) 143 | 144 | # set env action for a particular agent 145 | def _set_action(self, action, agent, action_space, time=None): 146 | agent.action.u = np.zeros(self.world.dim_p) 147 | agent.action.c = np.zeros(self.world.dim_c) 148 | # process action 149 | if isinstance(action_space, MultiDiscrete): 150 | act = [] 151 | size = action_space.high - action_space.low + 1 152 | index = 0 153 | for s in size: 154 | act.append(action[index:(index+s)]) 155 | index += s 156 | action = act 157 | else: 158 | action = [action] 159 | 160 | 161 | if agent.movable: 162 | # physical action 163 | if self.discrete_action_input: 164 | agent.action.u = np.zeros(self.world.dim_p) 165 | # process discrete action 166 | if action[0] == 1: agent.action.u[0] = -1.0 167 | if action[0] == 2: agent.action.u[0] = +1.0 168 | if action[0] == 3: agent.action.u[1] = -1.0 169 | if action[0] == 4: agent.action.u[1] = +1.0 170 | else: 171 | if self.force_discrete_action: 172 | d = np.argmax(action[0]) 173 | action[0][:] = 0.0 174 | action[0][d] = 1.0 175 | if self.discrete_action_space: 176 | agent.action.u[0] += action[0][1] - action[0][2] 177 | agent.action.u[1] += action[0][3] - action[0][4] 178 | else: 179 | agent.action.u = action[0] 180 | sensitivity = 5 181 | if agent.accel is not None: 182 | sensitivity = agent.accel 183 | agent.action.u *= sensitivity 184 | action = action[1:] 185 | if not agent.silent: 186 | # communication action 187 | if self.discrete_action_input: 188 | agent.action.c = np.zeros(self.world.dim_c) 189 | agent.action.c[action[0]] = 1.0 190 | else: 191 | agent.action.c = action[0] 192 | action = action[1:] 193 | # make sure we used all elements of action 194 | assert len(action) == 0 195 | 196 | # reset rendering assets 197 | def _reset_render(self): 198 | self.render_geoms = None 199 | self.render_geoms_xform = None 200 | # self.render_geoms_uav = None 201 | # self.render_geoms_xform_uav = None 202 | 203 | # render environment 204 | def render(self, mode='human'): 205 | # if mode == 'human': 206 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 207 | # message = '' 208 | # for agent in self.world.agents: 209 | # comm = [] 210 | # for other in 
self.world.agents: 211 | # if other is agent: continue 212 | # if np.all(other.state.c == 0): 213 | # word = '_' 214 | # else: 215 | # word = alphabet[np.argmax(other.state.c)] 216 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ') 217 | # print(message) 218 | 219 | for i in range(len(self.viewers)): 220 | # create viewers (if necessary) 221 | if self.viewers[i] is None: 222 | # import rendering only if we need it (and don't import for headless machines) 223 | #from gym.envs.classic_control import rendering 224 | from multiagent import rendering 225 | self.viewers[i] = rendering.Viewer(700,700) 226 | 227 | # create rendering geometry 228 | if self.render_geoms is None: 229 | # import rendering only if we need it (and don't import for headless machines) 230 | #from gym.envs.classic_control import rendering 231 | from multiagent import rendering 232 | self.render_geoms = [] 233 | self.render_geoms_xform = [] 234 | self.render_geoms_uav = [] 235 | self.render_geoms_xform_uav = [] 236 | self.render_geoms_line = [] 237 | 238 | 239 | range_color = [0.25, 0.25, 0.75] 240 | for entity in self.world.entities: 241 | geom = rendering.make_circle(entity.size) 242 | xform = rendering.Transform() 243 | 244 | if 'agent' in entity.name: 245 | geom.set_color(*entity.color, alpha=0.5) 246 | 247 | geom_uav = rendering.make_circle(entity.size*4) 248 | xform_uav = rendering.Transform() 249 | geom_uav.set_color(*range_color, alpha=0.25) 250 | geom_uav.add_attr(xform_uav) 251 | 252 | self.render_geoms_uav.append(geom_uav) 253 | self.render_geoms_xform_uav.append(xform_uav) 254 | 255 | else: 256 | geom.set_color(*entity.color) 257 | 258 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos) 259 | self.render_geoms_line.append(geom_line) 260 | 261 | geom.add_attr(xform) 262 | self.render_geoms.append(geom) 263 | self.render_geoms_xform.append(xform) 264 | 265 | # add geoms to viewer 266 | for viewer in self.viewers: 267 | viewer.geoms = [] 268 | for geom in self.render_geoms: 269 | viewer.add_geom(geom) 270 | for geom_uav in self.render_geoms_uav: 271 | viewer.add_geom(geom_uav) 272 | for geom_line in self.render_geoms_line: 273 | viewer.add_geom(geom_line) 274 | 275 | results = [] 276 | for i in range(len(self.viewers)): 277 | from multiagent import rendering 278 | # update bounds to center around agent 279 | cam_range = 1 280 | if self.shared_viewer: 281 | pos = np.zeros(self.world.dim_p) 282 | else: 283 | pos = self.agents[i].state.p_pos 284 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range) 285 | # update geometry positions 286 | for e, entity in enumerate(self.world.entities): 287 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos) 288 | for a, agent in enumerate(self.world.policy_agents): 289 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos) 290 | for l, landmark in enumerate(self.world.landmarks): 291 | self.render_geoms_line[l].start = landmark.state.p_pos 292 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos 293 | 294 | # render to display or array 295 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) 296 | 297 | return results 298 | 299 | # create receptor field locations in local coordinate frame 300 | def _make_receptor_locations(self, agent): 301 | receptor_type = 'polar' 302 | range_min = 0.05 * 2.0 303 | range_max = 1.00 304 | dx = [] 305 | # circular receptive field 306 | if receptor_type 
== 'polar': 307 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): 308 | for distance in np.linspace(range_min, range_max, 3): 309 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) 310 | # add origin 311 | dx.append(np.array([0.0, 0.0])) 312 | # grid receptive field 313 | if receptor_type == 'grid': 314 | for x in np.linspace(-range_max, +range_max, 5): 315 | for y in np.linspace(-range_max, +range_max, 5): 316 | dx.append(np.array([x,y])) 317 | return dx -------------------------------------------------------------------------------- /scenario-options/band-loc/simple_collaborative_comm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core_uav import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 10 10 | num_uav = 3 11 | world.collaborative = True # whether agents share rewards 12 | # add agents 13 | world.agents = [Agent() for i in range(num_uav)] 14 | for i, agent in enumerate(world.agents): 15 | agent.name = 'agent %d' % i 16 | agent.collide = True 17 | # add landmarks 18 | world.landmarks = [Landmark() for i in range(world.dim_c)] 19 | for i, landmark in enumerate(world.landmarks): 20 | landmark.name = 'landmark %d' % i 21 | landmark.collide = False 22 | landmark.movable = True 23 | 24 | # make initial conditions 25 | self.reset_world(world) 26 | return world 27 | 28 | def reset_world(self, world): 29 | # set color for landmarks 30 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75]) 31 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75]) 32 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75]) 33 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75]) 34 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75]) 35 | 36 | # set random initial states 37 | # for i, agent in enumerate(world.agents): 38 | # agent.color = np.array([0.35, 0.35, 0.85]) 39 | # # random properties for landmarks 40 | # for i, landmark in enumerate(world.landmarks): 41 | # landmark.color = np.array([0.25, 0.25, 0.25]) 42 | # set random initial states 43 | for i, agent in enumerate(world.agents): 44 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i] 45 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i] 46 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i] 47 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 48 | agent.state.p_vel = np.zeros(world.dim_p) 49 | agent.state.c = np.zeros(world.dim_c) 50 | agent.state.assign = np.zeros(world.dim_c) 51 | agent.state.SNRforUser = np.zeros(world.dim_c) 52 | 53 | for i, landmark in enumerate(world.landmarks): 54 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 55 | 56 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629], 57 | # [-0.324932, 0.60295496], [0.62360916, -0.52245444], 58 | # [0.65862443, 0.23426809]])[i] 59 | 60 | landmark.state.p_pos = np.array([[-0.8154612, -0.24502182], [0.89699076, -0.14581629], 61 | [-0.3249325, 0.60295496], [0.62360916, -0.52245444], 62 | [0.65862443, 0.23426809], [0.29590076, 0.54571629], 63 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649], 64 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i] 65 | 66 | # landmark.state.p_pos = np.array([[0.89699076, -0.14581629], [-0.8154612, 
-0.24502182], 67 | # [0.65862443, 0.23426809], [-0.3249325, 0.60295496], 68 | # [0.62360916, -0.52245444], [0.29590076, 0.54571629], 69 | # [-0.89699076, -0.74682629], [-0.89699076, 0.14581629], 70 | # [0.84629571, -0.98582649], [-0.14699076, -0.75584629]])[i] 71 | 72 | landmark.state.p_vel = np.zeros(world.dim_p) 73 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p) 74 | landmark.color = np.array([0.25, 0.25, 0.25]) 75 | landmark.assign_index = None 76 | landmark.switch_cout = 0 77 | 78 | def benchmark_data(self, agent, world): 79 | rew = 0 80 | collisions = 0 81 | occupied_landmarks = 0 82 | min_dists = 0 83 | for l in world.landmarks: 84 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] 85 | min_dists += min(dists) 86 | rew -= min(dists) 87 | if min(dists) < 0.1: 88 | occupied_landmarks += 1 89 | if agent.collide: 90 | for a in world.agents: 91 | if self.is_collision(a, agent): 92 | rew -= 1 93 | collisions += 1 94 | return rew, collisions, min_dists, occupied_landmarks 95 | 96 | def is_collision(self, agent1, agent2): 97 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 98 | dist = np.sqrt(np.sum(np.square(delta_pos))) 99 | dist_min = agent1.size + agent2.size 100 | return True if dist < dist_min else False 101 | 102 | def reward(self, agent, world): 103 | p = 0.7 104 | try: 105 | 106 | # reward = 10*np.log(p*np.min(world.Rate[np.nonzero(world.Rate)])+(1-p)*np.average(world.Rate[np.nonzero(world.Rate)])) 107 | # reward = 10 * np.log(np.min(world.Rate[np.nonzero(world.Rate)])) 108 | 109 | # 下面的方式寻找最小信息率的用户比较保险,避免当所有的rate均为零时,上一种方式会出错 110 | min_rate = None 111 | for i in range(world.dim_c): 112 | min_rate = np.max(world.Rate[:,i]) if min_rate is None else min(min_rate, np.max(world.Rate[:,i])) 113 | reward = 10*np.log(min_rate+1e-9) 114 | 115 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)])) 116 | # reward = np.log(np.sum(world.Rate)) 117 | except ValueError: 118 | reward = 0 119 | 120 | # reward = np.log(np.sum(world.Rate)) 121 | 122 | for i in range(world.dim_c): 123 | if np.max(world.SNR[:, i]) < 15: 124 | reward -= 10 125 | #print('SNR not saftistied') 126 | 127 | for agent in world.agents: 128 | if not (-1 <= agent.state.p_pos[0] <= 1 and -1 <= agent.state.p_pos[1] <= 1): 129 | reward -= 100 130 | # print(entity.name) 131 | # print(entity.state.p_pos) 132 | # raise Exception('超出边界') 133 | 134 | # # 用户的切换次数作为负奖励 135 | # for landmark in world.landmarks: 136 | # reward -= landmark.switch_cout 137 | # landmark.switch_cout = 0 138 | 139 | if agent.collide: 140 | for a in world.agents: 141 | if a.name is not agent.name: 142 | if self.is_collision(a, agent): 143 | reward -= 100 144 | 145 | return reward 146 | 147 | def observation(self, agent, world): 148 | # get positions of all entities in this agent's reference frame 149 | entity_pos = [] 150 | for entity in world.landmarks: # world.entities: 151 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 152 | # entity colors 153 | entity_color = [] 154 | for entity in world.landmarks: # world.entities: 155 | entity_color.append(entity.color) 156 | # communication of all other agents 157 | other_comm = [] 158 | other_pos = [] 159 | assign = [] 160 | SNRforUser = [] 161 | for other in world.agents: 162 | SNRforUser.append(other.state.SNRforUser) 163 | assign.append(other.state.assign) 164 | if other is agent: continue 165 | other_comm.append(other.state.c) 166 | other_pos.append(other.state.p_pos - agent.state.p_pos) 167 | 168 | maxSNR = 
np.max(SNRforUser) 169 | normalized_SNR = SNRforUser.copy() 170 | for i, item in enumerate(SNRforUser): 171 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR 172 | # return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_comm) # original code 173 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR) -------------------------------------------------------------------------------- /scenario-options/pow-band-loc.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/scenario-options/pow-band-loc.7z -------------------------------------------------------------------------------- /scenario-options/pow-band-loc/core_uav.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # physical/external base state of all entites 4 | class EntityState(object): 5 | def __init__(self): 6 | # physical position 7 | self.p_pos = None 8 | # physical velocity 9 | self.p_vel = None 10 | self.hold_action = 0 11 | 12 | # state of agents (including communication and internal/mental state) 13 | class AgentState(EntityState): 14 | def __init__(self): 15 | super(AgentState, self).__init__() 16 | # communication allocation 17 | self.c = None 18 | self.UAV_Rate = None 19 | self.SNRforUser = None 20 | self.assign = None 21 | 22 | self.assign_pow = None 23 | 24 | # action of the agent 25 | class Action(object): 26 | def __init__(self): 27 | # physical action 28 | self.u = None 29 | # communication action 30 | self.c = None 31 | 32 | self.pow = None 33 | 34 | # properties and state of physical world entity 35 | class Entity(object): 36 | def __init__(self): 37 | # name 38 | self.name = '' 39 | # properties: 40 | self.size = 0.0250 41 | # entity can move / be pushed 42 | self.movable = False 43 | # entity collides with others 44 | self.collide = True 45 | # material density (affects mass) 46 | self.density = 25.0 47 | # color 48 | self.color = None 49 | # max speed and accel 50 | self.max_speed = 10 51 | self.accel = None 52 | # state 53 | self.state = EntityState() 54 | # mass 55 | self.initial_mass = 1 56 | 57 | @property 58 | def mass(self): 59 | return self.initial_mass 60 | 61 | # properties of landmark entities 62 | class Landmark(Entity): 63 | def __init__(self): 64 | super(Landmark, self).__init__() 65 | self.assign_index = None 66 | self.switch_cout = 0 67 | 68 | # properties of agent entities 69 | class Agent(Entity): 70 | def __init__(self): 71 | super(Agent, self).__init__() 72 | # agents are movable by default 73 | self.movable = True 74 | # cannot send communication signals 75 | self.silent = False 76 | # cannot observe the world 77 | self.blind = False 78 | # physical motor noise amount 79 | self.u_noise = None 80 | # communication noise amount 81 | self.c_noise = None 82 | # control range 83 | self.u_range = 1.0 84 | # state 85 | self.state = AgentState() 86 | # action 87 | self.action = Action() 88 | # script behavior to execute 89 | self.action_callback = None 90 | 91 | # multi-agent world 92 | class World(object): 93 | def __init__(self): 94 | # list of agents and entities (can change at execution-time!) 
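# Link-budget summary for the attributes initialised below (used in
# update_world_state() further down). Unlike the band-loc variant, the
# transmit power is itself part of the action here, so for UAV j and user i
#   power_ji   = pow_UAVs * action.pow[i]
#   signal_pow = power_ji * gain_UAV / (d_ji**los_exp_UAV + h**2),  h = 200
#   SNR_ji     = signal_pow / (bandwidth_ji * noise_pdf)   (forced to 0 below 5 dB)
#   Rate_ji    = bandwidth_ji * log2(1 + SNR_ji)
# and each user again attaches to the single UAV offering the highest rate.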
95 | self.agents = [] 96 | self.landmarks = [] 97 | # communication channel dimensionality 98 | self.dim_c = 0 99 | # position dimensionality 100 | self.dim_p = 2 101 | 102 | self.dim_pow = 0 103 | 104 | # color dimensionality 105 | self.dim_color = 3 106 | # simulation timestep 107 | self.dt = 0.1 108 | # physical damping 109 | self.damping = 0.25 110 | # contact response parameters 111 | self.contact_force = 1e+2 112 | self.contact_margin = 1e-3 113 | self.collaborative = False 114 | 115 | self.pow_UAVs = 1000 # 无人机发射功率 116 | self.pow_GBS = None # 地面基站发射功率 117 | self.gain_UAV = 10**(-40/10) # 单位db 无人机链路单位距离下的信道增益 118 | self.gain_GBS = 60 # 单位db 地面基站链路单位距离下的信道增益 119 | self.los_exp_UAV = 2 # 无人机链路衰减指数 120 | self.los_exp_GBS = 3 # 地面基站链路衰减指数 121 | self.max_band = 10e6 # 最大可用带宽 122 | self.noise_pdf = 10**(-167/10) # 噪声功率谱密度 dBm 123 | 124 | self.bandwidth = None # 带宽 125 | self.tx_pow = None # 发射功率 126 | self.Rate = None 127 | self.SNR = None 128 | 129 | # return all entities in the world 130 | @property 131 | def entities(self): 132 | return self.agents + self.landmarks 133 | 134 | # return all agents controllable by external policies 135 | @property 136 | def policy_agents(self): 137 | return [agent for agent in self.agents if agent.action_callback is None] 138 | 139 | # return all agents controlled by world scripts 140 | @property 141 | def scripted_agents(self): 142 | return [agent for agent in self.agents if agent.action_callback is not None] 143 | 144 | # update state of the world 145 | def step(self): 146 | # set actions for scripted agents 147 | for agent in self.scripted_agents: 148 | agent.action = agent.action_callback(agent, self) 149 | 150 | # gather forces applied to entities 151 | p_force = [None] * len(self.entities) 152 | # apply agent physical controls 153 | p_force = self.apply_action_force(p_force) 154 | # apply environment forces 155 | # p_force = self.apply_environment_force(p_force) 156 | # integrate physical state 157 | self.integrate_state(p_force) 158 | 159 | self.update_landmark_pos() 160 | 161 | # self.update_agent_pos() 162 | 163 | # update agent state 164 | for agent in self.agents: 165 | self.update_agent_state(agent) 166 | self.update_world_state() 167 | 168 | # dict_state = [] 169 | # for i, landmark in enumerate(self.landmarks): 170 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 171 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos}) 172 | # 173 | # for i, agent in enumerate(self.agents): 174 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos) 175 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign': agent.state.c}) 176 | # with open('./state_results/states.txt','a') as f: 177 | # f.write(str(dict_state)+'\n') 178 | 179 | # gather agent action forces 180 | def apply_action_force(self, p_force): 181 | # set applied forces 182 | for i,agent in enumerate(self.agents): 183 | if agent.movable: 184 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 185 | p_force[i] = agent.action.u + noise 186 | return p_force 187 | 188 | # gather physical forces acting on entities 189 | def apply_environment_force(self, p_force): 190 | # simple (but inefficient) collision response 191 | for a,entity_a in enumerate(self.entities): 192 | for b,entity_b in enumerate(self.entities): 193 | if(b <= a): continue 194 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b) 195 | if(f_a is not None): 196 | if(p_force[a] is None): p_force[a] = 0.0 197 | p_force[a] = 
f_a + p_force[a] 198 | if(f_b is not None): 199 | if(p_force[b] is None): p_force[b] = 0.0 200 | p_force[b] = f_b + p_force[b] 201 | return p_force 202 | 203 | # integrate physical state 204 | def integrate_state(self, p_force): 205 | for i, agent in enumerate(self.agents): 206 | if not agent.movable: continue 207 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping) 208 | if (p_force[i] is not None): 209 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt 210 | if agent.max_speed is not None: 211 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 212 | if speed > agent.max_speed: 213 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 214 | np.square(agent.state.p_vel[1])) * agent.max_speed 215 | agent.state.p_pos += agent.state.p_vel * self.dt 216 | 217 | def update_landmark_pos(self): 218 | # for landmark in self.landmarks: 219 | # if not landmark.movable: continue 220 | # if landmark.state.hold_action > 0: 221 | # landmark.state.hold_action -= 1 222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 223 | # else: 224 | # landmark.state.hold_action = np.random.randint(0,10) 225 | # 226 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 227 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 228 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 229 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 230 | # # print(temp_pos) 231 | # landmark.state.p_pos = temp_pos 232 | 233 | for landmark in self.landmarks: 234 | if not landmark.movable: continue 235 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 236 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 237 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1): 238 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5 239 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt 240 | # print(temp_pos) 241 | landmark.state.p_pos = temp_pos 242 | 243 | 244 | def update_agent_state(self, agent): 245 | # set communication state (directly for now) 246 | if agent.silent: 247 | agent.state.c = np.zeros(self.dim_c) 248 | else: 249 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0 250 | # agent.state.c = agent.action.c + noise 251 | agent.state.c = agent.action.c 252 | 253 | def update_agent_pos(self): 254 | # set communication state (directly for now) 255 | for i, agent in enumerate(self.agents): 256 | if not agent.movable: continue 257 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0 258 | agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping) 259 | print(agent.state.p_vel) 260 | if agent.max_speed is not None: 261 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1])) 262 | if speed > agent.max_speed: 263 | print('高于最大速度') 264 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) + 265 | np.square(agent.state.p_vel[1])) * agent.max_speed 266 | agent.state.p_pos += agent.state.p_vel * self.dt 267 | 268 | def update_world_state(self): 269 | # 根据基站提供的方案,用户进行选择 270 | 271 | bw = np.zeros((len(self.agents), self.dim_c)) 272 | power = np.zeros((len(self.agents), self.dim_pow)) 273 | disVec = np.zeros((len(self.agents), self.dim_c)) 274 | height = np.ones((len(self.agents), self.dim_c)) * 200 # 无人机的飞行高度 275 | for index, agent in enumerate(self.agents): 276 | 
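# Inside this loop each UAV's allocation action is turned into physical
# quantities: bw[index][i] is the bandwidth share max_band * action.c[i],
# power[index][i] is the transmit-power share pow_UAVs * action.pow[i], and
# disVec[index][i] is the UAV-to-user Euclidean distance scaled by 1000,
# which puts it on the same scale as the 200 m flight altitude set just above.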
bw[index] = np.array(list(self.max_band * i for i in agent.action.c)) 277 | power[index] = np.array(list(self.pow_UAVs * i for i in agent.action.pow)) 278 | # 计算无人机到各个用户的欧式距离 279 | for i, landmark in enumerate(self.landmarks): 280 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000 281 | # 距离超过定值时,无法进行通信,将带宽分配为0 282 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i] 283 | # 接收到的信号功率 284 | signal_pow = power * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2) 285 | 286 | # 接收端噪声功率 287 | noise_pow = bw * self.noise_pdf + 1e-9 288 | # 接收端信噪比 289 | SNR = signal_pow / noise_pow 290 | 291 | SNR[SNR < np.sqrt(10)] = 0 #SNR小于5dB时,无法通信,将其强制置零 292 | 293 | 294 | # for i in range(self.dim_c): 295 | # for j in range(len(self.agents)): 296 | # if 10*np.log10(SNR[j][i]) < 15: 297 | # bw[j][i] = 0 298 | 299 | # 信息传输速率 300 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR) 301 | 302 | ## 保证每个用户只与一个无人机建立通信链路 303 | mask = np.zeros((len(self.agents), self.dim_c)) #被遮掉的位置为0 304 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) #被遮掉的位置为负数 305 | 306 | for i, landmark in enumerate(self.landmarks): 307 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])): 308 | landmark.switch_cout = 1 309 | landmark.assign_index = np.argmax(Rate[:, i]) 310 | else: 311 | landmark.assign_index = np.argmax(Rate[:, i]) 312 | 313 | mask[landmark.assign_index][i] = 1 314 | 315 | 316 | self.bandwidth = mask * bw 317 | self.tx_pow = mask * power 318 | self.Rate = mask * Rate 319 | self.SNR = 10*np.log10(mask * SNR + 1e-10) 320 | 321 | for i, agent in enumerate(self.agents): 322 | agent.state.assign = self.bandwidth[i, :]/self.max_band 323 | agent.state.UAV_Rate = self.Rate[i, :] 324 | agent.state.SNRforUser = SNR[i, :] 325 | agent.state.assign_pow = self.tx_pow[i, :]/self.pow_UAVs 326 | 327 | # for i, landmark in enumerate(self.landmarks): 328 | # band = self.bandwidth.transpose() 329 | # landmark.assign_index = np.nonzero(band[i]) 330 | 331 | 332 | 333 | # get collision forces for any contact between two entities 334 | def get_collision_force(self, entity_a, entity_b): 335 | if (not entity_a.collide) or (not entity_b.collide): 336 | return [None, None] # not a collider 337 | if (entity_a is entity_b): 338 | return [None, None] # don't collide against itself 339 | # compute actual distance between entities 340 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos 341 | dist = np.sqrt(np.sum(np.square(delta_pos))) 342 | # minimum allowable distance 343 | dist_min = entity_a.size + entity_b.size 344 | # softmax penetration 345 | k = self.contact_margin 346 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k 347 | force = self.contact_force * delta_pos / dist * penetration 348 | force_a = +force if entity_a.movable else None 349 | force_b = -force if entity_b.movable else None 350 | return [force_a, force_b] -------------------------------------------------------------------------------- /scenario-options/pow-band-loc/environment_uav.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.envs.registration import EnvSpec 4 | import numpy as np 5 | from multiagent.multi_discrete import MultiDiscrete 6 | 7 | # environment for all agents in the multiagent world 8 | # currently code assumes that no agents will be created/destroyed at runtime! 
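# Per-UAV action layout (a sketch, assuming discrete_action_space = True as
# set in __init__ below): the flat action vector concatenates three segments
# that _set_action() later splits apart again:
#   movement   - world.dim_p * 2 + 1 slots (with dim_p = 2: no-op, +x, -x, +y, -y)
#   bandwidth  - world.dim_c slots, one share per user
#   power      - world.dim_pow slots, presumably one share per user, mirroring
#                the bandwidth segment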
9 | class MultiAgentEnv(gym.Env): 10 | metadata = { 11 | 'render.modes' : ['human', 'rgb_array'] 12 | } 13 | 14 | def __init__(self, world, reset_callback=None, reward_callback=None, 15 | observation_callback=None, info_callback=None, 16 | done_callback=None, shared_viewer=True): 17 | 18 | self.world = world 19 | self.agents = self.world.policy_agents 20 | # set required vectorized gym env property 21 | self.n = len(world.policy_agents) 22 | # scenario callbacks 23 | self.reset_callback = reset_callback 24 | self.reward_callback = reward_callback 25 | self.observation_callback = observation_callback 26 | self.info_callback = info_callback 27 | self.done_callback = done_callback 28 | # environment parameters 29 | self.discrete_action_space = True 30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector 31 | self.discrete_action_input = False 32 | # if true, even the action is continuous, action will be performed discretely 33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False 34 | # if true, every agent has the same reward 35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False 36 | self.time = 0 37 | 38 | # configure spaces 39 | self.action_space = [] 40 | self.observation_space = [] 41 | for agent in self.agents: 42 | total_action_space = [] 43 | # physical action space 44 | if self.discrete_action_space: 45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1) 46 | else: 47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) 48 | if agent.movable: 49 | total_action_space.append(u_action_space) 50 | # communication action space 51 | if self.discrete_action_space: 52 | c_action_space = spaces.Discrete(world.dim_c) 53 | pow_action_space = spaces.Discrete(world.dim_pow) 54 | else: 55 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) 56 | pow_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_pow,), dtype=np.float32) 57 | if not agent.silent: 58 | total_action_space.append(c_action_space) 59 | total_action_space.append(pow_action_space) 60 | # total action space 61 | if len(total_action_space) > 1: 62 | # all action spaces are discrete, so simplify to MultiDiscrete action space 63 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): 64 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) 65 | else: 66 | act_space = spaces.Tuple(total_action_space) 67 | self.action_space.append(act_space) 68 | else: 69 | self.action_space.append(total_action_space[0]) 70 | # observation space 71 | obs_dim = len(observation_callback(agent, self.world)) 72 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) 73 | # agent.action.c = np.zeros(self.world.dim_c) 74 | # agent.action.pow = np.zeros(self.world.dim_pow) 75 | 76 | # rendering 77 | self.shared_viewer = shared_viewer 78 | if self.shared_viewer: 79 | self.viewers = [None] 80 | else: 81 | self.viewers = [None] * self.n 82 | self._reset_render() 83 | 84 | def step(self, action_n): 85 | obs_n = [] 86 | reward_n = [] 87 | done_n = [] 88 | info_n = {'n': []} 89 | self.agents = self.world.policy_agents 90 | # set action for each agent 91 | for i, agent in enumerate(self.agents): 92 | self._set_action(action_n[i], agent, self.action_space[i]) 93 | 94 | # advance world state 95 | self.world.step() 96 | # record 
observation for each agent 97 | for agent in self.agents: 98 | obs_n.append(self._get_obs(agent)) 99 | reward_n.append(self._get_reward(agent)) 100 | done_n.append(self._get_done(agent)) 101 | 102 | info_n['n'].append(self._get_info(agent)) 103 | 104 | # all agents get total reward in cooperative case 105 | reward = np.sum(reward_n) 106 | if self.shared_reward: 107 | reward_n = [reward] * self.n 108 | 109 | return obs_n, reward_n, done_n, info_n 110 | 111 | def reset(self): 112 | # reset world 113 | self.reset_callback(self.world) 114 | # reset renderer 115 | self._reset_render() 116 | # record observations for each agent 117 | obs_n = [] 118 | self.agents = self.world.policy_agents 119 | for agent in self.agents: 120 | obs_n.append(self._get_obs(agent)) 121 | return obs_n 122 | 123 | # get info used for benchmarking 124 | def _get_info(self, agent): 125 | if self.info_callback is None: 126 | return {} 127 | return self.info_callback(agent, self.world) 128 | 129 | # get observation for a particular agent 130 | def _get_obs(self, agent): 131 | if self.observation_callback is None: 132 | return np.zeros(0) 133 | return self.observation_callback(agent, self.world) 134 | 135 | # get dones for a particular agent 136 | # unused right now -- agents are allowed to go beyond the viewing screen 137 | def _get_done(self, agent): 138 | if self.done_callback is None: 139 | return False 140 | return self.done_callback(agent, self.world) 141 | 142 | # get reward for a particular agent 143 | def _get_reward(self, agent): 144 | if self.reward_callback is None: 145 | return 0.0 146 | return self.reward_callback(agent, self.world) 147 | 148 | # set env action for a particular agent 149 | def _set_action(self, action, agent, action_space, time=None): 150 | agent.action.u = np.zeros(self.world.dim_p) 151 | agent.action.c = np.zeros(self.world.dim_c) 152 | # process action 153 | if isinstance(action_space, MultiDiscrete): 154 | act = [] 155 | size = action_space.high - action_space.low + 1 156 | index = 0 157 | for s in size: 158 | act.append(action[index:(index+s)]) 159 | index += s 160 | action = act 161 | else: 162 | action = [action] 163 | 164 | if agent.movable: 165 | # physical action 166 | if self.discrete_action_input: 167 | agent.action.u = np.zeros(self.world.dim_p) 168 | # process discrete action 169 | if action[0] == 1: agent.action.u[0] = -1.0 170 | if action[0] == 2: agent.action.u[0] = +1.0 171 | if action[0] == 3: agent.action.u[1] = -1.0 172 | if action[0] == 4: agent.action.u[1] = +1.0 173 | else: 174 | if self.force_discrete_action: 175 | d = np.argmax(action[0]) 176 | action[0][:] = 0.0 177 | action[0][d] = 1.0 178 | if self.discrete_action_space: 179 | agent.action.u[0] += action[0][1] - action[0][2] 180 | agent.action.u[1] += action[0][3] - action[0][4] 181 | else: 182 | agent.action.u = action[0] 183 | sensitivity = 5 184 | if agent.accel is not None: 185 | sensitivity = agent.accel 186 | agent.action.u *= sensitivity 187 | action = action[1:] 188 | if not agent.silent: 189 | # communication action 190 | if self.discrete_action_input: 191 | agent.action.c = np.zeros(self.world.dim_c) 192 | agent.action.pow = np.zeros(self.world.dim_pow) 193 | agent.action.c[action[0]] = 1.0 194 | agent.action.pow[action[1]] = 1.0 195 | else: 196 | agent.action.c = action[0] 197 | agent.action.pow = action[1] 198 | 199 | action = action[2:] # two communication actions (c and pow) were set above, so slice off both segments here 200 | # make sure we used all elements of action 201 | assert len(action) == 0 202 | 203 | # reset rendering assets 204 | def 
_reset_render(self): 205 | self.render_geoms = None 206 | self.render_geoms_xform = None 207 | # self.render_geoms_uav = None 208 | # self.render_geoms_xform_uav = None 209 | 210 | # render environment 211 | def render(self, mode='human'): 212 | # if mode == 'human': 213 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 214 | # message = '' 215 | # for agent in self.world.agents: 216 | # comm = [] 217 | # for other in self.world.agents: 218 | # if other is agent: continue 219 | # if np.all(other.state.c == 0): 220 | # word = '_' 221 | # else: 222 | # word = alphabet[np.argmax(other.state.c)] 223 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ') 224 | # print(message) 225 | 226 | for i in range(len(self.viewers)): 227 | # create viewers (if necessary) 228 | if self.viewers[i] is None: 229 | # import rendering only if we need it (and don't import for headless machines) 230 | #from gym.envs.classic_control import rendering 231 | from multiagent import rendering 232 | self.viewers[i] = rendering.Viewer(700,700) 233 | 234 | # create rendering geometry 235 | if self.render_geoms is None: 236 | # import rendering only if we need it (and don't import for headless machines) 237 | #from gym.envs.classic_control import rendering 238 | from multiagent import rendering 239 | self.render_geoms = [] 240 | self.render_geoms_xform = [] 241 | self.render_geoms_uav = [] 242 | self.render_geoms_xform_uav = [] 243 | self.render_geoms_line = [] 244 | 245 | 246 | range_color = [0.25, 0.25, 0.75] 247 | for entity in self.world.entities: 248 | geom = rendering.make_circle(entity.size) 249 | xform = rendering.Transform() 250 | 251 | if 'agent' in entity.name: 252 | geom.set_color(*entity.color, alpha=0.5) 253 | 254 | geom_uav = rendering.make_circle(entity.size*4) 255 | xform_uav = rendering.Transform() 256 | geom_uav.set_color(*range_color, alpha=0.25) 257 | geom_uav.add_attr(xform_uav) 258 | 259 | self.render_geoms_uav.append(geom_uav) 260 | self.render_geoms_xform_uav.append(xform_uav) 261 | 262 | else: 263 | geom.set_color(*entity.color) 264 | 265 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos) 266 | self.render_geoms_line.append(geom_line) 267 | 268 | geom.add_attr(xform) 269 | self.render_geoms.append(geom) 270 | self.render_geoms_xform.append(xform) 271 | 272 | # add geoms to viewer 273 | for viewer in self.viewers: 274 | viewer.geoms = [] 275 | for geom in self.render_geoms: 276 | viewer.add_geom(geom) 277 | for geom_uav in self.render_geoms_uav: 278 | viewer.add_geom(geom_uav) 279 | for geom_line in self.render_geoms_line: 280 | viewer.add_geom(geom_line) 281 | 282 | results = [] 283 | for i in range(len(self.viewers)): 284 | from multiagent import rendering 285 | # update bounds to center around agent 286 | cam_range = 1 287 | if self.shared_viewer: 288 | pos = np.zeros(self.world.dim_p) 289 | else: 290 | pos = self.agents[i].state.p_pos 291 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range) 292 | # update geometry positions 293 | for e, entity in enumerate(self.world.entities): 294 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos) 295 | for a, agent in enumerate(self.world.policy_agents): 296 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos) 297 | for l, landmark in enumerate(self.world.landmarks): 298 | self.render_geoms_line[l].start = landmark.state.p_pos 299 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos 300 | 
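# At this point every geometry has been repositioned for the current frame: one circle per entity,
# a larger translucent circle centred on each UAV, and a line joining each user (landmark) to the
# UAV it is currently assigned to.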
301 | # render to display or array 302 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) 303 | 304 | return results 305 | 306 | # create receptor field locations in local coordinate frame 307 | def _make_receptor_locations(self, agent): 308 | receptor_type = 'polar' 309 | range_min = 0.05 * 2.0 310 | range_max = 1.00 311 | dx = [] 312 | # circular receptive field 313 | if receptor_type == 'polar': 314 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): 315 | for distance in np.linspace(range_min, range_max, 3): 316 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) 317 | # add origin 318 | dx.append(np.array([0.0, 0.0])) 319 | # grid receptive field 320 | if receptor_type == 'grid': 321 | for x in np.linspace(-range_max, +range_max, 5): 322 | for y in np.linspace(-range_max, +range_max, 5): 323 | dx.append(np.array([x,y])) 324 | return dx -------------------------------------------------------------------------------- /scenario-options/pow-band-loc/simple_collaborative_comm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiagent.core_uav import World, Agent, Landmark 3 | from multiagent.scenario import BaseScenario 4 | 5 | class Scenario(BaseScenario): 6 | def make_world(self): 7 | world = World() 8 | # set any world properties first 9 | world.dim_c = 10 10 | world.dim_pow = 10 11 | num_uav = 3 12 | world.collaborative = True # whether agents share rewards 13 | # add agents 14 | world.agents = [Agent() for i in range(num_uav)] 15 | for i, agent in enumerate(world.agents): 16 | agent.name = 'agent %d' % i 17 | agent.collide = True 18 | # add landmarks 19 | world.landmarks = [Landmark() for i in range(world.dim_c)] 20 | for i, landmark in enumerate(world.landmarks): 21 | landmark.name = 'landmark %d' % i 22 | landmark.collide = False 23 | landmark.movable = True 24 | 25 | # make initial conditions 26 | self.reset_world(world) 27 | return world 28 | 29 | def reset_world(self, world): 30 | # set color for landmarks 31 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75]) 32 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75]) 33 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75]) 34 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75]) 35 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75]) 36 | 37 | # set random initial states 38 | # for i, agent in enumerate(world.agents): 39 | # agent.color = np.array([0.35, 0.35, 0.85]) 40 | # # random properties for landmarks 41 | # for i, landmark in enumerate(world.landmarks): 42 | # landmark.color = np.array([0.25, 0.25, 0.25]) 43 | # set random initial states 44 | for i, agent in enumerate(world.agents): 45 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i] 46 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i] 47 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i] 48 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 49 | agent.state.p_vel = np.zeros(world.dim_p) 50 | agent.state.c = np.zeros(world.dim_c) 51 | 52 | agent.state.assign = np.zeros(world.dim_c) 53 | agent.state.SNRforUser = np.zeros(world.dim_c) 54 | agent.state.assign_pow = np.zeros(world.dim_c) 55 | 56 | for i, landmark in enumerate(world.landmarks): 57 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) 58 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629], 59 | # [-0.324932, 
0.60295496], [0.62360916, -0.52245444], 64 | [0.65862443, 0.23426809], [0.29590076, 0.54571629], 65 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649], 66 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i] 67 | 68 | landmark.state.p_vel = np.zeros(world.dim_p) 69 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p) 70 | landmark.color = np.array([0.25, 0.25, 0.25]) 71 | landmark.assign_index = None 72 | landmark.switch_cout = 0 73 | 74 | def benchmark_data(self, agent, world): 75 | rew = 0 76 | collisions = 0 77 | occupied_landmarks = 0 78 | min_dists = 0 79 | for l in world.landmarks: 80 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents] 81 | min_dists += min(dists) 82 | rew -= min(dists) 83 | if min(dists) < 0.1: 84 | occupied_landmarks += 1 85 | if agent.collide: 86 | for a in world.agents: 87 | if self.is_collision(a, agent): 88 | rew -= 1 89 | collisions += 1 90 | return rew, collisions, min_dists, occupied_landmarks 91 | 92 | def is_collision(self, agent1, agent2): 93 | delta_pos = agent1.state.p_pos - agent2.state.p_pos 94 | dist = np.sqrt(np.sum(np.square(delta_pos))) 95 | dist_min = agent1.size + agent2.size 96 | return True if dist < dist_min else False 97 | 98 | def reward(self, agent, world): 99 | try: 100 | min_rate = None 101 | for i in range(world.dim_c): 102 | min_rate = np.max(world.Rate[:, i]) if min_rate is None else min(min_rate, np.max(world.Rate[:, i])) 103 | reward = 10*np.log(min_rate+1e-9) 104 | # reward = 10*np.log(np.min(world.Rate[np.nonzero(world.Rate)])) 105 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)])) 106 | # reward = np.log(np.sum(world.Rate)) 107 | except ValueError: 108 | reward = 0 109 | 110 | # reward = np.log(np.sum(world.Rate)) 111 | 112 | for i in range(world.dim_c): 113 | if np.max(world.SNR[:, i]) < 15: 114 | reward -= 10 115 | #print('SNR not satisfied') 116 | 117 | for uav in world.agents: # use a separate loop variable so the 'agent' argument is not shadowed 118 | if not (-1 <= uav.state.p_pos[0] <= 1 and -1 <= uav.state.p_pos[1] <= 1): 119 | reward -= 100 120 | # print(uav.name) 121 | # print(uav.state.p_pos) 122 | # raise Exception('UAV out of bounds') 123 | 124 | # the number of handovers per user is added as a negative reward 125 | for landmark in world.landmarks: 126 | reward -= landmark.switch_cout 127 | landmark.switch_cout = 0 128 | 129 | if agent.collide: 130 | for a in world.agents: 131 | if a.name != agent.name: 132 | if self.is_collision(a, agent): 133 | reward -= 100 134 | 135 | return reward 136 | 137 | def observation(self, agent, world): 138 | # get positions of all entities in this agent's reference frame 139 | entity_pos = [] 140 | for entity in world.landmarks: # world.entities: 141 | entity_pos.append(entity.state.p_pos - agent.state.p_pos) 142 | # entity colors 143 | entity_color = [] 144 | for entity in world.landmarks: # world.entities: 145 | entity_color.append(entity.color) 146 | # communication of all other agents 147 | comm = [] 148 | other_pos = [] 149 | assign = [] 150 | SNRforUser = [] 151 | assign_pow = [] 152 | for other in world.agents: 153 | assign.append(other.state.assign) 154 | SNRforUser.append(other.state.SNRforUser) 155 | assign_pow.append(other.state.assign_pow) 156 | if other is agent: continue 157 | comm.append(other.state.c) 158 | other_pos.append(other.state.p_pos - agent.state.p_pos) 159 | # print(assign) 160 | #
print(assign_pow) 161 | # return np.concatenate( 162 | # [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm) 163 | 164 | # return np.concatenate( 165 | # [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign) 166 | 167 | maxSNR = np.max(SNRforUser) 168 | normalized_SNR = SNRforUser.copy() 169 | for i, item in enumerate(SNRforUser): 170 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR 171 | 172 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR+ assign_pow) 173 | --------------------------------------------------------------------------------
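To make the wiring of these pieces concrete, the sketch below shows how the scenario and environment above fit together in a driver loop. It is a minimal illustration under stated assumptions rather than code shipped in this repository: it assumes the pow-band-loc copies are installed in place of multiagent/core_uav.py, multiagent/environment_uav.py and multiagent/scenarios/simple_collaborative_comm.py, and that the standard MPE scenarios.load helper is available. Each action is passed as the concatenated one-hot segments [movement (5), channel (dim_c), power (dim_pow)] that _set_action splits back apart.

import numpy as np
import multiagent.scenarios as scenarios
from multiagent.environment_uav import MultiAgentEnv

# build the world from the scenario and wrap it in the environment
scenario = scenarios.load('simple_collaborative_comm.py').Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world, reset_callback=scenario.reset_world,
                    reward_callback=scenario.reward,
                    observation_callback=scenario.observation)

obs_n = env.reset()
for _ in range(10):
    act_n = []
    for space in env.action_space:  # MultiDiscrete([move, channel, power]) per UAV
        segments = []
        for low, high in zip(space.low, space.high):
            one_hot = np.zeros(int(high - low + 1))
            one_hot[np.random.randint(len(one_hot))] = 1.0  # pick one option per head
            segments.append(one_hot)
        act_n.append(np.concatenate(segments))  # flat vector that _set_action splits apart
    obs_n, reward_n, done_n, info_n = env.step(act_n)

A trained MADDPG actor would replace the random one-hot sampling here with the network output for each action head.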