├── .idea
│   ├── .gitignore
│   ├── MADDPG_torch.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── README.md
├── __pycache__
│   ├── arguments.cpython-36.pyc
│   ├── model.cpython-36.pyc
│   └── replay_buffer.cpython-36.pyc
├── arguments.py
├── enjoy_split.py
├── learning_curves
│   ├── exp_torch_4
│   │   ├── agrewards.pkl
│   │   └── rewards.pkl
│   ├── exp_torch_5
│   │   ├── agrewards.pkl
│   │   └── rewards.pkl
│   ├── exp_torch_6
│   │   ├── agrewards.pkl
│   │   └── rewards.pkl
│   ├── exp_torch_7
│   │   ├── agrewards.pkl
│   │   └── rewards.pkl
│   └── exp_torch_8
│       ├── agrewards.pkl
│       └── rewards.pkl
├── main_openai.py
├── main_openai_backup.py
├── model.py
├── multiagent
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── core.cpython-36.pyc
│   │   ├── core_uav.cpython-36.pyc
│   │   ├── environment.cpython-36.pyc
│   │   ├── environment_uav.cpython-36.pyc
│   │   ├── multi_discrete.cpython-36.pyc
│   │   ├── rendering.cpython-36.pyc
│   │   └── scenario.cpython-36.pyc
│   ├── core.py
│   ├── core_uav.py
│   ├── environment.py
│   ├── environment_uav.py
│   ├── multi_discrete.py
│   ├── policy.py
│   ├── rendering.py
│   ├── scenario.py
│   └── scenarios
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-36.pyc
│       │   ├── simple_adversary.cpython-36.pyc
│       │   └── simple_collaborative_comm.cpython-36.pyc
│       ├── simple.py
│       ├── simple_adversary.py
│       ├── simple_collaborative_comm.py
│       ├── simple_crypto.py
│       ├── simple_push.py
│       ├── simple_reference.py
│       ├── simple_speaker_listener.py
│       ├── simple_spread.py
│       ├── simple_tag.py
│       └── simple_world_comm.py
├── obs_img.py
├── plot_reward.py
├── replay_buffer.py
└── scenario-options
    ├── band-loc
    │   ├── core_uav.py
    │   ├── environment_uav.py
    │   └── simple_collaborative_comm.py
    ├── pow-band-loc.7z
    └── pow-band-loc
        ├── core_uav.py
        ├── environment_uav.py
        └── simple_collaborative_comm.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/MADDPG_torch.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Time: 2019-11-05
2 | ## Description
3 | This project is a PyTorch implementation of MADDPG, an algorithm that is already popular in multi-agent reinforcement learning. With the growing popularity of PyTorch, a PyTorch version of this project should be useful for learners working on multi-agent problems (not for profit).
4 |
5 | ## Installation
6 | Known dependencies: Python (3.6.8), OpenAI Gym (0.10.5), PyTorch (1.1.0), NumPy (1.17.3)
7 | Step 1: Install MPE (Multi-Agent Particle Environments) following the OpenAI readme (or my blog).
8 | Step 2: Download the project and cd into it. Make sure you have a 'logs' and a 'models' folder in your project.
9 | Step 3: Run 'main_openai.py' with 'python main_openai.py --scenario_name simple_tag'
10 |
11 | ## Structure
12 | ./main_openai.py: Main function.
13 | ./arguments.py: Initializes the parameters for the game, training and saving.
14 | ./model.py: Defines the actor and critic networks for the agents.
15 | ./replay_buffer.py: Stores the experience memory for all the agents.
16 | ./enjoy_split.py: A template for testing a model trained with 'main_openai.py'.
17 |
18 | ## Command line options
19 | ### Environment options
20 | --scenario_name: defines which environment in the MPE is to be used (default: "simple")
21 | --per_episode_max_len: maximum length of each episode for the environment (default: 45)
22 | --max_episode: total number of training episodes (default: 150000)
23 |
24 | ### Training options
25 | --lr_a: learning rate for the actor (default: 1e-2)
26 | --lr_c: learning rate for the critic (default: 1e-2)
27 | --gamma: discount factor (default: 0.95)
28 | --batch_size: batch size (default: 1024)
29 | --num_units_openai: number of units in the MLP (default: 64)
30 |
31 | ### Checkpointing
32 | --save_dir: directory where intermediate training results and model will be saved (default: "/tmp/policy/")
33 | --fre4save_model: model is saved every time this number of game steps has been completed (default: 1000)
34 | --start_save_model: the episode at which we start saving the model (default: 400)
35 |
36 | ## Links
37 | OpenAI MPE: https://github.com/openai/multiagent-particle-envs
38 | The explorer of Game AI: https://zhuanlan.zhihu.com/c_186658689
39 |
--------------------------------------------------------------------------------
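As a quick illustration of how the pieces described in the README fit together, here is a minimal sketch (a hypothetical `how_to_run.py`, not part of the repository) that builds an MPE environment the same way `make_env()` in `enjoy_split.py` below does, but with the stock MPE environment (`multiagent/environment.py`) rather than the UAV variant. It assumes MPE is installed as in Step 1 and that a stock scenario such as `simple_tag` is requested on the command line; training itself is done by `main_openai.py` as in Step 3.

# how_to_run.py -- hypothetical helper, not part of the repository
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv   # stock MPE environment
from arguments import parse_args

if __name__ == '__main__':
    arglist = parse_args()  # e.g. python how_to_run.py --scenario_name simple_tag
    # same construction as make_env() in enjoy_split.py
    scenario = scenarios.load(arglist.scenario_name + ".py").Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)

    obs_n = env.reset()
    print('number of agents:', env.n)
    print('observation sizes:', [obs.shape for obs in obs_n])
    print('action spaces:', env.action_space)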
/__pycache__/arguments.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/arguments.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/replay_buffer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/__pycache__/replay_buffer.cpython-36.pyc
--------------------------------------------------------------------------------
/arguments.py:
--------------------------------------------------------------------------------
1 | # Time: 2019-11-05
2 | # Author: Zachary
3 | # Name: MADDPG_torch
4 |
5 | import time
6 | import torch
7 | import argparse
8 |
9 | device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
10 | time_now = time.strftime('%y%m_%d%H%M')
11 |
12 | def parse_args():
13 | parser = argparse.ArgumentParser("reinforcement learning experiments for multiagent environments")
14 |
15 | # environment simple_adversary simple_collaborative_comm
16 | parser.add_argument("--scenario_name", type=str, default="simple_collaborative_comm", help="name of the scenario script")
17 | parser.add_argument("--start_time", type=str, default=time_now, help="the time when start the game")
18 | parser.add_argument("--per_episode_max_len", type=int, default=100, help="maximum episode length")
19 | parser.add_argument("--max_episode", type=int, default=150000, help="maximum episode length")
20 | parser.add_argument("--num-adversaries", type=int, default=0, help="number of adversaries")
21 |
22 | # core training parameters
23 | parser.add_argument("--exp_name", type=str, default="exp_torch_8",help="name of the experiment")
24 | parser.add_argument("--device", default=device, help="torch device ")
25 | parser.add_argument("--learning_start_step", type=int, default=0, help="learning start steps")
26 | parser.add_argument("--max_grad_norm", type=float, default=0.5, help="max gradient norm for clip")
27 | parser.add_argument("--learning_fre", type=int, default=100, help="learning frequency")
28 | parser.add_argument("--tao", type=int, default=0.01, help="how depth we exchange the par of the nn")
29 | parser.add_argument("--lr_a", type=float, default=1e-2, help="learning rate for adam optimizer")
30 | parser.add_argument("--lr_c", type=float, default=1e-2, help="learning rate for adam optimizer")
31 | parser.add_argument("--gamma", type=float, default=0.8, help="discount factor")
32 | parser.add_argument("--batch_size", type=int, default=1024, help="number of episodes to optimize at the same time")
33 | parser.add_argument("--memory_size", type=int, default=1e6, help="number of data stored in the memory")
34 | parser.add_argument("--num_units_1", type=int, default=128, help="number of units in the mlp")
35 | parser.add_argument("--num_units_2", type=int, default=64, help="number of units in the mlp")
36 | parser.add_argument("--num_units_openai", type=int, default=64, help="number of units in the mlp")
37 |
38 | # checkpointing
39 | parser.add_argument("--fre4save_model", type=int, default=100, help="the number of the episode for saving the model")
40 | parser.add_argument("--start_save_model", type=int, default=400, help="the number of the episode for saving the model")
41 | parser.add_argument("--save_dir", type=str, default="./models/", \
42 | help="directory in which training state and model should be saved")
43 | parser.add_argument("--old_model_name", type=str, default="models/exp_torch_8/21_0406_0824_12120000/", \
44 | help="directory in which training state and model are loaded")
45 | # evaluation
46 | parser.add_argument("--restore", action="store_true", default=True)
47 | parser.add_argument("--display", action="store_true", default=True)
48 | parser.add_argument("--test", action="store_true", default=True)
49 | parser.add_argument("--restore_idxs", type=list, default=[0,1,2])
50 | parser.add_argument("--benchmark", action="store_true", default=False)
51 | parser.add_argument("--benchmark-iters", type=int, default=100000, help="number of iterations run for benchmarking")
52 | parser.add_argument("--benchmark-dir", type=str, default="./benchmark_files/", \
53 | help="directory where benchmark data is saved")
54 | parser.add_argument("--plots-dir", type=str, default="./learning_curves/", \
55 | help="directory where plot data is saved")
56 | return parser.parse_args()
57 |
--------------------------------------------------------------------------------
/enjoy_split.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import torch
5 | import multiagent.scenarios as scenarios
6 | from multiagent.environment_uav import MultiAgentEnv
7 |
8 | from model import actor_agent, critic_agent
9 | from arguments import parse_args
10 |
11 | def make_env(scenario_name, arglist, benchmark=False):
12 | """
13 | create the environment from script
14 | """
15 | scenario = scenarios.load(scenario_name + ".py").Scenario()
16 | world = scenario.make_world()
17 | if benchmark:
18 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data)
19 | else:
20 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
21 | return env
22 |
23 | def get_trainers(env, arglist):
24 | trainers_cur = []
25 | trainers_tar = []
26 | optimizers = []
27 | input_size = [8, 10, 10] # the obs size
28 | input_size_global = [23, 25, 25] # cal by README
29 |
30 | """ load the model """
31 | actors_tar = [torch.load(arglist.old_model_name+'a_t_{}.pt'.format(agent_idx), map_location=arglist.device) \
32 | for agent_idx in range(env.n)]
33 |
34 | return actors_tar
35 |
36 | def enjoy(arglist):
37 | """
38 | This func is used for testing the model
39 | """
40 |
41 | episode_step = 0
42 | """ init the env """
43 | env = make_env(arglist.scenario_name, arglist, arglist.benchmark)
44 |
45 | """ init the agents """
46 | obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
47 | actors_tar = get_trainers(env, arglist)
48 |
49 | """ interact with the env """
50 | obs_n = env.reset()
51 | while(True):
52 |
53 | # update the episode step number
54 | episode_step += 1
55 | # print(episode_step)
56 |
57 | # get action
58 | try:
59 | action_n = []
60 | # action_n = [agent.actor(torch.from_numpy(obs).to(arglist.device, torch.float)).numpy() \
61 | # for agent, obs in zip(trainers_cur, obs_n)]
62 | for actor, obs in zip(actors_tar, obs_n):
63 | action = torch.clamp(actor(torch.from_numpy(obs).to(arglist.device, torch.float)), -1, 1)
64 | action_n.append(action)
65 |
66 |
67 | except:
68 | print(obs_n)
69 |
70 | # interact with env
71 | new_obs_n, rew_n, done_n, info_n = env.step(action_n)
72 |
73 | print('*********************Bandwidth assignment**********************')
74 | for i in env.world.bandwidth:
75 | for j in i:
76 | print('%10.2e' % j, end='')
77 | print('\n', end='')
78 |
79 | print('*************************SNR*****************************')
80 | for i in env.world.SNR:
81 | for j in i:
82 | print('%10.2f' % j, end='')
83 | print('\n', end='')
84 | print('*************************Rate*****************************')
85 | for i in env.world.Rate:
86 | for j in i:
87 | print('%10.2e' % j, end='')
88 | print('\n', end=' ')
89 | print('\n')
90 |
91 | # update the flag
92 | done = all(done_n)
93 | terminal = (episode_step >= arglist.per_episode_max_len)
94 |
95 | # reset the env
96 | if done or terminal:
97 | episode_step = 0
98 | obs_n = env.reset()
99 |
100 | # render the env
101 | # print(rew_n)
102 | env.render()
103 |
104 | if __name__ == '__main__':
105 | arglist = parse_args()
106 | enjoy(arglist)
107 |
--------------------------------------------------------------------------------
/learning_curves/exp_torch_4/agrewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_4/agrewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_4/rewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_4/rewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_5/agrewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_5/agrewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_5/rewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_5/rewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_6/agrewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_6/agrewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_6/rewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_6/rewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_7/agrewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_7/agrewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_7/rewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_7/rewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_8/agrewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_8/agrewards.pkl
--------------------------------------------------------------------------------
/learning_curves/exp_torch_8/rewards.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/learning_curves/exp_torch_8/rewards.pkl
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | # Time: 2019-11-05
2 | # Author: Zachary
3 | # Name: MADDPG_torch
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | class abstract_agent(nn.Module):
9 | def __init__(self):
10 | super(abstract_agent, self).__init__()
11 |
12 | def act(self, input):
13 | policy, value = self.forward(input) # flow the input through the nn
14 | return policy, value
15 |
16 | class actor_agent(abstract_agent):
17 | def __init__(self, num_inputs, action_size, args):
18 | super(actor_agent, self).__init__()
19 | self.linear_a1 = nn.Linear(num_inputs, args.num_units_1)
20 | self.linear_a2 = nn.Linear(args.num_units_1, args.num_units_2)
21 | self.linear_a = nn.Linear(args.num_units_2, action_size)
22 | self.reset_parameters()
23 | # Activation func init
24 | self.LReLU = nn.LeakyReLU(0.01)
25 | self.tanh= nn.Tanh()
26 | self.train()
27 |
28 | def reset_parameters(self):
29 | gain = nn.init.calculate_gain('leaky_relu')
30 | gain_tanh = nn.init.calculate_gain('tanh')
31 | self.linear_a1.weight.data.mul_(gain)
32 | self.linear_a2.weight.data.mul_(gain)
33 | self.linear_a.weight.data.mul_(gain_tanh)
34 |
35 | def forward(self, input):
36 | """
37 | The forward func defines how the data flows through the graph(layers)
38 | """
39 | x = self.LReLU(self.linear_a1(input))
40 | x = self.LReLU(self.linear_a2(x))
41 | policy = self.tanh(self.linear_a(x))
42 | return policy
43 |
44 | class critic_agent(abstract_agent):
45 | def __init__(self, obs_shape_n, action_shape_n, args):
46 | super(critic_agent, self).__init__()
47 | self.linear_o_c1 = nn.Linear(obs_shape_n, args.num_units_1)
48 | self.linear_a_c1 = nn.Linear(action_shape_n, args.num_units_1)
49 | self.linear_c2 = nn.Linear(args.num_units_1*2, args.num_units_2)
50 | self.linear_c = nn.Linear(args.num_units_2, 1)
51 | self.reset_parameters()
52 |
53 | self.LReLU = nn.LeakyReLU(0.01)
54 | self.tanh= nn.Tanh()
55 | self.train()
56 |
57 | def reset_parameters(self):
58 | gain = nn.init.calculate_gain('leaky_relu')
59 | gain_tanh = nn.init.calculate_gain('tanh')
60 | self.linear_o_c1.weight.data.mul_(gain)
61 | self.linear_a_c1.weight.data.mul_(gain)
62 | self.linear_c2.weight.data.mul_(gain)
63 | self.linear_c.weight.data.mul_(gain)
64 |
65 | def forward(self, obs_input, action_input):
66 | """
67 | input_g: input_global, input features of all agents
68 | """
69 | x_o = self.LReLU(self.linear_o_c1(obs_input))
70 | x_a = self.LReLU(self.linear_a_c1(action_input))
71 | x_cat = torch.cat([x_o, x_a], dim=1)
72 | x = self.LReLU(self.linear_c2(x_cat))
73 | value = self.linear_c(x)
74 | return value
75 |
76 | class openai_critic(abstract_agent):
77 | def __init__(self, obs_shape_n, action_shape_n, args):
78 | super(openai_critic, self).__init__()
79 | # self.LReLU = nn.LeakyReLU(0.01)
80 | self.LReLU = nn.ReLU()
81 | self.linear_c1 = nn.Linear(action_shape_n+obs_shape_n, args.num_units_openai)
82 | self.linear_c2 = nn.Linear(args.num_units_openai, args.num_units_openai)
83 | self.linear_c = nn.Linear(args.num_units_openai, 1)
84 |
85 | self.reset_parameters()
86 | self.train()
87 |
88 | def reset_parameters(self):
89 | # gain = nn.init.calculate_gain('leaky_relu')
90 | # nn.init.xavier_uniform_(self.linear_c1.weight, gain=nn.init.calculate_gain('leaky_relu'))
91 | # nn.init.xavier_uniform_(self.linear_c2.weight, gain=nn.init.calculate_gain('leaky_relu'))
92 | # nn.init.xavier_uniform_(self.linear_c.weight, gain=nn.init.calculate_gain('leaky_relu'))
93 | gain = nn.init.calculate_gain('relu')
94 | nn.init.xavier_uniform_(self.linear_c1.weight, gain=nn.init.calculate_gain('relu'))
95 | nn.init.xavier_uniform_(self.linear_c2.weight, gain=nn.init.calculate_gain('relu'))
96 | nn.init.xavier_uniform_(self.linear_c.weight, gain=nn.init.calculate_gain('relu'))
97 |
98 |
99 | def forward(self, obs_input, action_input):
100 | """
101 | input_g: input_global, input features of all agents
102 | """
103 | x_cat = self.LReLU(self.linear_c1(torch.cat([obs_input, action_input], dim=1)))
104 | x = self.LReLU(self.linear_c2(x_cat))
105 | value = self.linear_c(x)
106 | return value
107 |
108 | class openai_actor(abstract_agent):
109 | def __init__(self, num_inputs, action_size, args):
110 | super(openai_actor, self).__init__()
111 | self.tanh= nn.Tanh()
112 | # self.LReLU = nn.LeakyReLU(0.01)
113 | self.LReLU = nn.ReLU()
114 | self.linear_a1 = nn.Linear(num_inputs, args.num_units_openai)
115 | self.linear_a2 = nn.Linear(args.num_units_openai, args.num_units_openai)
116 | self.linear_a = nn.Linear(args.num_units_openai, action_size)
117 |
118 | self.reset_parameters()
119 | self.train()
120 |
121 | def reset_parameters(self):
122 | # gain = nn.init.calculate_gain('leaky_relu')
123 | # nn.init.xavier_uniform_(self.linear_a1.weight, gain=nn.init.calculate_gain('leaky_relu'))
124 | # nn.init.xavier_uniform_(self.linear_a2.weight, gain=nn.init.calculate_gain('leaky_relu'))
125 | # nn.init.xavier_uniform_(self.linear_a.weight, gain=nn.init.calculate_gain('leaky_relu'))
126 | gain_tanh = nn.init.calculate_gain('tanh')
127 | gain = nn.init.calculate_gain('relu')
128 | nn.init.xavier_uniform_(self.linear_a1.weight, gain=nn.init.calculate_gain('relu'))
129 | nn.init.xavier_uniform_(self.linear_a2.weight, gain=nn.init.calculate_gain('relu'))
130 | nn.init.xavier_uniform_(self.linear_a.weight, gain=nn.init.calculate_gain('relu'))
131 |
132 | def forward(self, input, model_original_out=False):
133 | """
134 | The forward func defines how the data flows through the graph (layers).
135 | If model_original_out is True, the raw logits (model_out) are also returned.
136 | """
137 | x = self.LReLU(self.linear_a1(input))
138 | x = self.LReLU(self.linear_a2(x))
139 | model_out = self.linear_a(x)
140 | u = torch.rand_like(model_out)
141 | policy = F.softmax(model_out - torch.log(-torch.log(u)), dim=-1)
142 | if model_original_out == True: return model_out, policy # for model_out criterion
143 | return policy
144 |
--------------------------------------------------------------------------------
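The last two lines of `openai_actor.forward` above implement Gumbel-softmax sampling: with `u ~ Uniform(0, 1)`, the noise `-log(-log(u))` is a standard Gumbel sample, so adding it to the logits and taking a softmax yields a differentiable, soft one-hot sample from the categorical distribution defined by the logits. A self-contained sketch of the same computation (assuming nothing beyond PyTorch; the temperature is implicitly 1, as in the code above):

import torch
import torch.nn.functional as F

def gumbel_softmax_sample(logits):
    """Differentiable sample from Categorical(softmax(logits)), mirroring openai_actor.forward."""
    u = torch.rand_like(logits)                      # u ~ Uniform(0, 1)
    gumbel_noise = -torch.log(-torch.log(u))         # standard Gumbel(0, 1) noise
    return F.softmax(logits + gumbel_noise, dim=-1)  # soft one-hot vector, sums to 1

logits = torch.tensor([[2.0, 0.5, -1.0]])
print(gumbel_softmax_sample(logits))  # a random point on the simplex, biased towards index 0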
/multiagent/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 | # Multiagent envs
4 | # ----------------------------------------
5 |
6 | register(
7 | id='MultiagentSimple-v0',
8 | entry_point='multiagent.envs:SimpleEnv',
9 | # FIXME(cathywu) currently has to be exactly max_path_length parameters in
10 | # rllab run script
11 | max_episode_steps=100,
12 | )
13 |
14 | register(
15 | id='MultiagentSimpleSpeakerListener-v0',
16 | entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
17 | max_episode_steps=100,
18 | )
19 |
--------------------------------------------------------------------------------
/multiagent/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/core.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/core.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/core_uav.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/core_uav.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/environment.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/environment.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/environment_uav.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/environment_uav.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/multi_discrete.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/multi_discrete.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/rendering.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/rendering.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/__pycache__/scenario.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/__pycache__/scenario.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 |
11 | # state of agents (including communication and internal/mental state)
12 | class AgentState(EntityState):
13 | def __init__(self):
14 | super(AgentState, self).__init__()
15 | # communication allocation
16 | self.c = None
17 |
18 | # action of the agent
19 | class Action(object):
20 | def __init__(self):
21 | # physical action
22 | self.u = None
23 | # communication action
24 | self.c = None
25 |
26 | # properties and state of physical world entity
27 | class Entity(object):
28 | def __init__(self):
29 | # name
30 | self.name = ''
31 | # properties:
32 | self.size = 0.050
33 | # entity can move / be pushed
34 | self.movable = False
35 | # entity collides with others
36 | self.collide = True
37 | # material density (affects mass)
38 | self.density = 25.0
39 | # color
40 | self.color = None
41 | # max speed and accel
42 | self.max_speed = None
43 | self.accel = None
44 | # state
45 | self.state = EntityState()
46 | # mass
47 | self.initial_mass = 1
48 |
49 | @property
50 | def mass(self):
51 | return self.initial_mass
52 |
53 | # properties of landmark entities
54 | class Landmark(Entity):
55 | def __init__(self):
56 | super(Landmark, self).__init__()
57 |
58 | # properties of agent entities
59 | class Agent(Entity):
60 | def __init__(self):
61 | super(Agent, self).__init__()
62 | # agents are movable by default
63 | self.movable = True
64 | # cannot send communication signals
65 | self.silent = False
66 | # cannot observe the world
67 | self.blind = False
68 | # physical motor noise amount
69 | self.u_noise = None
70 | # communication noise amount
71 | self.c_noise = None
72 | # control range
73 | self.u_range = 1.0
74 | # state
75 | self.state = AgentState()
76 | # action
77 | self.action = Action()
78 | # script behavior to execute
79 | self.action_callback = None
80 |
81 | # multi-agent world
82 | class World(object):
83 | def __init__(self):
84 | # list of agents and entities (can change at execution-time!)
85 | self.agents = []
86 | self.landmarks = []
87 | # communication channel dimensionality
88 | self.dim_c = 0
89 | # position dimensionality
90 | self.dim_p = 2
91 | # color dimensionality
92 | self.dim_color = 3
93 | # simulation timestep
94 | self.dt = 0.1
95 | # physical damping
96 | self.damping = 0.25
97 | # contact response parameters
98 | self.contact_force = 1e+2
99 | self.contact_margin = 1e-3
100 |
101 | # return all entities in the world
102 | @property
103 | def entities(self):
104 | return self.agents + self.landmarks
105 |
106 | # return all agents controllable by external policies
107 | @property
108 | def policy_agents(self):
109 | return [agent for agent in self.agents if agent.action_callback is None]
110 |
111 | # return all agents controlled by world scripts
112 | @property
113 | def scripted_agents(self):
114 | return [agent for agent in self.agents if agent.action_callback is not None]
115 |
116 | # update state of the world
117 | def step(self):
118 | # set actions for scripted agents
119 | for agent in self.scripted_agents:
120 | agent.action = agent.action_callback(agent, self)
121 | # gather forces applied to entities
122 | p_force = [None] * len(self.entities)
123 | # apply agent physical controls
124 | p_force = self.apply_action_force(p_force)
125 | # apply environment forces
126 | p_force = self.apply_environment_force(p_force)
127 | # integrate physical state
128 | self.integrate_state(p_force)
129 | # update agent state
130 | for agent in self.agents:
131 | self.update_agent_state(agent)
132 |
133 | # gather agent action forces
134 | def apply_action_force(self, p_force):
135 | # set applied forces
136 | for i,agent in enumerate(self.agents):
137 | if agent.movable:
138 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
139 | p_force[i] = agent.action.u + noise
140 | return p_force
141 |
142 | # gather physical forces acting on entities
143 | def apply_environment_force(self, p_force):
144 | # simple (but inefficient) collision response
145 | for a,entity_a in enumerate(self.entities):
146 | for b,entity_b in enumerate(self.entities):
147 | if(b <= a): continue
148 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
149 | if(f_a is not None):
150 | if(p_force[a] is None): p_force[a] = 0.0
151 | p_force[a] = f_a + p_force[a]
152 | if(f_b is not None):
153 | if(p_force[b] is None): p_force[b] = 0.0
154 | p_force[b] = f_b + p_force[b]
155 | return p_force
156 |
157 | # integrate physical state
158 | def integrate_state(self, p_force):
159 | for i,entity in enumerate(self.entities):
160 | if not entity.movable: continue
161 | entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
162 | if (p_force[i] is not None):
163 | entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
164 | if entity.max_speed is not None:
165 | speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
166 | if speed > entity.max_speed:
167 | entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
168 | np.square(entity.state.p_vel[1])) * entity.max_speed
169 | entity.state.p_pos += entity.state.p_vel * self.dt
170 |
171 | def update_agent_state(self, agent):
172 | # set communication state (directly for now)
173 | if agent.silent:
174 | agent.state.c = np.zeros(self.dim_c)
175 | else:
176 | noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
177 | agent.state.c = agent.action.c + noise
178 |
179 | # get collision forces for any contact between two entities
180 | def get_collision_force(self, entity_a, entity_b):
181 | if (not entity_a.collide) or (not entity_b.collide):
182 | return [None, None] # not a collider
183 | if (entity_a is entity_b):
184 | return [None, None] # don't collide against itself
185 | # compute actual distance between entities
186 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
187 | dist = np.sqrt(np.sum(np.square(delta_pos)))
188 | # minimum allowable distance
189 | dist_min = entity_a.size + entity_b.size
190 | # softmax penetration
191 | k = self.contact_margin
192 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
193 | force = self.contact_force * delta_pos / dist * penetration
194 | force_a = +force if entity_a.movable else None
195 | force_b = -force if entity_b.movable else None
196 | return [force_a, force_b]
--------------------------------------------------------------------------------
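`World.step` above is a damped Euler integrator: each movable entity's velocity decays by the damping factor, is pushed by the net force scaled by `dt / mass`, is clipped to `max_speed`, and then advances the position. A standalone sketch of one such update for a single entity (the numbers are hypothetical, chosen only for illustration):

import numpy as np

# One damped Euler step, as in World.integrate_state (hypothetical numbers).
damping, dt, mass, max_speed = 0.25, 0.1, 1.0, None
p_vel = np.array([1.0, 0.0])
p_pos = np.array([0.0, 0.0])
p_force = np.array([2.0, 2.0])

p_vel = p_vel * (1 - damping) + (p_force / mass) * dt   # v <- v*(1 - damping) + (F/m)*dt
if max_speed is not None:
    speed = np.linalg.norm(p_vel)
    if speed > max_speed:
        p_vel = p_vel / speed * max_speed               # rescale onto the speed limit
p_pos = p_pos + p_vel * dt                              # p <- p + v*dt

print(p_vel)  # [0.95 0.2 ]
print(p_pos)  # [0.095 0.02 ]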
/multiagent/core_uav.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 | self.hold_action = 0
11 |
12 | # state of agents (including communication and internal/mental state)
13 | class AgentState(EntityState):
14 | def __init__(self):
15 | super(AgentState, self).__init__()
16 | # communication allocation
17 | self.c = None
18 | self.UAV_Rate = None
19 | self.assign = None
20 | self.SNRforUser = None
21 |
22 | # action of the agent
23 | class Action(object):
24 | def __init__(self):
25 | # physical action
26 | self.u = None
27 | # communication action
28 | self.c = None
29 |
30 | # properties and state of physical world entity
31 | class Entity(object):
32 | def __init__(self):
33 | # name
34 | self.name = ''
35 | # properties:
36 | self.size = 0.0250
37 | # entity can move / be pushed
38 | self.movable = False
39 | # entity collides with others
40 | self.collide = True
41 | # material density (affects mass)
42 | self.density = 25.0
43 | # color
44 | self.color = None
45 | # max speed and accel
46 | self.max_speed = 10
47 | self.accel = None
48 | # state
49 | self.state = EntityState()
50 | # mass
51 | self.initial_mass = 1
52 |
53 | @property
54 | def mass(self):
55 | return self.initial_mass
56 |
57 | # properties of landmark entities
58 | class Landmark(Entity):
59 | def __init__(self):
60 | super(Landmark, self).__init__()
61 | self.assign_index = None
62 | self.switch_cout = 0
63 |
64 | # properties of agent entities
65 | class Agent(Entity):
66 | def __init__(self):
67 | super(Agent, self).__init__()
68 | # agents are movable by default
69 | self.movable = True
70 | # cannot send communication signals
71 | self.silent = False
72 | # cannot observe the world
73 | self.blind = False
74 | # physical motor noise amount
75 | self.u_noise = None
76 | # communication noise amount
77 | self.c_noise = None
78 | # control range
79 | self.u_range = 1.0
80 | # state
81 | self.state = AgentState()
82 | # action
83 | self.action = Action()
84 | # script behavior to execute
85 | self.action_callback = None
86 |
87 | # multi-agent world
88 | class World(object):
89 | def __init__(self):
90 | # list of agents and entities (can change at execution-time!)
91 | self.agents = []
92 | self.landmarks = []
93 | # communication channel dimensionality
94 | self.dim_c = 0
95 | # position dimensionality
96 | self.dim_p = 2
97 | # color dimensionality
98 | self.dim_color = 3
99 | # simulation timestep
100 | self.dt = 0.1
101 | # physical damping
102 | self.damping = 0.25
103 | # contact response parameters
104 | self.contact_force = 1e+2
105 | self.contact_margin = 1e-3
106 | self.collaborative = False
107 |
108 | self.pow_UAVs = 100 # UAV transmit power
109 | self.pow_GBS = None # ground base station (GBS) transmit power
110 | self.gain_UAV = 10**(-40/10) # channel gain of the UAV link at unit distance, in dB
111 | self.gain_GBS = 60 # channel gain of the GBS link at unit distance, in dB
112 | self.los_exp_UAV = 2 # path-loss exponent of the UAV link
113 | self.los_exp_GBS = 3 # path-loss exponent of the GBS link
114 | self.max_band = 10e6 # maximum available bandwidth
115 | self.noise_pdf = 10**(-167/10) # noise power spectral density, dBm
116 |
117 | self.bandwidth = None # bandwidth
118 | self.Rate = None
119 | self.SNR = None
120 |
121 | # return all entities in the world
122 | @property
123 | def entities(self):
124 | return self.agents + self.landmarks
125 |
126 | # return all agents controllable by external policies
127 | @property
128 | def policy_agents(self):
129 | return [agent for agent in self.agents if agent.action_callback is None]
130 |
131 | # return all agents controlled by world scripts
132 | @property
133 | def scripted_agents(self):
134 | return [agent for agent in self.agents if agent.action_callback is not None]
135 |
136 | # update state of the world
137 | def step(self):
138 | # set actions for scripted agents
139 | for agent in self.scripted_agents:
140 | agent.action = agent.action_callback(agent, self)
141 |
142 | # gather forces applied to entities
143 | p_force = [None] * len(self.entities)
144 | # apply agent physical controls
145 | p_force = self.apply_action_force(p_force)
146 | # apply environment forces
147 | # p_force = self.apply_environment_force(p_force)
148 | # integrate physical state
149 | self.integrate_state(p_force)
150 |
151 | self.update_landmark_pos()
152 |
153 | # self.update_agent_pos()
154 |
155 | # update agent state
156 | for agent in self.agents:
157 | self.update_agent_state(agent)
158 | self.update_world_state()
159 |
160 | # dict_state = []
161 | # for i, landmark in enumerate(self.landmarks):
162 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
163 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos})
164 | #
165 | # for i, agent in enumerate(self.agents):
166 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
167 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign': agent.state.c})
168 | # with open('./state_results/states.txt','a') as f:
169 | # f.write(str(dict_state)+'\n')
170 |
171 | # gather agent action forces
172 | def apply_action_force(self, p_force):
173 | # set applied forces
174 | for i,agent in enumerate(self.agents):
175 | if agent.movable:
176 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
177 | p_force[i] = agent.action.u + noise
178 | return p_force
179 |
180 | # gather physical forces acting on entities
181 | def apply_environment_force(self, p_force):
182 | # simple (but inefficient) collision response
183 | for a,entity_a in enumerate(self.entities):
184 | for b,entity_b in enumerate(self.entities):
185 | if(b <= a): continue
186 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
187 | if(f_a is not None):
188 | if(p_force[a] is None): p_force[a] = 0.0
189 | p_force[a] = f_a + p_force[a]
190 | if(f_b is not None):
191 | if(p_force[b] is None): p_force[b] = 0.0
192 | p_force[b] = f_b + p_force[b]
193 | return p_force
194 |
195 | # integrate physical state
196 | def integrate_state(self, p_force):
197 | for i, agent in enumerate(self.agents):
198 | if not agent.movable: continue
199 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
200 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping)
201 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping)
202 | if (p_force[i] is not None):
203 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt
204 | if agent.max_speed is not None:
205 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
206 | if speed > agent.max_speed:
207 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
208 | np.square(agent.state.p_vel[1])) * agent.max_speed
209 | agent.state.p_pos += agent.state.p_vel * self.dt
210 |
211 | def update_landmark_pos(self):
212 | # for landmark in self.landmarks:
213 | # if not landmark.movable: continue
214 | # if landmark.state.hold_action > 0:
215 | # landmark.state.hold_action -= 1
216 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
217 | # else:
218 | # landmark.state.hold_action = np.random.randint(0,10)
219 | #
220 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
221 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
223 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
224 | # # print(temp_pos)
225 | # landmark.state.p_pos = temp_pos
226 |
227 | for landmark in self.landmarks:
228 | if not landmark.movable: continue
229 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
230 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
231 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
232 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
233 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
234 | # print(temp_pos)
235 | landmark.state.p_pos = temp_pos
236 |
237 |
238 | def update_agent_state(self, agent):
239 | # set communication state (directly for now)
240 | if agent.silent:
241 | agent.state.c = np.zeros(self.dim_c)
242 | else:
243 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
244 | # agent.state.c = agent.action.c + noise
245 | agent.state.c = agent.action.c
246 |
247 | # def update_agent_pos(self):
248 | # # set communication state (directly for now)
249 | # for i, agent in enumerate(self.agents):
250 | # if not agent.movable: continue
251 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
252 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping)
253 | # print(agent.state.p_vel)
254 | # if agent.max_speed is not None:
255 | # speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
256 | # if speed > agent.max_speed:
257 | # print('above the maximum speed')
258 | # agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
259 | # np.square(agent.state.p_vel[1])) * agent.max_speed
260 | # agent.state.p_pos += agent.state.p_vel * self.dt
261 |
262 | def update_world_state(self):
263 | # users make their selection according to the scheme provided by the base stations
264 |
265 | bw = np.zeros((len(self.agents), self.dim_c))
266 | disVec = np.zeros((len(self.agents), self.dim_c))
267 | height = np.ones((len(self.agents), self.dim_c)) * 200 # UAV flight altitude
268 | for index, agent in enumerate(self.agents):
269 | bw[index] = np.array(list(self.max_band * i for i in agent.action.c))
270 | # compute the Euclidean distance from the UAV to each user
271 | for i, landmark in enumerate(self.landmarks):
272 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000
273 | # when the distance exceeds a threshold, communication is impossible, so the bandwidth allocation is set to 0
274 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i]
275 | # received signal power
276 | signal_pow = self.pow_UAVs * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2)
277 |
278 | # noise power at the receiver
279 | noise_pow = bw * self.noise_pdf + 1e-9
280 | # SNR at the receiver
281 | SNR = signal_pow / noise_pow
282 |
283 | SNR[SNR < np.sqrt(10)] = 0 # communication fails when the SNR is below 5 dB, so force those entries to zero
284 |
285 |
286 | # for i in range(self.dim_c):
287 | # for j in range(len(self.agents)):
288 | # if 10*np.log10(SNR[j][i]) < 15:
289 | # bw[j][i] = 0
290 |
291 | # information transmission rate
292 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR)
293 |
294 | ## ensure that each user establishes a communication link with only one UAV
295 | mask = np.zeros((len(self.agents), self.dim_c)) # masked positions are 0
296 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) # masked positions are negative
297 |
298 | for i, landmark in enumerate(self.landmarks):
299 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])):
300 | landmark.switch_cout = 1
301 | landmark.assign_index = np.argmax(Rate[:, i])
302 | else:
303 | landmark.assign_index = np.argmax(Rate[:, i])
304 |
305 | mask[landmark.assign_index][i] = 1
306 |
307 |
308 | self.bandwidth = mask * bw
309 | self.Rate = mask * Rate
310 | self.SNR = 10*np.log10(mask * SNR + 1e-10)
311 |
312 | for i, agent in enumerate(self.agents):
313 | agent.state.assign = self.bandwidth[i, :]/self.max_band
314 | agent.state.UAV_Rate = self.Rate[i, :]
315 | agent.state.SNRforUser = SNR[i,:]
316 |
317 | # for i, landmark in enumerate(self.landmarks):
318 | # band = self.bandwidth.transpose()
319 | # landmark.assign_index = np.nonzero(band[i])
320 |
321 | # get collision forces for any contact between two entities
322 | def get_collision_force(self, entity_a, entity_b):
323 | if (not entity_a.collide) or (not entity_b.collide):
324 | return [None, None] # not a collider
325 | if (entity_a is entity_b):
326 | return [None, None] # don't collide against itself
327 | # compute actual distance between entities
328 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
329 | dist = np.sqrt(np.sum(np.square(delta_pos)))
330 | # minimum allowable distance
331 | dist_min = entity_a.size + entity_b.size
332 | # softmax penetration
333 | k = self.contact_margin
334 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
335 | force = self.contact_force * delta_pos / dist * penetration
336 | force_a = +force if entity_a.movable else None
337 | force_b = -force if entity_b.movable else None
338 | return [force_a, force_b]
--------------------------------------------------------------------------------
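`World.update_world_state` above is a simple link-budget model: the received power is `pow_UAVs * gain_UAV / (d**los_exp_UAV + height**2)`, the noise power is `bandwidth * noise_pdf` (plus a small constant to avoid division by zero), SNRs below 5 dB (sqrt(10) in linear scale) are zeroed out, the achievable rate is `bandwidth * log2(1 + SNR)`, and finally a mask keeps, for every user, only the UAV that offers the highest rate. A toy sketch of that last assignment step, with hypothetical numbers rather than values from the repository:

import numpy as np

# 2 UAVs (rows) x 3 users (columns); hypothetical achievable rates in bit/s.
Rate = np.array([[3.0e6, 1.0e6, 2.0e6],
                 [2.5e6, 4.0e6, 0.5e6]])

mask = np.zeros_like(Rate)
for user in range(Rate.shape[1]):
    best_uav = np.argmax(Rate[:, user])  # each user keeps only its best-serving UAV
    mask[best_uav, user] = 1

print(mask * Rate)  # users 0 and 2 are served by UAV 0, user 1 by UAV 1; all other entries are 0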
/multiagent/environment_uav.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.envs.registration import EnvSpec
4 | import numpy as np
5 | from multiagent.multi_discrete import MultiDiscrete
6 |
7 | # environment for all agents in the multiagent world
8 | # currently code assumes that no agents will be created/destroyed at runtime!
9 | class MultiAgentEnv(gym.Env):
10 | metadata = {
11 | 'render.modes' : ['human', 'rgb_array']
12 | }
13 |
14 | def __init__(self, world, reset_callback=None, reward_callback=None,
15 | observation_callback=None, info_callback=None,
16 | done_callback=None, shared_viewer=True):
17 |
18 | self.world = world
19 | self.agents = self.world.policy_agents
20 | # set required vectorized gym env property
21 | self.n = len(world.policy_agents)
22 | # scenario callbacks
23 | self.reset_callback = reset_callback
24 | self.reward_callback = reward_callback
25 | self.observation_callback = observation_callback
26 | self.info_callback = info_callback
27 | self.done_callback = done_callback
28 | # environment parameters
29 | self.discrete_action_space = True
30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
31 | self.discrete_action_input = False
32 | # if true, even if the action space is continuous, actions will be performed discretely
33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
34 | # if true, every agent has the same reward
35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
36 | self.time = 0
37 |
38 | # configure spaces
39 | self.action_space = []
40 | self.observation_space = []
41 | for agent in self.agents:
42 | total_action_space = []
43 | # physical action space
44 | if self.discrete_action_space:
45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
46 | else:
47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
48 | if agent.movable:
49 | total_action_space.append(u_action_space)
50 | # communication action space
51 | if self.discrete_action_space:
52 | c_action_space = spaces.Discrete(world.dim_c)
53 | else:
54 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
55 | if not agent.silent:
56 | total_action_space.append(c_action_space)
57 | # total action space
58 | if len(total_action_space) > 1:
59 | # all action spaces are discrete, so simplify to MultiDiscrete action space
60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
61 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
62 | else:
63 | act_space = spaces.Tuple(total_action_space)
64 | self.action_space.append(act_space)
65 | else:
66 | self.action_space.append(total_action_space[0])
67 | # observation space
68 | obs_dim = len(observation_callback(agent, self.world))
69 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
70 | # agent.action.c = np.zeros(self.world.dim_c)
71 |
72 | # rendering
73 | self.shared_viewer = shared_viewer
74 | if self.shared_viewer:
75 | self.viewers = [None]
76 | else:
77 | self.viewers = [None] * self.n
78 | self._reset_render()
79 |
80 | def step(self, action_n):
81 | obs_n = []
82 | reward_n = []
83 | done_n = []
84 | info_n = {'n': []}
85 | self.agents = self.world.policy_agents
86 | # set action for each agent
87 | for i, agent in enumerate(self.agents):
88 | self._set_action(action_n[i], agent, self.action_space[i])
89 |
90 | # advance world state
91 | self.world.step()
92 | # record observation for each agent
93 | for agent in self.agents:
94 | obs_n.append(self._get_obs(agent))
95 | reward_n.append(self._get_reward(agent))
96 | done_n.append(self._get_done(agent))
97 |
98 | info_n['n'].append(self._get_info(agent))
99 |
100 | # all agents get total reward in cooperative case
101 | reward = np.sum(reward_n)
102 | if self.shared_reward:
103 | reward_n = [reward] * self.n
104 |
105 | return obs_n, reward_n, done_n, info_n
106 |
107 | def reset(self):
108 | # reset world
109 | self.reset_callback(self.world)
110 | # reset renderer
111 | self._reset_render()
112 | # record observations for each agent
113 | obs_n = []
114 | self.agents = self.world.policy_agents
115 | for agent in self.agents:
116 | obs_n.append(self._get_obs(agent))
117 | return obs_n
118 |
119 | # get info used for benchmarking
120 | def _get_info(self, agent):
121 | if self.info_callback is None:
122 | return {}
123 | return self.info_callback(agent, self.world)
124 |
125 | # get observation for a particular agent
126 | def _get_obs(self, agent):
127 | if self.observation_callback is None:
128 | return np.zeros(0)
129 | return self.observation_callback(agent, self.world)
130 |
131 | # get dones for a particular agent
132 | # unused right now -- agents are allowed to go beyond the viewing screen
133 | def _get_done(self, agent):
134 | if self.done_callback is None:
135 | return False
136 | return self.done_callback(agent, self.world)
137 |
138 | # get reward for a particular agent
139 | def _get_reward(self, agent):
140 | if self.reward_callback is None:
141 | return 0.0
142 | return self.reward_callback(agent, self.world)
143 |
144 | # set env action for a particular agent
145 | def _set_action(self, action, agent, action_space, time=None):
146 | agent.action.u = np.zeros(self.world.dim_p)
147 | agent.action.c = np.zeros(self.world.dim_c)
148 | # process action
149 | if isinstance(action_space, MultiDiscrete):
150 | act = []
151 | size = action_space.high - action_space.low + 1
152 | index = 0
153 | for s in size:
154 | act.append(action[index:(index+s)])
155 | index += s
156 | action = act
157 | else:
158 | action = [action]
159 |
160 |
161 | if agent.movable:
162 | # physical action
163 | if self.discrete_action_input:
164 | agent.action.u = np.zeros(self.world.dim_p)
165 | # process discrete action
166 | if action[0] == 1: agent.action.u[0] = -1.0
167 | if action[0] == 2: agent.action.u[0] = +1.0
168 | if action[0] == 3: agent.action.u[1] = -1.0
169 | if action[0] == 4: agent.action.u[1] = +1.0
170 | else:
171 | if self.force_discrete_action:
172 | d = np.argmax(action[0])
173 | action[0][:] = 0.0
174 | action[0][d] = 1.0
175 | if self.discrete_action_space:
176 | agent.action.u[0] = action[0][1] - action[0][2]
177 | agent.action.u[1] = action[0][3] - action[0][4]
178 | else:
179 | agent.action.u = action[0]
180 | sensitivity = 5
181 | if agent.accel is not None:
182 | sensitivity = agent.accel
183 | agent.action.u *= sensitivity
184 | action = action[1:]
185 | if not agent.silent:
186 | # communication action
187 | if self.discrete_action_input:
188 | agent.action.c = np.zeros(self.world.dim_c)
189 | agent.action.c[action[0]] = 1.0
190 | else:
191 | agent.action.c = action[0]
192 | action = action[1:]
193 | # make sure we used all elements of action
194 | assert len(action) == 0
195 |
196 | # reset rendering assets
197 | def _reset_render(self):
198 | self.render_geoms = None
199 | self.render_geoms_xform = None
200 | # self.render_geoms_uav = None
201 | # self.render_geoms_xform_uav = None
202 |
203 | # render environment
204 | def render(self, mode='human'):
205 | # if mode == 'human':
206 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
207 | # message = ''
208 | # for agent in self.world.agents:
209 | # comm = []
210 | # for other in self.world.agents:
211 | # if other is agent: continue
212 | # if np.all(other.state.c == 0):
213 | # word = '_'
214 | # else:
215 | # word = alphabet[np.argmax(other.state.c)]
216 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ')
217 | # print(message)
218 |
219 | for i in range(len(self.viewers)):
220 | # create viewers (if necessary)
221 | if self.viewers[i] is None:
222 | # import rendering only if we need it (and don't import for headless machines)
223 | #from gym.envs.classic_control import rendering
224 | from multiagent import rendering
225 | self.viewers[i] = rendering.Viewer(700,700)
226 |
227 | # create rendering geometry
228 | if self.render_geoms is None:
229 | # import rendering only if we need it (and don't import for headless machines)
230 | #from gym.envs.classic_control import rendering
231 | from multiagent import rendering
232 | self.render_geoms = []
233 | self.render_geoms_xform = []
234 | self.render_geoms_uav = []
235 | self.render_geoms_xform_uav = []
236 | self.render_geoms_line = []
237 |
238 |
239 | range_color = [0.25, 0.25, 0.75]
240 | for entity in self.world.entities:
241 | geom = rendering.make_circle(entity.size)
242 | xform = rendering.Transform()
243 |
244 | if 'agent' in entity.name:
245 | geom.set_color(*entity.color, alpha=0.5)
246 |
247 | geom_uav = rendering.make_circle(entity.size*4)
248 | xform_uav = rendering.Transform()
249 | geom_uav.set_color(*range_color, alpha=0.25)
250 | geom_uav.add_attr(xform_uav)
251 |
252 | self.render_geoms_uav.append(geom_uav)
253 | self.render_geoms_xform_uav.append(xform_uav)
254 |
255 | else:
256 | geom.set_color(*entity.color)
257 |
258 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos)
259 | self.render_geoms_line.append(geom_line)
260 |
261 | geom.add_attr(xform)
262 | self.render_geoms.append(geom)
263 | self.render_geoms_xform.append(xform)
264 |
265 | # add geoms to viewer
266 | for viewer in self.viewers:
267 | viewer.geoms = []
268 | for geom in self.render_geoms:
269 | viewer.add_geom(geom)
270 | for geom_uav in self.render_geoms_uav:
271 | viewer.add_geom(geom_uav)
272 | for geom_line in self.render_geoms_line:
273 | viewer.add_geom(geom_line)
274 |
275 | results = []
276 | for i in range(len(self.viewers)):
277 | from multiagent import rendering
278 | # update bounds to center around agent
279 | cam_range = 1
280 | if self.shared_viewer:
281 | pos = np.zeros(self.world.dim_p)
282 | else:
283 | pos = self.agents[i].state.p_pos
284 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range)
285 | # update geometry positions
286 | for e, entity in enumerate(self.world.entities):
287 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos)
288 | for a, agent in enumerate(self.world.policy_agents):
289 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos)
290 | for l, landmark in enumerate(self.world.landmarks):
291 | self.render_geoms_line[l].start = landmark.state.p_pos
292 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos
293 |
294 | # render to display or array
295 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array'))
296 |
297 | return results
298 |
299 | # create receptor field locations in local coordinate frame
300 | def _make_receptor_locations(self, agent):
301 | receptor_type = 'polar'
302 | range_min = 0.05 * 2.0
303 | range_max = 1.00
304 | dx = []
305 | # circular receptive field
306 | if receptor_type == 'polar':
307 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False):
308 | for distance in np.linspace(range_min, range_max, 3):
309 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)]))
310 | # add origin
311 | dx.append(np.array([0.0, 0.0]))
312 | # grid receptive field
313 | if receptor_type == 'grid':
314 | for x in np.linspace(-range_max, +range_max, 5):
315 | for y in np.linspace(-range_max, +range_max, 5):
316 | dx.append(np.array([x,y]))
317 | return dx
--------------------------------------------------------------------------------
/multiagent/multi_discrete.py:
--------------------------------------------------------------------------------
1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
3 |
4 | import numpy as np
5 |
6 | import gym
7 | from gym.spaces import prng
8 |
9 | class MultiDiscrete(gym.Space):
10 | """
11 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters
12 | - It can be adapted to both a Discrete action space or a continuous (Box) action space
13 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
14 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
15 | where the discrete action space can take any integers from `min` to `max` (both inclusive)
16 |     Note: A value of 0 always needs to represent the NOOP action.
17 | e.g. Nintendo Game Controller
18 | - Can be conceptualized as 3 discrete action spaces:
19 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
20 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
21 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
22 | - Can be initialized as
23 | MultiDiscrete([ [0,4], [0,1], [0,1] ])
24 | """
25 | def __init__(self, array_of_param_array):
26 | self.low = np.array([x[0] for x in array_of_param_array])
27 | self.high = np.array([x[1] for x in array_of_param_array])
28 | self.num_discrete_space = self.low.shape[0]
29 |
30 | def sample(self):
31 | """ Returns a array with one sample from each discrete action space """
32 | # For each row: round(random .* (max - min) + min, 0)
33 | random_array = prng.np_random.rand(self.num_discrete_space)
34 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
35 | def contains(self, x):
36 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
37 |
38 | @property
39 | def shape(self):
40 | return self.num_discrete_space
41 | def __repr__(self):
42 | return "MultiDiscrete" + str(self.num_discrete_space)
43 | def __eq__(self, other):
44 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
--------------------------------------------------------------------------------
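For readers following the docstring above, here is a minimal, self-contained sketch (plain NumPy, not part of this repo) that mirrors MultiDiscrete's `[min, max]` parameterization, sampling formula, and membership test without the removed `gym.spaces.prng` dependency:

```python
import numpy as np

# Hypothetical parameterization: arrow keys (0..4), button A (0..1), button B (0..1)
param_arrays = [[0, 4], [0, 1], [0, 1]]
low = np.array([p[0] for p in param_arrays])
high = np.array([p[1] for p in param_arrays])

# sample: floor(random * (max - min + 1) + min), as in MultiDiscrete.sample
random_array = np.random.rand(low.shape[0])
sample = np.floor((high - low + 1.0) * random_array + low).astype(int)
print(sample)          # e.g. [3 0 1]

# membership test, as in MultiDiscrete.contains
x = [3, 0, 1]
print(len(x) == low.shape[0] and (np.array(x) >= low).all() and (np.array(x) <= high).all())
```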
/multiagent/policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pyglet.window import key
3 |
4 | # individual agent policy
5 | class Policy(object):
6 | def __init__(self):
7 | pass
8 | def action(self, obs):
9 | raise NotImplementedError()
10 |
11 | # interactive policy based on keyboard input
12 | # hard-coded to deal only with movement, not communication
13 | class InteractivePolicy(Policy):
14 | def __init__(self, env, agent_index):
15 | super(InteractivePolicy, self).__init__()
16 | self.env = env
17 | # hard-coded keyboard events
18 | self.move = [False for i in range(4)]
19 | self.comm = [False for i in range(env.world.dim_c)]
20 | # register keyboard events with this environment's window
21 | env.viewers[agent_index].window.on_key_press = self.key_press
22 | env.viewers[agent_index].window.on_key_release = self.key_release
23 |
24 | def action(self, obs):
25 | # ignore observation and just act based on keyboard events
26 | if self.env.discrete_action_input:
27 | u = 0
28 | if self.move[0]: u = 1
29 | if self.move[1]: u = 2
30 | if self.move[2]: u = 4
31 | if self.move[3]: u = 3
32 | else:
33 | u = np.zeros(5) # 5-d because of no-move action
34 | if self.move[0]: u[1] += 1.0
35 | if self.move[1]: u[2] += 1.0
36 | if self.move[3]: u[3] += 1.0
37 | if self.move[2]: u[4] += 1.0
38 | if True not in self.move:
39 | u[0] += 1.0
40 | return np.concatenate([u, np.zeros(self.env.world.dim_c)])
41 |
42 | # keyboard event callbacks
43 | def key_press(self, k, mod):
44 | if k==key.LEFT: self.move[0] = True
45 | if k==key.RIGHT: self.move[1] = True
46 | if k==key.UP: self.move[2] = True
47 | if k==key.DOWN: self.move[3] = True
48 | def key_release(self, k, mod):
49 | if k==key.LEFT: self.move[0] = False
50 | if k==key.RIGHT: self.move[1] = False
51 | if k==key.UP: self.move[2] = False
52 | if k==key.DOWN: self.move[3] = False
53 |
--------------------------------------------------------------------------------
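The continuous branch of `InteractivePolicy.action` builds a 5-dimensional movement vector (index 0 is the explicit no-move slot) and pads it with zeros for the communication channels. A small sketch of that encoding (not part of the repo; `dim_c = 2` is an assumed value for illustration):

```python
import numpy as np

move = [False, True, False, False]   # key states: LEFT, RIGHT, UP, DOWN
dim_c = 2                            # assumed number of communication channels

u = np.zeros(5)                      # index 0 = no-move action
if move[0]: u[1] += 1.0              # LEFT held
if move[1]: u[2] += 1.0              # RIGHT held
if move[3]: u[3] += 1.0              # DOWN held
if move[2]: u[4] += 1.0              # UP held
if True not in move:
    u[0] += 1.0                      # nothing pressed -> no-op

action = np.concatenate([u, np.zeros(dim_c)])
print(action)                        # [0. 0. 1. 0. 0. 0. 0.]
```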
/multiagent/rendering.py:
--------------------------------------------------------------------------------
1 | """
2 | 2D rendering framework
3 | """
4 | from __future__ import division
5 | import os
6 | import six
7 | import sys
8 |
9 | if "Apple" in sys.version:
10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
13 |
14 | from gym.utils import reraise
15 | from gym import error
16 |
17 | try:
18 | import pyglet
19 | except ImportError as e:
20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
21 |
22 | try:
23 | from pyglet.gl import *
24 | except ImportError as e:
25 |     reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
26 |
27 | import math
28 | import numpy as np
29 |
30 | RAD2DEG = 57.29577951308232
31 |
32 | def get_display(spec):
33 | """Convert a display specification (such as :0) into an actual Display
34 | object.
35 |
36 | Pyglet only supports multiple Displays on Linux.
37 | """
38 | if spec is None:
39 | return None
40 | elif isinstance(spec, six.string_types):
41 | return pyglet.canvas.Display(spec)
42 | else:
43 | raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
44 |
45 | class Viewer(object):
46 | def __init__(self, width, height, display=None):
47 | display = get_display(display)
48 |
49 | self.width = width
50 | self.height = height
51 |
52 | self.window = pyglet.window.Window(width=width, height=height, display=display)
53 | self.window.on_close = self.window_closed_by_user
54 | self.geoms = []
55 | self.onetime_geoms = []
56 | self.transform = Transform()
57 |
58 | glEnable(GL_BLEND)
59 | # glEnable(GL_MULTISAMPLE)
60 | glEnable(GL_LINE_SMOOTH)
61 | # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
62 | glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
63 | glLineWidth(2.0)
64 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
65 |
66 | def close(self):
67 | self.window.close()
68 |
69 | def window_closed_by_user(self):
70 | self.close()
71 |
72 | def set_bounds(self, left, right, bottom, top):
73 | assert right > left and top > bottom
74 | scalex = self.width/(right-left)
75 | scaley = self.height/(top-bottom)
76 | self.transform = Transform(
77 | translation=(-left*scalex, -bottom*scaley),
78 | scale=(scalex, scaley))
79 |
80 | def add_geom(self, geom):
81 | self.geoms.append(geom)
82 |
83 | def add_onetime(self, geom):
84 | self.onetime_geoms.append(geom)
85 |
86 | def render(self, return_rgb_array=False):
87 | glClearColor(1,1,1,1)
88 | self.window.clear()
89 | self.window.switch_to()
90 | self.window.dispatch_events()
91 | self.transform.enable()
92 | for geom in self.geoms:
93 | geom.render()
94 | for geom in self.onetime_geoms:
95 | geom.render()
96 | self.transform.disable()
97 | arr = None
98 | if return_rgb_array:
99 | buffer = pyglet.image.get_buffer_manager().get_color_buffer()
100 | image_data = buffer.get_image_data()
101 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
102 | # In https://github.com/openai/gym-http-api/issues/2, we
103 | # discovered that someone using Xmonad on Arch was having
104 | # a window of size 598 x 398, though a 600 x 400 window
105 | # was requested. (Guess Xmonad was preserving a pixel for
106 | # the boundary.) So we use the buffer height/width rather
107 | # than the requested one.
108 | arr = arr.reshape(buffer.height, buffer.width, 4)
109 | arr = arr[::-1,:,0:3]
110 | self.window.flip()
111 | self.onetime_geoms = []
112 | return arr
113 |
114 | # Convenience
115 | def draw_circle(self, radius=10, res=30, filled=True, **attrs):
116 | geom = make_circle(radius=radius, res=res, filled=filled)
117 | _add_attrs(geom, attrs)
118 | self.add_onetime(geom)
119 | return geom
120 |
121 | def draw_polygon(self, v, filled=True, **attrs):
122 | geom = make_polygon(v=v, filled=filled)
123 | _add_attrs(geom, attrs)
124 | self.add_onetime(geom)
125 | return geom
126 |
127 | def draw_polyline(self, v, **attrs):
128 | geom = make_polyline(v=v)
129 | _add_attrs(geom, attrs)
130 | self.add_onetime(geom)
131 | return geom
132 |
133 | def draw_line(self, start, end, **attrs):
134 | geom = Line(start, end)
135 | _add_attrs(geom, attrs)
136 | self.add_onetime(geom)
137 | return geom
138 |
139 | def get_array(self):
140 | self.window.flip()
141 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
142 | self.window.flip()
143 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
144 | arr = arr.reshape(self.height, self.width, 4)
145 | return arr[::-1,:,0:3]
146 |
147 | def _add_attrs(geom, attrs):
148 | if "color" in attrs:
149 | geom.set_color(*attrs["color"])
150 | if "linewidth" in attrs:
151 | geom.set_linewidth(attrs["linewidth"])
152 |
153 | class Geom(object):
154 | def __init__(self):
155 | self._color=Color((0, 0, 0, 1.0))
156 | self.attrs = [self._color]
157 | def render(self):
158 | for attr in reversed(self.attrs):
159 | attr.enable()
160 | self.render1()
161 | for attr in self.attrs:
162 | attr.disable()
163 | def render1(self):
164 | raise NotImplementedError
165 | def add_attr(self, attr):
166 | self.attrs.append(attr)
167 | def set_color(self, r, g, b, alpha=1):
168 | self._color.vec4 = (r, g, b, alpha)
169 |
170 | class Attr(object):
171 | def enable(self):
172 | raise NotImplementedError
173 | def disable(self):
174 | pass
175 |
176 | class Transform(Attr):
177 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
178 | self.set_translation(*translation)
179 | self.set_rotation(rotation)
180 | self.set_scale(*scale)
181 | def enable(self):
182 | glPushMatrix()
183 |         glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc point
184 | glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
185 | glScalef(self.scale[0], self.scale[1], 1)
186 | def disable(self):
187 | glPopMatrix()
188 | def set_translation(self, newx, newy):
189 | self.translation = (float(newx), float(newy))
190 | def set_rotation(self, new):
191 | self.rotation = float(new)
192 | def set_scale(self, newx, newy):
193 | self.scale = (float(newx), float(newy))
194 |
195 | class Color(Attr):
196 | def __init__(self, vec4):
197 | self.vec4 = vec4
198 | def enable(self):
199 | glColor4f(*self.vec4)
200 |
201 | class LineStyle(Attr):
202 | def __init__(self, style):
203 | self.style = style
204 | def enable(self):
205 | glEnable(GL_LINE_STIPPLE)
206 | glLineStipple(1, self.style)
207 | def disable(self):
208 | glDisable(GL_LINE_STIPPLE)
209 |
210 | class LineWidth(Attr):
211 | def __init__(self, stroke):
212 | self.stroke = stroke
213 | def enable(self):
214 | glLineWidth(self.stroke)
215 |
216 | class Point(Geom):
217 | def __init__(self):
218 | Geom.__init__(self)
219 | def render1(self):
220 | glBegin(GL_POINTS) # draw point
221 | glVertex3f(0.0, 0.0, 0.0)
222 | glEnd()
223 |
224 | class FilledPolygon(Geom):
225 | def __init__(self, v):
226 | Geom.__init__(self)
227 | self.v = v
228 | def render1(self):
229 | if len(self.v) == 4 : glBegin(GL_QUADS)
230 | elif len(self.v) > 4 : glBegin(GL_POLYGON)
231 | else: glBegin(GL_TRIANGLES)
232 | for p in self.v:
233 | glVertex3f(p[0], p[1],0) # draw each vertex
234 | glEnd()
235 |
236 | color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
237 | glColor4f(*color)
238 | glBegin(GL_LINE_LOOP)
239 | for p in self.v:
240 | glVertex3f(p[0], p[1],0) # draw each vertex
241 | glEnd()
242 |
243 | def make_circle(radius=10, res=30, filled=True):
244 | points = []
245 | for i in range(res):
246 | ang = 2*math.pi*i / res
247 | points.append((math.cos(ang)*radius, math.sin(ang)*radius))
248 | if filled:
249 | return FilledPolygon(points)
250 | else:
251 | return PolyLine(points, True)
252 |
253 | def make_line(start,end):
254 | return Line(start, end)
255 |
256 |
257 | def make_polygon(v, filled=True):
258 | if filled: return FilledPolygon(v)
259 | else: return PolyLine(v, True)
260 |
261 | def make_polyline(v):
262 | return PolyLine(v, False)
263 |
264 | def make_capsule(length, width):
265 | l, r, t, b = 0, length, width/2, -width/2
266 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
267 | circ0 = make_circle(width/2)
268 | circ1 = make_circle(width/2)
269 | circ1.add_attr(Transform(translation=(length, 0)))
270 | geom = Compound([box, circ0, circ1])
271 | return geom
272 |
273 | class Compound(Geom):
274 | def __init__(self, gs):
275 | Geom.__init__(self)
276 | self.gs = gs
277 | for g in self.gs:
278 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
279 | def render1(self):
280 | for g in self.gs:
281 | g.render()
282 |
283 | class PolyLine(Geom):
284 | def __init__(self, v, close):
285 | Geom.__init__(self)
286 | self.v = v
287 | self.close = close
288 | self.linewidth = LineWidth(1)
289 | self.add_attr(self.linewidth)
290 | def render1(self):
291 | glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
292 | for p in self.v:
293 | glVertex3f(p[0], p[1],0) # draw each vertex
294 | glEnd()
295 | def set_linewidth(self, x):
296 | self.linewidth.stroke = x
297 |
298 | class Line(Geom):
299 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
300 | Geom.__init__(self)
301 | self.start = start
302 | self.end = end
303 | self.linewidth = LineWidth(1)
304 | self.add_attr(self.linewidth)
305 |
306 | def render1(self):
307 | glBegin(GL_LINES)
308 | glVertex2f(*self.start)
309 | glVertex2f(*self.end)
310 | glEnd()
311 |
312 | class Image(Geom):
313 | def __init__(self, fname, width, height):
314 | Geom.__init__(self)
315 | self.width = width
316 | self.height = height
317 | img = pyglet.image.load(fname)
318 | self.img = img
319 | self.flip = False
320 | def render1(self):
321 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
322 |
323 | # ================================================================
324 |
325 | class SimpleImageViewer(object):
326 | def __init__(self, display=None):
327 | self.window = None
328 | self.isopen = False
329 | self.display = display
330 | def imshow(self, arr):
331 | if self.window is None:
332 | height, width, channels = arr.shape
333 | self.window = pyglet.window.Window(width=width, height=height, display=self.display)
334 | self.width = width
335 | self.height = height
336 | self.isopen = True
337 |         assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong shape"
338 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
339 | self.window.clear()
340 | self.window.switch_to()
341 | self.window.dispatch_events()
342 | image.blit(0,0)
343 | self.window.flip()
344 | def close(self):
345 | if self.isopen:
346 | self.window.close()
347 | self.isopen = False
348 | def __del__(self):
349 | self.close()
--------------------------------------------------------------------------------
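A minimal usage sketch of the `Viewer` above (not part of the repo). It assumes pyglet/OpenGL and a display (or `xvfb-run`) are available; the geometry values are arbitrary:

```python
from multiagent import rendering

viewer = rendering.Viewer(700, 700)
viewer.set_bounds(-1, 1, -1, 1)             # map world coords [-1, 1]^2 onto the 700x700 window

circle = rendering.make_circle(0.1)         # filled circle of radius 0.1 (world units)
circle.set_color(0.35, 0.35, 0.85, alpha=0.5)
circle.add_attr(rendering.Transform(translation=(0.2, -0.3)))
viewer.add_geom(circle)

rgb = viewer.render(return_rgb_array=True)  # numpy array of shape (H, W, 3)
print(rgb.shape)
viewer.close()
```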
/multiagent/scenario.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # defines scenario upon which the world is built
4 | class BaseScenario(object):
5 | # create elements of the world
6 | def make_world(self):
7 | raise NotImplementedError()
8 | # create initial conditions of the world
9 | def reset_world(self, world):
10 | raise NotImplementedError()
11 |
--------------------------------------------------------------------------------
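A minimal sketch (not part of this repo) of a scenario built on `BaseScenario`, following the same pattern as the shipped scenarios below and assuming the standard `World`/`Agent`/`Landmark` API from `multiagent/core.py`:

```python
import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario

class MyScenario(BaseScenario):
    def make_world(self):
        world = World()
        world.agents = [Agent() for _ in range(2)]
        world.landmarks = [Landmark() for _ in range(1)]
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.collide = False
            agent.silent = True
        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark %d' % i
            landmark.collide = False
            landmark.movable = False
        self.reset_world(world)
        return world

    def reset_world(self, world):
        for agent in world.agents:
            agent.color = np.array([0.35, 0.35, 0.85])
            agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            agent.state.p_vel = np.zeros(world.dim_p)
            agent.state.c = np.zeros(world.dim_c)
        for landmark in world.landmarks:
            landmark.color = np.array([0.25, 0.25, 0.25])
            landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
            landmark.state.p_vel = np.zeros(world.dim_p)

    def reward(self, agent, world):
        # negative squared distance to the single landmark
        return -np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))

    def observation(self, agent, world):
        rel = [l.state.p_pos - agent.state.p_pos for l in world.landmarks]
        return np.concatenate([agent.state.p_vel] + rel)
```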
/multiagent/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | import imp
2 | import os.path as osp
3 |
4 |
5 | def load(name):
6 | pathname = osp.join(osp.dirname(__file__), name)
7 | return imp.load_source('', pathname)
8 |
--------------------------------------------------------------------------------
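A typical way to use `load` (a hedged sketch, not copied from this repo's training scripts; it assumes `MultiAgentEnv` in `multiagent/environment.py` keeps the original MPE constructor signature):

```python
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv

# load a scenario module by file name and build its world
scenario = scenarios.load("simple_spread.py").Scenario()
world = scenario.make_world()

# wire the scenario callbacks into the environment
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
obs_n = env.reset()
print(env.n, [o.shape for o in obs_n])
```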
/multiagent/scenarios/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/scenarios/__pycache__/simple_adversary.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/simple_adversary.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/scenarios/__pycache__/simple_collaborative_comm.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/multiagent/scenarios/__pycache__/simple_collaborative_comm.cpython-36.pyc
--------------------------------------------------------------------------------
/multiagent/scenarios/simple.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 | class Scenario(BaseScenario):
5 | def make_world(self):
6 | world = World()
7 | # add agents
8 | world.agents = [Agent() for i in range(1)]
9 | for i, agent in enumerate(world.agents):
10 | agent.name = 'agent %d' % i
11 | agent.collide = False
12 | agent.silent = True
13 | # add landmarks
14 | world.landmarks = [Landmark() for i in range(1)]
15 | for i, landmark in enumerate(world.landmarks):
16 | landmark.name = 'landmark %d' % i
17 | landmark.collide = False
18 | landmark.movable = False
19 | # make initial conditions
20 | self.reset_world(world)
21 | return world
22 |
23 | def reset_world(self, world):
24 | # random properties for agents
25 | for i, agent in enumerate(world.agents):
26 | agent.color = np.array([0.25,0.25,0.25])
27 | # random properties for landmarks
28 | for i, landmark in enumerate(world.landmarks):
29 | landmark.color = np.array([0.75,0.75,0.75])
30 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
31 | # set random initial states
32 | for agent in world.agents:
33 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
34 | agent.state.p_vel = np.zeros(world.dim_p)
35 | agent.state.c = np.zeros(world.dim_c)
36 | for i, landmark in enumerate(world.landmarks):
37 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
38 | landmark.state.p_vel = np.zeros(world.dim_p)
39 |
40 | def reward(self, agent, world):
41 | dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
42 | return -dist2
43 |
44 | def observation(self, agent, world):
45 | # get positions of all entities in this agent's reference frame
46 | entity_pos = []
47 | for entity in world.landmarks:
48 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
49 | return np.concatenate([agent.state.p_vel] + entity_pos)
50 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_adversary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 |
8 | def make_world(self):
9 | world = World()
10 | # set any world properties first
11 | world.dim_c = 2
12 | num_agents = 3
13 | world.num_agents = num_agents
14 | num_adversaries = 1
15 | num_landmarks = num_agents - 1
16 | # add agents
17 | world.agents = [Agent() for i in range(num_agents)]
18 | for i, agent in enumerate(world.agents):
19 | agent.name = 'agent %d' % i
20 | agent.collide = False
21 | agent.silent = True
22 | agent.adversary = True if i < num_adversaries else False
23 | agent.size = 0.15
24 | # add landmarks
25 | world.landmarks = [Landmark() for i in range(num_landmarks)]
26 | for i, landmark in enumerate(world.landmarks):
27 | landmark.name = 'landmark %d' % i
28 | landmark.collide = False
29 | landmark.movable = False
30 | landmark.size = 0.08
31 | # make initial conditions
32 | self.reset_world(world)
33 | return world
34 |
35 | def reset_world(self, world):
36 | # random properties for agents
37 | world.agents[0].color = np.array([0.85, 0.35, 0.35])
38 | for i in range(1, world.num_agents):
39 | world.agents[i].color = np.array([0.35, 0.35, 0.85])
40 | # random properties for landmarks
41 | for i, landmark in enumerate(world.landmarks):
42 | landmark.color = np.array([0.15, 0.15, 0.15])
43 | # set goal landmark
44 | goal = np.random.choice(world.landmarks)
45 | goal.color = np.array([0.15, 0.65, 0.15])
46 | for agent in world.agents:
47 | agent.goal_a = goal
48 | # set random initial states
49 | for agent in world.agents:
50 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
51 | agent.state.p_vel = np.zeros(world.dim_p)
52 | agent.state.c = np.zeros(world.dim_c)
53 | for i, landmark in enumerate(world.landmarks):
54 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
55 | landmark.state.p_vel = np.zeros(world.dim_p)
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
61 | else:
62 | dists = []
63 | for l in world.landmarks:
64 | dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
65 | dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
66 | return tuple(dists)
67 |
68 | # return all agents that are not adversaries
69 | def good_agents(self, world):
70 | return [agent for agent in world.agents if not agent.adversary]
71 |
72 | # return all adversarial agents
73 | def adversaries(self, world):
74 | return [agent for agent in world.agents if agent.adversary]
75 |
76 | def reward(self, agent, world):
77 |         # dispatch to the adversary or good-agent reward depending on the agent's role
78 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
79 |
80 | def agent_reward(self, agent, world):
81 | # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
82 | shaped_reward = True
83 | shaped_adv_reward = True
84 |
85 | # Calculate negative reward for adversary
86 | adversary_agents = self.adversaries(world)
87 | if shaped_adv_reward: # distance-based adversary reward
88 | adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
89 | else: # proximity-based adversary reward (binary)
90 | adv_rew = 0
91 | for a in adversary_agents:
92 | if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
93 | adv_rew -= 5
94 |
95 | # Calculate positive reward for agents
96 | good_agents = self.good_agents(world)
97 | if shaped_reward: # distance-based agent reward
98 | pos_rew = -min(
99 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
100 | else: # proximity-based agent reward (binary)
101 | pos_rew = 0
102 | if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
103 | < 2 * agent.goal_a.size:
104 | pos_rew += 5
105 | pos_rew -= min(
106 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
107 | return pos_rew + adv_rew
108 |
109 | def adversary_reward(self, agent, world):
110 | # Rewarded based on proximity to the goal landmark
111 | shaped_reward = True
112 | if shaped_reward: # distance-based reward
113 | return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
114 | else: # proximity-based reward (binary)
115 | adv_rew = 0
116 | if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
117 | adv_rew += 5
118 | return adv_rew
119 |
120 |
121 | def observation(self, agent, world):
122 | # get positions of all entities in this agent's reference frame
123 | entity_pos = []
124 | for entity in world.landmarks:
125 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
126 | # entity colors
127 | entity_color = []
128 | for entity in world.landmarks:
129 | entity_color.append(entity.color)
130 | # communication of all other agents
131 | other_pos = []
132 | for other in world.agents:
133 | if other is agent: continue
134 | other_pos.append(other.state.p_pos - agent.state.p_pos)
135 |
136 | if not agent.adversary:
137 | return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
138 | else:
139 | return np.concatenate(entity_pos + other_pos)
140 |
--------------------------------------------------------------------------------
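To make the shaped variant of `agent_reward` concrete, here is a small numeric sketch with hypothetical positions (not taken from the scenario): the good agents' reward is minus the closest good-agent distance to the goal, plus the sum of adversary distances to the goal:

```python
import numpy as np

goal = np.array([0.0, 0.0])
good_positions = [np.array([0.3, 0.4]), np.array([1.0, 0.0])]   # distances 0.5 and 1.0
adversary_positions = [np.array([0.0, 2.0])]                    # distance 2.0

pos_rew = -min(np.linalg.norm(p - goal) for p in good_positions)      # -0.5
adv_rew = sum(np.linalg.norm(p - goal) for p in adversary_positions)  #  2.0
print(pos_rew + adv_rew)                                              #  1.5
```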
/multiagent/scenarios/simple_collaborative_comm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core_uav import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | num_uav = 3
11 | world.collaborative = True # whether agents share rewards
12 | # add agents
13 | world.agents = [Agent() for i in range(num_uav)]
14 | for i, agent in enumerate(world.agents):
15 | agent.name = 'agent %d' % i
16 | agent.collide = True
17 | # add landmarks
18 | world.landmarks = [Landmark() for i in range(world.dim_c)]
19 | for i, landmark in enumerate(world.landmarks):
20 | landmark.name = 'landmark %d' % i
21 | landmark.collide = False
22 | landmark.movable = True
23 |
24 | # make initial conditions
25 | self.reset_world(world)
26 | return world
27 |
28 | def reset_world(self, world):
29 | # set color for landmarks
30 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75])
31 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75])
32 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75])
33 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75])
34 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75])
35 |
36 | # set random initial states
37 | # for i, agent in enumerate(world.agents):
38 | # agent.color = np.array([0.35, 0.35, 0.85])
39 | # # random properties for landmarks
40 | # for i, landmark in enumerate(world.landmarks):
41 | # landmark.color = np.array([0.25, 0.25, 0.25])
42 | # set random initial states
43 | for i, agent in enumerate(world.agents):
44 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i]
45 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i]
46 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i]
47 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
48 | agent.state.p_vel = np.zeros(world.dim_p)
49 | agent.state.c = np.zeros(world.dim_c)
50 | agent.state.assign = np.zeros(world.dim_c)
51 | agent.state.SNRforUser = np.zeros(world.dim_c)
52 |
53 | for i, landmark in enumerate(world.landmarks):
54 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
55 |
56 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629],
57 | # [-0.324932, 0.60295496], [0.62360916, -0.52245444],
58 | # [0.65862443, 0.23426809]])[i]
59 |
60 | landmark.state.p_pos = np.array([[-0.8154612, -0.24502182], [0.89699076, -0.14581629],
61 | [-0.3249325, 0.60295496], [0.62360916, -0.52245444],
62 | [0.65862443, 0.23426809], [0.29590076, 0.54571629],
63 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649],
64 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i]
65 |
66 | # landmark.state.p_pos = np.array([[0.89699076, -0.14581629], [-0.8154612, -0.24502182],
67 | # [0.65862443, 0.23426809], [-0.3249325, 0.60295496],
68 | # [0.62360916, -0.52245444], [0.29590076, 0.54571629],
69 | # [-0.89699076, -0.74682629], [-0.89699076, 0.14581629],
70 | # [0.84629571, -0.98582649], [-0.14699076, -0.75584629]])[i]
71 |
72 | landmark.state.p_vel = np.zeros(world.dim_p)
73 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p)
74 | landmark.color = np.array([0.25, 0.25, 0.25])
75 | landmark.assign_index = None
76 | landmark.switch_cout = 0
77 |
78 | def benchmark_data(self, agent, world):
79 | rew = 0
80 | collisions = 0
81 | occupied_landmarks = 0
82 | min_dists = 0
83 | for l in world.landmarks:
84 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
85 | min_dists += min(dists)
86 | rew -= min(dists)
87 | if min(dists) < 0.1:
88 | occupied_landmarks += 1
89 | if agent.collide:
90 | for a in world.agents:
91 | if self.is_collision(a, agent):
92 | rew -= 1
93 | collisions += 1
94 | return rew, collisions, min_dists, occupied_landmarks
95 |
96 | def is_collision(self, agent1, agent2):
97 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
98 | dist = np.sqrt(np.sum(np.square(delta_pos)))
99 | dist_min = agent1.size + agent2.size
100 | return True if dist < dist_min else False
101 |
102 | def reward(self, agent, world):
103 | p = 0.7
104 | try:
105 |
106 | # reward = 10*np.log(p*np.min(world.Rate[np.nonzero(world.Rate)])+(1-p)*np.average(world.Rate[np.nonzero(world.Rate)]))
107 | # reward = 10 * np.log(np.min(world.Rate[np.nonzero(world.Rate)]))
108 |
109 |             # Finding the user with the minimum rate this way is safer: it avoids the error the commented-out version above raises when every rate is zero
110 | min_rate = None
111 | for i in range(world.dim_c):
112 | min_rate = np.max(world.Rate[:,i]) if min_rate is None else min(min_rate, np.max(world.Rate[:,i]))
113 | reward = 10*np.log(min_rate+1e-9)
114 |
115 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)]))
116 | # reward = np.log(np.sum(world.Rate))
117 | except ValueError:
118 | reward = 0
119 |
120 | # reward = np.log(np.sum(world.Rate))
121 |
122 | for i in range(world.dim_c):
123 | if np.max(world.SNR[:, i]) < 15:
124 | reward -= 10
125 |                 #print('SNR not satisfied')
126 |
127 | for agent in world.agents:
128 | if not (-1 <= agent.state.p_pos[0] <= 1 and -1 <= agent.state.p_pos[1] <= 1):
129 | reward -= 100
130 | # print(entity.name)
131 | # print(entity.state.p_pos)
132 |                 # raise Exception('out of bounds')
133 |
134 |         # # penalize each user's handover (switch) count
135 | # for landmark in world.landmarks:
136 | # reward -= landmark.switch_cout
137 | # landmark.switch_cout = 0
138 |
139 | if agent.collide:
140 | for a in world.agents:
141 |                 if a.name != agent.name:
142 | if self.is_collision(a, agent):
143 | reward -= 100
144 |
145 | return reward
146 |
147 | def observation(self, agent, world):
148 | # get positions of all entities in this agent's reference frame
149 | entity_pos = []
150 | for entity in world.landmarks: # world.entities:
151 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
152 | # entity colors
153 | entity_color = []
154 | for entity in world.landmarks: # world.entities:
155 | entity_color.append(entity.color)
156 | # communication of all other agents
157 | other_comm = []
158 | other_pos = []
159 | assign = []
160 | SNRforUser = []
161 | for other in world.agents:
162 | SNRforUser.append(other.state.SNRforUser)
163 | assign.append(other.state.assign)
164 | if other is agent: continue
165 | other_comm.append(other.state.c)
166 | other_pos.append(other.state.p_pos - agent.state.p_pos)
167 |
168 | maxSNR = np.max(SNRforUser)
169 | normalized_SNR = SNRforUser.copy()
170 | for i, item in enumerate(SNRforUser):
171 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR
172 | # return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_comm) # original code
173 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR)
--------------------------------------------------------------------------------
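The core of `reward` above picks, for every user (a column of `world.Rate`), the best rate offered by any UAV (a row), and then rewards the log of the worst such per-user rate. A minimal sketch with a hypothetical 3x3 rate matrix (the scenario itself uses `dim_c = 10` users):

```python
import numpy as np

Rate = np.array([[1.2, 0.0, 0.4],      # UAV 0's rate to users 0..2
                 [0.0, 0.9, 0.0],      # UAV 1
                 [0.3, 0.0, 0.0]])     # UAV 2

per_user_best = Rate.max(axis=0)       # [1.2, 0.9, 0.4]
min_rate = per_user_best.min()         # 0.4 -> the bottleneck user
reward = 10 * np.log(min_rate + 1e-9)
print(per_user_best, reward)
```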
/multiagent/scenarios/simple_crypto.py:
--------------------------------------------------------------------------------
1 | """
2 | Scenario:
3 | 1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from
4 | adversary to goal. Adversary is rewarded for its distance to the goal.
5 | """
6 |
7 |
8 | import numpy as np
9 | from multiagent.core import World, Agent, Landmark
10 | from multiagent.scenario import BaseScenario
11 | import random
12 |
13 |
14 | class CryptoAgent(Agent):
15 | def __init__(self):
16 | super(CryptoAgent, self).__init__()
17 | self.key = None
18 |
19 | class Scenario(BaseScenario):
20 |
21 | def make_world(self):
22 | world = World()
23 | # set any world properties first
24 | num_agents = 3
25 | num_adversaries = 1
26 | num_landmarks = 2
27 | world.dim_c = 4
28 | # add agents
29 | world.agents = [CryptoAgent() for i in range(num_agents)]
30 | for i, agent in enumerate(world.agents):
31 | agent.name = 'agent %d' % i
32 | agent.collide = False
33 | agent.adversary = True if i < num_adversaries else False
34 | agent.speaker = True if i == 2 else False
35 | agent.movable = False
36 | # add landmarks
37 | world.landmarks = [Landmark() for i in range(num_landmarks)]
38 | for i, landmark in enumerate(world.landmarks):
39 | landmark.name = 'landmark %d' % i
40 | landmark.collide = False
41 | landmark.movable = False
42 | # make initial conditions
43 | self.reset_world(world)
44 | return world
45 |
46 |
47 | def reset_world(self, world):
48 | # random properties for agents
49 | for i, agent in enumerate(world.agents):
50 | agent.color = np.array([0.25, 0.25, 0.25])
51 | if agent.adversary:
52 | agent.color = np.array([0.75, 0.25, 0.25])
53 | agent.key = None
54 | # random properties for landmarks
55 | color_list = [np.zeros(world.dim_c) for i in world.landmarks]
56 | for i, color in enumerate(color_list):
57 | color[i] += 1
58 | for color, landmark in zip(color_list, world.landmarks):
59 | landmark.color = color
60 | # set goal landmark
61 | goal = np.random.choice(world.landmarks)
62 | world.agents[1].color = goal.color
63 | world.agents[2].key = np.random.choice(world.landmarks).color
64 |
65 | for agent in world.agents:
66 | agent.goal_a = goal
67 |
68 | # set random initial states
69 | for agent in world.agents:
70 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
71 | agent.state.p_vel = np.zeros(world.dim_p)
72 | agent.state.c = np.zeros(world.dim_c)
73 | for i, landmark in enumerate(world.landmarks):
74 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
75 | landmark.state.p_vel = np.zeros(world.dim_p)
76 |
77 |
78 | def benchmark_data(self, agent, world):
79 | # returns data for benchmarking purposes
80 | return (agent.state.c, agent.goal_a.color)
81 |
82 | # return all agents that are not adversaries
83 | def good_listeners(self, world):
84 | return [agent for agent in world.agents if not agent.adversary and not agent.speaker]
85 |
86 | # return all agents that are not adversaries
87 | def good_agents(self, world):
88 | return [agent for agent in world.agents if not agent.adversary]
89 |
90 | # return all adversarial agents
91 | def adversaries(self, world):
92 | return [agent for agent in world.agents if agent.adversary]
93 |
94 | def reward(self, agent, world):
95 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
96 |
97 | def agent_reward(self, agent, world):
98 | # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot
99 | good_listeners = self.good_listeners(world)
100 | adversaries = self.adversaries(world)
101 | good_rew = 0
102 | adv_rew = 0
103 | for a in good_listeners:
104 | if (a.state.c == np.zeros(world.dim_c)).all():
105 | continue
106 | else:
107 | good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color))
108 | for a in adversaries:
109 | if (a.state.c == np.zeros(world.dim_c)).all():
110 | continue
111 | else:
112 | adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color))
113 | adv_rew += adv_l1
114 | return adv_rew + good_rew
115 |
116 | def adversary_reward(self, agent, world):
117 | # Adversary (Eve) is rewarded if it can reconstruct original goal
118 | rew = 0
119 | if not (agent.state.c == np.zeros(world.dim_c)).all():
120 | rew -= np.sum(np.square(agent.state.c - agent.goal_a.color))
121 | return rew
122 |
123 |
124 | def observation(self, agent, world):
125 | # goal color
126 | goal_color = np.zeros(world.dim_color)
127 | if agent.goal_a is not None:
128 | goal_color = agent.goal_a.color
129 |
130 | # get positions of all entities in this agent's reference frame
131 | entity_pos = []
132 | for entity in world.landmarks:
133 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
134 | # communication of all other agents
135 | comm = []
136 | for other in world.agents:
137 | if other is agent or (other.state.c is None) or not other.speaker: continue
138 | comm.append(other.state.c)
139 |
140 | confer = np.array([0])
141 |
142 | if world.agents[2].key is None:
143 | confer = np.array([1])
144 | key = np.zeros(world.dim_c)
145 | goal_color = np.zeros(world.dim_c)
146 | else:
147 | key = world.agents[2].key
148 |
149 | prnt = False
150 | # speaker
151 | if agent.speaker:
152 | if prnt:
153 | print('speaker')
154 | print(agent.state.c)
155 | print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)]))
156 | return np.concatenate([goal_color] + [key])
157 | # listener
158 | if not agent.speaker and not agent.adversary:
159 | if prnt:
160 | print('listener')
161 | print(agent.state.c)
162 | print(np.concatenate([key] + comm + [confer]))
163 | return np.concatenate([key] + comm)
164 | if not agent.speaker and agent.adversary:
165 | if prnt:
166 | print('adversary')
167 | print(agent.state.c)
168 | print(np.concatenate(comm + [confer]))
169 | return np.concatenate(comm)
170 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_push.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 2
10 | num_agents = 2
11 | num_adversaries = 1
12 | num_landmarks = 2
13 | # add agents
14 | world.agents = [Agent() for i in range(num_agents)]
15 | for i, agent in enumerate(world.agents):
16 | agent.name = 'agent %d' % i
17 | agent.collide = True
18 | agent.silent = True
19 | if i < num_adversaries:
20 | agent.adversary = True
21 | else:
22 | agent.adversary = False
23 | # add landmarks
24 | world.landmarks = [Landmark() for i in range(num_landmarks)]
25 | for i, landmark in enumerate(world.landmarks):
26 | landmark.name = 'landmark %d' % i
27 | landmark.collide = False
28 | landmark.movable = False
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # random properties for landmarks
35 | for i, landmark in enumerate(world.landmarks):
36 | landmark.color = np.array([0.1, 0.1, 0.1])
37 | landmark.color[i + 1] += 0.8
38 | landmark.index = i
39 | # set goal landmark
40 | goal = np.random.choice(world.landmarks)
41 | for i, agent in enumerate(world.agents):
42 | agent.goal_a = goal
43 | agent.color = np.array([0.25, 0.25, 0.25])
44 | if agent.adversary:
45 | agent.color = np.array([0.75, 0.25, 0.25])
46 | else:
47 | j = goal.index
48 | agent.color[j + 1] += 0.5
49 | # set random initial states
50 | for agent in world.agents:
51 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
52 | agent.state.p_vel = np.zeros(world.dim_p)
53 | agent.state.c = np.zeros(world.dim_c)
54 | for i, landmark in enumerate(world.landmarks):
55 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 | landmark.state.p_vel = np.zeros(world.dim_p)
57 |
58 | def reward(self, agent, world):
59 |         # dispatch to the adversary or good-agent reward depending on the agent's role
60 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
61 |
62 | def agent_reward(self, agent, world):
63 | # the distance to the goal
64 | return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
65 |
66 | def adversary_reward(self, agent, world):
67 | # keep the nearest good agents away from the goal
68 | agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary]
69 | pos_rew = min(agent_dist)
70 | #nearest_agent = world.good_agents[np.argmin(agent_dist)]
71 | #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos)))
72 | neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)))
73 | #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents])
74 | return pos_rew - neg_rew
75 |
76 | def observation(self, agent, world):
77 | # get positions of all entities in this agent's reference frame
78 | entity_pos = []
79 | for entity in world.landmarks: # world.entities:
80 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
81 | # entity colors
82 | entity_color = []
83 | for entity in world.landmarks: # world.entities:
84 | entity_color.append(entity.color)
85 | # communication of all other agents
86 | comm = []
87 | other_pos = []
88 | for other in world.agents:
89 | if other is agent: continue
90 | comm.append(other.state.c)
91 | other_pos.append(other.state.p_pos - agent.state.p_pos)
92 | if not agent.adversary:
93 | return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos)
94 | else:
95 | #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network
96 | return np.concatenate([agent.state.p_vel] + entity_pos + other_pos)
97 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_reference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | world.collaborative = True # whether agents share rewards
11 | # add agents
12 | world.agents = [Agent() for i in range(2)]
13 | for i, agent in enumerate(world.agents):
14 | agent.name = 'agent %d' % i
15 | agent.collide = False
16 | # add landmarks
17 | world.landmarks = [Landmark() for i in range(3)]
18 | for i, landmark in enumerate(world.landmarks):
19 | landmark.name = 'landmark %d' % i
20 | landmark.collide = False
21 | landmark.movable = False
22 | # make initial conditions
23 | self.reset_world(world)
24 | return world
25 |
26 | def reset_world(self, world):
27 | # assign goals to agents
28 | for agent in world.agents:
29 | agent.goal_a = None
30 | agent.goal_b = None
31 | # want other agent to go to the goal landmark
32 | world.agents[0].goal_a = world.agents[1]
33 | world.agents[0].goal_b = np.random.choice(world.landmarks)
34 | world.agents[1].goal_a = world.agents[0]
35 | world.agents[1].goal_b = np.random.choice(world.landmarks)
36 | # random properties for agents
37 | for i, agent in enumerate(world.agents):
38 | agent.color = np.array([0.25,0.25,0.25])
39 | # random properties for landmarks
40 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
41 | world.landmarks[1].color = np.array([0.25,0.75,0.25])
42 | world.landmarks[2].color = np.array([0.25,0.25,0.75])
43 | # special colors for goals
44 | world.agents[0].goal_a.color = world.agents[0].goal_b.color
45 | world.agents[1].goal_a.color = world.agents[1].goal_b.color
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | landmark.state.p_vel = np.zeros(world.dim_p)
54 |
55 | def reward(self, agent, world):
56 | if agent.goal_a is None or agent.goal_b is None:
57 | return 0.0
58 | dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
59 | return -dist2
60 |
61 | def observation(self, agent, world):
62 | # goal color
63 | goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
64 | if agent.goal_b is not None:
65 | goal_color[1] = agent.goal_b.color
66 |
67 | # get positions of all entities in this agent's reference frame
68 | entity_pos = []
69 | for entity in world.landmarks:
70 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
71 | # entity colors
72 | entity_color = []
73 | for entity in world.landmarks:
74 | entity_color.append(entity.color)
75 | # communication of all other agents
76 | comm = []
77 | for other in world.agents:
78 | if other is agent: continue
79 | comm.append(other.state.c)
80 | return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
81 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_speaker_listener.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 3
10 | num_landmarks = 3
11 | world.collaborative = True
12 | # add agents
13 | world.agents = [Agent() for i in range(2)]
14 | for i, agent in enumerate(world.agents):
15 | agent.name = 'agent %d' % i
16 | agent.collide = False
17 | agent.size = 0.075
18 | # speaker
19 | world.agents[0].movable = False
20 | # listener
21 | world.agents[1].silent = True
22 | # add landmarks
23 | world.landmarks = [Landmark() for i in range(num_landmarks)]
24 | for i, landmark in enumerate(world.landmarks):
25 | landmark.name = 'landmark %d' % i
26 | landmark.collide = False
27 | landmark.movable = False
28 | landmark.size = 0.04
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # assign goals to agents
35 | for agent in world.agents:
36 | agent.goal_a = None
37 | agent.goal_b = None
38 | # want listener to go to the goal landmark
39 | world.agents[0].goal_a = world.agents[1]
40 | world.agents[0].goal_b = np.random.choice(world.landmarks)
41 | # random properties for agents
42 | for i, agent in enumerate(world.agents):
43 | agent.color = np.array([0.25,0.25,0.25])
44 | # random properties for landmarks
45 | world.landmarks[0].color = np.array([0.65,0.15,0.15])
46 | world.landmarks[1].color = np.array([0.15,0.65,0.15])
47 | world.landmarks[2].color = np.array([0.15,0.15,0.65])
48 | # special colors for goals
49 | world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45])
50 | # set random initial states
51 | for agent in world.agents:
52 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | agent.state.p_vel = np.zeros(world.dim_p)
54 | agent.state.c = np.zeros(world.dim_c)
55 | for i, landmark in enumerate(world.landmarks):
56 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
57 | landmark.state.p_vel = np.zeros(world.dim_p)
58 |
59 | def benchmark_data(self, agent, world):
60 | # returns data for benchmarking purposes
61 |         return self.reward(agent, world)
62 |
63 | def reward(self, agent, world):
64 | # squared distance from listener to landmark
65 | a = world.agents[0]
66 | dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos))
67 | return -dist2
68 |
69 | def observation(self, agent, world):
70 | # goal color
71 | goal_color = np.zeros(world.dim_color)
72 | if agent.goal_b is not None:
73 | goal_color = agent.goal_b.color
74 |
75 | # get positions of all entities in this agent's reference frame
76 | entity_pos = []
77 | for entity in world.landmarks:
78 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
79 |
80 | # communication of all other agents
81 | comm = []
82 | for other in world.agents:
83 | if other is agent or (other.state.c is None): continue
84 | comm.append(other.state.c)
85 |
86 | # speaker
87 | if not agent.movable:
88 | return np.concatenate([goal_color])
89 | # listener
90 | if agent.silent:
91 | return np.concatenate([agent.state.p_vel] + entity_pos + comm)
92 |
93 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_spread.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_agents = 2
12 | num_landmarks = 3
13 | world.collaborative = True
14 | # add agents
15 | world.agents = [Agent() for i in range(num_agents)]
16 | for i, agent in enumerate(world.agents):
17 | agent.name = 'agent %d' % i
18 | agent.collide = True
19 | agent.silent = True
20 | agent.size = 0.15
21 | # add landmarks
22 | world.landmarks = [Landmark() for i in range(num_landmarks)]
23 | for i, landmark in enumerate(world.landmarks):
24 | landmark.name = 'landmark %d' % i
25 | landmark.collide = False
26 | landmark.movable = False
27 | # make initial conditions
28 | self.reset_world(world)
29 | return world
30 |
31 | def reset_world(self, world):
32 | # random properties for agents
33 | for i, agent in enumerate(world.agents):
34 | agent.color = np.array([0.35, 0.35, 0.85])
35 | # random properties for landmarks
36 | for i, landmark in enumerate(world.landmarks):
37 | landmark.color = np.array([0.25, 0.25, 0.25])
38 | # set random initial states
39 | for agent in world.agents:
40 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
41 | agent.state.p_vel = np.zeros(world.dim_p)
42 | agent.state.c = np.zeros(world.dim_c)
43 | for i, landmark in enumerate(world.landmarks):
44 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
45 | landmark.state.p_vel = np.zeros(world.dim_p)
46 |
47 | def benchmark_data(self, agent, world):
48 | rew = 0
49 | collisions = 0
50 | occupied_landmarks = 0
51 | min_dists = 0
52 | for l in world.landmarks:
53 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
54 | min_dists += min(dists)
55 | rew -= min(dists)
56 | if min(dists) < 0.1:
57 | occupied_landmarks += 1
58 | if agent.collide:
59 | for a in world.agents:
60 | if self.is_collision(a, agent):
61 | rew -= 1
62 | collisions += 1
63 | return (rew, collisions, min_dists, occupied_landmarks)
64 |
65 |
66 | def is_collision(self, agent1, agent2):
67 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
68 | dist = np.sqrt(np.sum(np.square(delta_pos)))
69 | dist_min = agent1.size + agent2.size
70 | return True if dist < dist_min else False
71 |
72 | def reward(self, agent, world):
73 | # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
74 | rew = 0
75 | for l in world.landmarks:
76 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
77 | rew -= min(dists)
78 | if agent.collide:
79 | for a in world.agents:
80 | if self.is_collision(a, agent):
81 | rew -= 1
82 | return rew
83 |
84 | def observation(self, agent, world):
85 | # get positions of all entities in this agent's reference frame
86 | entity_pos = []
87 | for entity in world.landmarks: # world.entities:
88 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
89 | # entity colors
90 | entity_color = []
91 | for entity in world.landmarks: # world.entities:
92 | entity_color.append(entity.color)
93 | # communication of all other agents
94 | comm = []
95 | other_pos = []
96 | for other in world.agents:
97 | if other is agent: continue
98 | comm.append(other.state.c)
99 | other_pos.append(other.state.p_pos - agent.state.p_pos)
100 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
101 |
--------------------------------------------------------------------------------
/multiagent/scenarios/simple_tag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_good_agents = 1
12 | num_adversaries = 3
13 | num_agents = num_adversaries + num_good_agents
14 | num_landmarks = 2
15 | # add agents
16 | world.agents = [Agent() for i in range(num_agents)]
17 | for i, agent in enumerate(world.agents):
18 | agent.name = 'agent %d' % i
19 | agent.collide = True
20 | agent.silent = True
21 | agent.adversary = True if i < num_adversaries else False
22 | agent.size = 0.075 if agent.adversary else 0.05
23 | agent.accel = 3.0 if agent.adversary else 4.0
24 | #agent.accel = 20.0 if agent.adversary else 25.0
25 | agent.max_speed = 1.0 if agent.adversary else 1.3
26 | # add landmarks
27 | world.landmarks = [Landmark() for i in range(num_landmarks)]
28 | for i, landmark in enumerate(world.landmarks):
29 | landmark.name = 'landmark %d' % i
30 | landmark.collide = True
31 | landmark.movable = False
32 | landmark.size = 0.2
33 | landmark.boundary = False
34 | # make initial conditions
35 | self.reset_world(world)
36 | return world
37 |
38 |
39 | def reset_world(self, world):
40 | # random properties for agents
41 | for i, agent in enumerate(world.agents):
42 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
43 | # random properties for landmarks
44 | for i, landmark in enumerate(world.landmarks):
45 | landmark.color = np.array([0.25, 0.25, 0.25])
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | if not landmark.boundary:
53 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
54 | landmark.state.p_vel = np.zeros(world.dim_p)
55 |
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | collisions = 0
61 | for a in self.good_agents(world):
62 | if self.is_collision(a, agent):
63 | collisions += 1
64 | return collisions
65 | else:
66 | return 0
67 |
68 |
69 | def is_collision(self, agent1, agent2):
70 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
71 | dist = np.sqrt(np.sum(np.square(delta_pos)))
72 | dist_min = agent1.size + agent2.size
73 | return True if dist < dist_min else False
74 |
75 | # return all agents that are not adversaries
76 | def good_agents(self, world):
77 | return [agent for agent in world.agents if not agent.adversary]
78 |
79 | # return all adversarial agents
80 | def adversaries(self, world):
81 | return [agent for agent in world.agents if agent.adversary]
82 |
83 |
84 | def reward(self, agent, world):
85 | # dispatch to the adversary (predator) or good-agent (prey) reward
86 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
87 | return main_reward
88 |
89 | def agent_reward(self, agent, world):
90 | # Agents are negatively rewarded if caught by adversaries
91 | rew = 0
92 | shape = False
93 | adversaries = self.adversaries(world)
94 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary)
95 | for adv in adversaries:
96 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
97 | if agent.collide:
98 | for a in adversaries:
99 | if self.is_collision(a, agent):
100 | rew -= 10
101 |
102 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries
103 | def bound(x):
104 | if x < 0.9:
105 | return 0
106 | if x < 1.0:
107 | return (x - 0.9) * 10
108 | return min(np.exp(2 * x - 2), 10)
109 | for p in range(world.dim_p):
110 | x = abs(agent.state.p_pos[p])
111 | rew -= bound(x)
112 |
113 | return rew
114 |
115 | def adversary_reward(self, agent, world):
116 | # Adversaries are rewarded for collisions with agents
117 | rew = 0
118 | shape = False
119 | agents = self.good_agents(world)
120 | adversaries = self.adversaries(world)
121 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents)
122 | for adv in adversaries:
123 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
124 | if agent.collide:
125 | for ag in agents:
126 | for adv in adversaries:
127 | if self.is_collision(ag, adv):
128 | rew += 10
129 | return rew
130 |
131 | def observation(self, agent, world):
132 | # get positions of all entities in this agent's reference frame
133 | entity_pos = []
134 | for entity in world.landmarks:
135 | if not entity.boundary:
136 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
137 | # communication of all other agents
138 | comm = []
139 | other_pos = []
140 | other_vel = []
141 | for other in world.agents:
142 | if other is agent: continue
143 | comm.append(other.state.c)
144 | other_pos.append(other.state.p_pos - agent.state.p_pos)
145 | if not other.adversary:
146 | other_vel.append(other.state.p_vel)
147 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
148 |
--------------------------------------------------------------------------------
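The `bound()` helper in `agent_reward` above applies a soft boundary penalty: zero while |position| < 0.9, linear up to |position| = 1.0, then exponential and capped at 10. A standalone sketch that tabulates the penalty with the same piecewise definition (the sample positions are illustrative):

```python
import numpy as np

def bound(x):
    # same piecewise penalty as simple_tag.Scenario.agent_reward
    if x < 0.9:
        return 0
    if x < 1.0:
        return (x - 0.9) * 10
    return min(np.exp(2 * x - 2), 10)

for x in [0.5, 0.95, 1.0, 1.2, 2.0]:
    print(f"|p| = {x:4.2f}  ->  penalty = {bound(x):.3f}")
```

For |p| = 0.5 the penalty is 0, at |p| = 1.0 it is 1, and it saturates at 10 once |p| exceeds roughly 2.15, so prey cannot escape simply by running off-screen.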
/multiagent/scenarios/simple_world_comm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 4
11 | #world.damping = 1
12 | num_good_agents = 2
13 | num_adversaries = 4
14 | num_agents = num_adversaries + num_good_agents
15 | num_landmarks = 1
16 | num_food = 2
17 | num_forests = 2
18 | # add agents
19 | world.agents = [Agent() for i in range(num_agents)]
20 | for i, agent in enumerate(world.agents):
21 | agent.name = 'agent %d' % i
22 | agent.collide = True
23 | agent.leader = True if i == 0 else False
24 | agent.silent = True if i > 0 else False
25 | agent.adversary = True if i < num_adversaries else False
26 | agent.size = 0.075 if agent.adversary else 0.045
27 | agent.accel = 3.0 if agent.adversary else 4.0
28 | #agent.accel = 20.0 if agent.adversary else 25.0
29 | agent.max_speed = 1.0 if agent.adversary else 1.3
30 | # add landmarks
31 | world.landmarks = [Landmark() for i in range(num_landmarks)]
32 | for i, landmark in enumerate(world.landmarks):
33 | landmark.name = 'landmark %d' % i
34 | landmark.collide = True
35 | landmark.movable = False
36 | landmark.size = 0.2
37 | landmark.boundary = False
38 | world.food = [Landmark() for i in range(num_food)]
39 | for i, landmark in enumerate(world.food):
40 | landmark.name = 'food %d' % i
41 | landmark.collide = False
42 | landmark.movable = False
43 | landmark.size = 0.03
44 | landmark.boundary = False
45 | world.forests = [Landmark() for i in range(num_forests)]
46 | for i, landmark in enumerate(world.forests):
47 | landmark.name = 'forest %d' % i
48 | landmark.collide = False
49 | landmark.movable = False
50 | landmark.size = 0.3
51 | landmark.boundary = False
52 | world.landmarks += world.food
53 | world.landmarks += world.forests
54 | #world.landmarks += self.set_boundaries(world) # world boundaries now penalized with negative reward
55 | # make initial conditions
56 | self.reset_world(world)
57 | return world
58 |
59 | def set_boundaries(self, world):
60 | boundary_list = []
61 | landmark_size = 1
62 | edge = 1 + landmark_size
63 | num_landmarks = int(edge * 2 / landmark_size)
64 | for x_pos in [-edge, edge]:
65 | for i in range(num_landmarks):
66 | l = Landmark()
67 | l.state.p_pos = np.array([x_pos, -1 + i * landmark_size])
68 | boundary_list.append(l)
69 |
70 | for y_pos in [-edge, edge]:
71 | for i in range(num_landmarks):
72 | l = Landmark()
73 | l.state.p_pos = np.array([-1 + i * landmark_size, y_pos])
74 | boundary_list.append(l)
75 |
76 | for i, l in enumerate(boundary_list):
77 | l.name = 'boundary %d' % i
78 | l.collide = True
79 | l.movable = False
80 | l.boundary = True
81 | l.color = np.array([0.75, 0.75, 0.75])
82 | l.size = landmark_size
83 | l.state.p_vel = np.zeros(world.dim_p)
84 |
85 | return boundary_list
86 |
87 |
88 | def reset_world(self, world):
89 | # random properties for agents
90 | for i, agent in enumerate(world.agents):
91 | agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45])
92 | agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0])
93 | # random properties for landmarks
94 | for i, landmark in enumerate(world.landmarks):
95 | landmark.color = np.array([0.25, 0.25, 0.25])
96 | for i, landmark in enumerate(world.food):
97 | landmark.color = np.array([0.15, 0.15, 0.65])
98 | for i, landmark in enumerate(world.forests):
99 | landmark.color = np.array([0.6, 0.9, 0.6])
100 | # set random initial states
101 | for agent in world.agents:
102 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
103 | agent.state.p_vel = np.zeros(world.dim_p)
104 | agent.state.c = np.zeros(world.dim_c)
105 | for i, landmark in enumerate(world.landmarks):
106 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
107 | landmark.state.p_vel = np.zeros(world.dim_p)
108 | for i, landmark in enumerate(world.food):
109 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
110 | landmark.state.p_vel = np.zeros(world.dim_p)
111 | for i, landmark in enumerate(world.forests):
112 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
113 | landmark.state.p_vel = np.zeros(world.dim_p)
114 |
115 | def benchmark_data(self, agent, world):
116 | if agent.adversary:
117 | collisions = 0
118 | for a in self.good_agents(world):
119 | if self.is_collision(a, agent):
120 | collisions += 1
121 | return collisions
122 | else:
123 | return 0
124 |
125 |
126 | def is_collision(self, agent1, agent2):
127 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
128 | dist = np.sqrt(np.sum(np.square(delta_pos)))
129 | dist_min = agent1.size + agent2.size
130 | return True if dist < dist_min else False
131 |
132 |
133 | # return all agents that are not adversaries
134 | def good_agents(self, world):
135 | return [agent for agent in world.agents if not agent.adversary]
136 |
137 | # return all adversarial agents
138 | def adversaries(self, world):
139 | return [agent for agent in world.agents if agent.adversary]
140 |
141 |
142 | def reward(self, agent, world):
143 | # dispatch to the adversary (predator) or good-agent (prey) reward
144 | #boundary_reward = -10 if self.outside_boundary(agent) else 0
145 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
146 | return main_reward
147 |
148 | def outside_boundary(self, agent):
149 | if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1:
150 | return True
151 | else:
152 | return False
153 |
154 |
155 | def agent_reward(self, agent, world):
156 | # Good agents are rewarded for reaching food, penalized for collisions with adversaries and for straying outside the arena
157 | rew = 0
158 | shape = False
159 | adversaries = self.adversaries(world)
160 | if shape:
161 | for adv in adversaries:
162 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
163 | if agent.collide:
164 | for a in adversaries:
165 | if self.is_collision(a, agent):
166 | rew -= 5
167 | def bound(x):
168 | if x < 0.9:
169 | return 0
170 | if x < 1.0:
171 | return (x - 0.9) * 10
172 | return min(np.exp(2 * x - 2), 10) # 1 + (x - 1) * (x - 1)
173 |
174 | for p in range(world.dim_p):
175 | x = abs(agent.state.p_pos[p])
176 | rew -= 2 * bound(x)
177 |
178 | for food in world.food:
179 | if self.is_collision(agent, food):
180 | rew += 2
181 | rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food])
182 |
183 | return rew
184 |
185 | def adversary_reward(self, agent, world):
186 | # Adversaries are rewarded for collisions with good agents (optionally shaped by distance to the nearest good agent)
187 | rew = 0
188 | shape = True
189 | agents = self.good_agents(world)
190 | adversaries = self.adversaries(world)
191 | if shape:
192 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents])
193 | if agent.collide:
194 | for ag in agents:
195 | for adv in adversaries:
196 | if self.is_collision(ag, adv):
197 | rew += 5
198 | return rew
199 |
200 |
201 | def observation2(self, agent, world):
202 | # get positions of all entities in this agent's reference frame
203 | entity_pos = []
204 | for entity in world.landmarks:
205 | if not entity.boundary:
206 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
207 |
208 | food_pos = []
209 | for entity in world.food:
210 | if not entity.boundary:
211 | food_pos.append(entity.state.p_pos - agent.state.p_pos)
212 | # communication of all other agents
213 | comm = []
214 | other_pos = []
215 | other_vel = []
216 | for other in world.agents:
217 | if other is agent: continue
218 | comm.append(other.state.c)
219 | other_pos.append(other.state.p_pos - agent.state.p_pos)
220 | if not other.adversary:
221 | other_vel.append(other.state.p_vel)
222 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
223 |
224 | def observation(self, agent, world):
225 | # get positions of all entities in this agent's reference frame
226 | entity_pos = []
227 | for entity in world.landmarks:
228 | if not entity.boundary:
229 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
230 |
231 | in_forest = [np.array([-1]), np.array([-1])]
232 | inf1 = False
233 | inf2 = False
234 | if self.is_collision(agent, world.forests[0]):
235 | in_forest[0] = np.array([1])
236 | inf1 = True
237 | if self.is_collision(agent, world.forests[1]):
238 | in_forest[1] = np.array([1])
239 | inf2 = True
240 |
241 | food_pos = []
242 | for entity in world.food:
243 | if not entity.boundary:
244 | food_pos.append(entity.state.p_pos - agent.state.p_pos)
245 | # communication of all other agents
246 | comm = []
247 | other_pos = []
248 | other_vel = []
249 | for other in world.agents:
250 | if other is agent: continue
251 | comm.append(other.state.c)
252 | oth_f1 = self.is_collision(other, world.forests[0])
253 | oth_f2 = self.is_collision(other, world.forests[1])
254 | if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader: #without forest vis
255 | other_pos.append(other.state.p_pos - agent.state.p_pos)
256 | if not other.adversary:
257 | other_vel.append(other.state.p_vel)
258 | else:
259 | other_pos.append([0, 0])
260 | if not other.adversary:
261 | other_vel.append([0, 0])
262 |
263 | # to tell the pred when the prey are in the forest
264 | prey_forest = []
265 | ga = self.good_agents(world)
266 | for a in ga:
267 | if any([self.is_collision(a, f) for f in world.forests]):
268 | prey_forest.append(np.array([1]))
269 | else:
270 | prey_forest.append(np.array([-1]))
271 | # to tell leader when pred are in forest
272 | prey_forest_lead = []
273 | for f in world.forests:
274 | if any([self.is_collision(a, f) for a in ga]):
275 | prey_forest_lead.append(np.array([1]))
276 | else:
277 | prey_forest_lead.append(np.array([-1]))
278 |
279 | comm = [world.agents[0].state.c]
280 |
281 | if agent.adversary and not agent.leader:
282 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
283 | if agent.leader:
284 | return np.concatenate(
285 | [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
286 | else:
287 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel)
288 |
289 |
290 |
--------------------------------------------------------------------------------
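The long boolean in `observation()` above decides whether another agent's relative position and velocity are visible or zeroed out. The same rule restated as a small predicate (a sketch, not code from the repository):

```python
def can_see(self_f1, self_f2, other_f1, other_f2, self_is_leader):
    """Visibility rule from simple_world_comm.Scenario.observation:
    another agent is visible if both share the same forest, both are
    outside every forest, or the observer is the adversary leader."""
    same_forest = (self_f1 and other_f1) or (self_f2 and other_f2)
    both_outside = not (self_f1 or self_f2 or other_f1 or other_f2)
    return same_forest or both_outside or self_is_leader
```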
/obs_img.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import cv2
4 |
5 | def set_value_in_img(img, pos_x, pos_y, value):
6 | """
7 | draw 3*3 rectangle in img
8 | :param img:
9 | :param pos_x:
10 | :param pos_y:
11 | :param value:
12 | :return:
13 | """
14 | img_obs_size_x = 300
15 | img_obs_size_y = 300
16 |
17 | img[pos_x - 1:pos_x + 2, pos_y - 1:pos_y + 2] = value  # fill a 3x3 patch (assumes an interior position), matching the docstring
18 |
19 |
20 | if __name__ == '__main__':
21 | img_obs_size_x = 300
22 | img_obs_size_y = 300
23 | # per-agent image: positions of all friendly units
24 | obs_img = np.full((img_obs_size_x, img_obs_size_y, 3), 255, dtype=np.int32)
25 | # print(obs_img)
26 | a = plt.imshow(obs_img)
27 | # plt.show(a)
28 |
29 | for i in range(300):
30 | set_value_in_img(obs_img, 150, 150, [48, 158, 67])
31 | # print(obs_img)
32 | b = plt.imshow(obs_img, filternorm=1, filterrad=4, interpolation='lanczos')
33 | plt.show()
34 | print(obs_img[150][150])
--------------------------------------------------------------------------------
/plot_reward.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import matplotlib.pyplot as plt
3 | import seaborn as sns
4 |
5 | f = open('./learning_curves/exp_torch_4/rewards.pkl','rb')
6 | f1 = open('./learning_curves/exp_torch_5/rewards.pkl','rb')
7 | f2 = open('./learning_curves/exp_torch_8/rewards.pkl','rb')
8 |
9 | rewards = pickle.load(f)
10 | rewards1 = pickle.load(f1)
11 | rewards2 = pickle.load(f2)
12 | plt.plot(rewards)
13 | plt.plot(rewards1)
14 | plt.plot(rewards2)
15 | plt.grid()
16 | plt.xlabel('epoch')
17 | plt.ylabel('reward')
18 | plt.title('Rewards')
19 | plt.legend(['4', '5', '8'])
20 | plt.show()
21 |
--------------------------------------------------------------------------------
/replay_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 | class ReplayBuffer(object):
5 | def __init__(self, size):
6 | """Create Prioritized Replay buffer.
7 |
8 | Parameters
9 | ----------
10 | size: int
11 | Max number of transitions to store in the buffer. When the buffer
12 | overflows the old memories are dropped.
13 | """
14 | self._storage = []
15 | self._maxsize = int(size)
16 | self._next_idx = 0
17 |
18 | def __len__(self):
19 | return len(self._storage)
20 |
21 | def clear(self):
22 | self._storage = []
23 | self._next_idx = 0
24 |
25 | def add(self, obs_t, action, reward, obs_tp1, done):
26 | data = (obs_t, action, reward, obs_tp1, done)
27 |
28 | if self._next_idx >= len(self._storage):
29 | self._storage.append(data)
30 | else:
31 | self._storage[self._next_idx] = data
32 | self._next_idx = (self._next_idx + 1) % self._maxsize
33 |
34 | def _encode_sample(self, idxes, agent_idx):
35 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
36 | for i in idxes:
37 | data = self._storage[i]
38 | obs_t, action, reward, obs_tp1, done = data
39 | obses_t.append(np.concatenate(obs_t[:]))
40 | actions.append(action)
41 | rewards.append(reward[agent_idx])
42 | obses_tp1.append(np.concatenate(obs_tp1[:]))
43 | dones.append(done[agent_idx])
44 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)
45 |
46 | def make_index(self, batch_size):
47 | return [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
48 |
49 | def make_latest_index(self, batch_size):
50 | idx = [(self._next_idx - 1 - i) % self._maxsize for i in range(batch_size)]
51 | np.random.shuffle(idx)
52 | return idx
53 |
54 | def sample_index(self, idxes, agent_idx):
55 | return self._encode_sample(idxes, agent_idx)
56 |
57 | def sample(self, batch_size, agent_idx):
58 | """Sample a batch of experiences.
59 |
60 | Parameters
61 | ----------
62 | batch_size: int
63 | How many transitions to sample.
64 |
65 | Returns
66 | -------
67 | obs_batch: np.array
68 | batch of observations
69 | act_batch: np.array
70 | batch of actions executed given obs_batch
71 | rew_batch: np.array
72 | rewards received as results of executing act_batch
73 | next_obs_batch: np.array
74 | next set of observations seen after executing act_batch
75 | done_mask: np.array
76 | done_mask[i] = 1 if executing act_batch[i] resulted in
77 | the end of an episode and 0 otherwise.
78 | """
79 | if batch_size > 0:
80 | idxes = self.make_index(batch_size)
81 | else:
82 | idxes = range(0, len(self._storage))
83 | return self._encode_sample(idxes, agent_idx)
84 |
85 | def collect(self, agent_idx):
86 | return self.sample(-1, agent_idx)
87 |
--------------------------------------------------------------------------------
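A minimal usage sketch for the `ReplayBuffer` above. The shapes and the way transitions are packed (per-agent observation lists, joint actions, per-agent rewards/dones) are illustrative assumptions, not taken from the repository's training loop:

```python
import numpy as np
from replay_buffer import ReplayBuffer

# Illustrative sizes: 3 agents, 4-dim observations, 5-dim actions.
n_agents, obs_dim, act_dim = 3, 4, 5
buffer = ReplayBuffer(size=1e6)

for _ in range(100):
    obs_n = [np.random.randn(obs_dim) for _ in range(n_agents)]       # per-agent observations
    act_n = np.random.randn(n_agents, act_dim)                        # joint action
    rew_n = np.random.randn(n_agents)                                 # per-agent rewards
    new_obs_n = [np.random.randn(obs_dim) for _ in range(n_agents)]
    done_n = np.zeros(n_agents)
    buffer.add(obs_n, act_n, rew_n, new_obs_n, done_n)

# sample() concatenates the per-agent observations and picks out the
# reward/done of the requested agent index.
obs, act, rew, next_obs, done = buffer.sample(batch_size=32, agent_idx=0)
print(obs.shape)   # (32, n_agents * obs_dim) -> (32, 12)
print(rew.shape)   # (32,)
```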
/scenario-options/band-loc/core_uav.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 | self.hold_action = 0
11 |
12 | # state of agents (including communication and internal/mental state)
13 | class AgentState(EntityState):
14 | def __init__(self):
15 | super(AgentState, self).__init__()
16 | # communication allocation
17 | self.c = None
18 | self.UAV_Rate = None
19 | self.assign = None
20 | self.SNRforUser = None
21 |
22 | # action of the agent
23 | class Action(object):
24 | def __init__(self):
25 | # physical action
26 | self.u = None
27 | # communication action
28 | self.c = None
29 |
30 | # properties and state of physical world entity
31 | class Entity(object):
32 | def __init__(self):
33 | # name
34 | self.name = ''
35 | # properties:
36 | self.size = 0.0250
37 | # entity can move / be pushed
38 | self.movable = False
39 | # entity collides with others
40 | self.collide = True
41 | # material density (affects mass)
42 | self.density = 25.0
43 | # color
44 | self.color = None
45 | # max speed and accel
46 | self.max_speed = 10
47 | self.accel = None
48 | # state
49 | self.state = EntityState()
50 | # mass
51 | self.initial_mass = 1
52 |
53 | @property
54 | def mass(self):
55 | return self.initial_mass
56 |
57 | # properties of landmark entities
58 | class Landmark(Entity):
59 | def __init__(self):
60 | super(Landmark, self).__init__()
61 | self.assign_index = None
62 | self.switch_cout = 0
63 |
64 | # properties of agent entities
65 | class Agent(Entity):
66 | def __init__(self):
67 | super(Agent, self).__init__()
68 | # agents are movable by default
69 | self.movable = True
70 | # cannot send communication signals
71 | self.silent = False
72 | # cannot observe the world
73 | self.blind = False
74 | # physical motor noise amount
75 | self.u_noise = None
76 | # communication noise amount
77 | self.c_noise = None
78 | # control range
79 | self.u_range = 1.0
80 | # state
81 | self.state = AgentState()
82 | # action
83 | self.action = Action()
84 | # script behavior to execute
85 | self.action_callback = None
86 |
87 | # multi-agent world
88 | class World(object):
89 | def __init__(self):
90 | # list of agents and entities (can change at execution-time!)
91 | self.agents = []
92 | self.landmarks = []
93 | # communication channel dimensionality
94 | self.dim_c = 0
95 | # position dimensionality
96 | self.dim_p = 2
97 | # color dimensionality
98 | self.dim_color = 3
99 | # simulation timestep
100 | self.dt = 0.1
101 | # physical damping
102 | self.damping = 0.25
103 | # contact response parameters
104 | self.contact_force = 1e+2
105 | self.contact_margin = 1e-3
106 | self.collaborative = False
107 |
108 | self.pow_UAVs = 100 # UAV transmit power
109 | self.pow_GBS = None # ground base station (GBS) transmit power
110 | self.gain_UAV = 10**(-40/10) # channel gain of the UAV link at unit distance, -40 dB (stored as a linear value)
111 | self.gain_GBS = 60 # channel gain of the GBS link at unit distance (dB)
112 | self.los_exp_UAV = 2 # path-loss exponent of the UAV link
113 | self.los_exp_GBS = 3 # path-loss exponent of the GBS link
114 | self.max_band = 10e6 # maximum available bandwidth
115 | self.noise_pdf = 10**(-167/10) # noise power spectral density (dBm)
116 |
117 | self.bandwidth = None # bandwidth
118 | self.Rate = None
119 | self.SNR = None
120 |
121 | # return all entities in the world
122 | @property
123 | def entities(self):
124 | return self.agents + self.landmarks
125 |
126 | # return all agents controllable by external policies
127 | @property
128 | def policy_agents(self):
129 | return [agent for agent in self.agents if agent.action_callback is None]
130 |
131 | # return all agents controlled by world scripts
132 | @property
133 | def scripted_agents(self):
134 | return [agent for agent in self.agents if agent.action_callback is not None]
135 |
136 | # update state of the world
137 | def step(self):
138 | # set actions for scripted agents
139 | for agent in self.scripted_agents:
140 | agent.action = agent.action_callback(agent, self)
141 |
142 | # gather forces applied to entities
143 | p_force = [None] * len(self.entities)
144 | # apply agent physical controls
145 | p_force = self.apply_action_force(p_force)
146 | # apply environment forces
147 | # p_force = self.apply_environment_force(p_force)
148 | # integrate physical state
149 | self.integrate_state(p_force)
150 |
151 | self.update_landmark_pos()
152 |
153 | # self.update_agent_pos()
154 |
155 | # update agent state
156 | for agent in self.agents:
157 | self.update_agent_state(agent)
158 | self.update_world_state()
159 |
160 | # dict_state = []
161 | # for i, landmark in enumerate(self.landmarks):
162 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
163 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos})
164 | #
165 | # for i, agent in enumerate(self.agents):
166 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
167 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign': agent.state.c})
168 | # with open('./state_results/states.txt','a') as f:
169 | # f.write(str(dict_state)+'\n')
170 |
171 | # gather agent action forces
172 | def apply_action_force(self, p_force):
173 | # set applied forces
174 | for i,agent in enumerate(self.agents):
175 | if agent.movable:
176 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
177 | p_force[i] = agent.action.u + noise
178 | return p_force
179 |
180 | # gather physical forces acting on entities
181 | def apply_environment_force(self, p_force):
182 | # simple (but inefficient) collision response
183 | for a,entity_a in enumerate(self.entities):
184 | for b,entity_b in enumerate(self.entities):
185 | if(b <= a): continue
186 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
187 | if(f_a is not None):
188 | if(p_force[a] is None): p_force[a] = 0.0
189 | p_force[a] = f_a + p_force[a]
190 | if(f_b is not None):
191 | if(p_force[b] is None): p_force[b] = 0.0
192 | p_force[b] = f_b + p_force[b]
193 | return p_force
194 |
195 | # integrate physical state
196 | def integrate_state(self, p_force):
197 | for i, agent in enumerate(self.agents):
198 | if not agent.movable: continue
199 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
200 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping)
201 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping)
202 | if (p_force[i] is not None):
203 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt
204 | if agent.max_speed is not None:
205 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
206 | if speed > agent.max_speed:
207 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
208 | np.square(agent.state.p_vel[1])) * agent.max_speed
209 | agent.state.p_pos += agent.state.p_vel * self.dt
210 |
211 | def update_landmark_pos(self):
212 | # for landmark in self.landmarks:
213 | # if not landmark.movable: continue
214 | # if landmark.state.hold_action > 0:
215 | # landmark.state.hold_action -= 1
216 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
217 | # else:
218 | # landmark.state.hold_action = np.random.randint(0,10)
219 | #
220 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
221 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
223 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
224 | # # print(temp_pos)
225 | # landmark.state.p_pos = temp_pos
226 |
227 | for landmark in self.landmarks:
228 | if not landmark.movable: continue
229 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
230 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
231 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
232 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
233 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
234 | # print(temp_pos)
235 | landmark.state.p_pos = temp_pos
236 |
237 |
238 | def update_agent_state(self, agent):
239 | # set communication state (directly for now)
240 | if agent.silent:
241 | agent.state.c = np.zeros(self.dim_c)
242 | else:
243 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
244 | # agent.state.c = agent.action.c + noise
245 | agent.state.c = agent.action.c
246 |
247 | # def update_agent_pos(self):
248 | # # set communication state (directly for now)
249 | # for i, agent in enumerate(self.agents):
250 | # if not agent.movable: continue
251 | # noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
252 | # agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping)
253 | # print(agent.state.p_vel)
254 | # if agent.max_speed is not None:
255 | # speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
256 | # if speed > agent.max_speed:
257 | # print('exceeds max speed')
258 | # agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
259 | # np.square(agent.state.p_vel[1])) * agent.max_speed
260 | # agent.state.p_pos += agent.state.p_vel * self.dt
261 |
262 | def update_world_state(self):
263 | # users make their link selection according to the allocation scheme provided by the base stations
264 |
265 | bw = np.zeros((len(self.agents), self.dim_c))
266 | disVec = np.zeros((len(self.agents), self.dim_c))
267 | height = np.ones((len(self.agents), self.dim_c)) * 200 # UAV flight altitude
268 | for index, agent in enumerate(self.agents):
269 | bw[index] = np.array(list(self.max_band * i for i in agent.action.c))
270 | # compute the Euclidean distance from the UAV to each user
271 | for i, landmark in enumerate(self.landmarks):
272 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000
273 | # when the distance exceeds a threshold, communication is impossible and the allocated bandwidth is set to 0
274 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i]
275 | # received signal power
276 | signal_pow = self.pow_UAVs * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2)
277 |
278 | # noise power at the receiver
279 | noise_pow = bw * self.noise_pdf + 1e-9
280 | # SNR at the receiver
281 | SNR = signal_pow / noise_pow
282 |
283 | SNR[SNR < np.sqrt(10)] = 0 # below 5 dB the link is unusable, so force the SNR to zero
284 |
285 |
286 | # for i in range(self.dim_c):
287 | # for j in range(len(self.agents)):
288 | # if 10*np.log10(SNR[j][i]) < 15:
289 | # bw[j][i] = 0
290 |
291 | # information transmission rate
292 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR)
293 |
294 | ## ensure each user establishes a communication link with only one UAV
295 | mask = np.zeros((len(self.agents), self.dim_c)) # masked positions are 0
296 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) # masked positions are negative
297 |
298 | for i, landmark in enumerate(self.landmarks):
299 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])):
300 | landmark.switch_cout = 1
301 | landmark.assign_index = np.argmax(Rate[:, i])
302 | else:
303 | landmark.assign_index = np.argmax(Rate[:, i])
304 |
305 | mask[landmark.assign_index][i] = 1
306 |
307 |
308 | self.bandwidth = mask * bw
309 | self.Rate = mask * Rate
310 | self.SNR = 10*np.log10(mask * SNR + 1e-10)
311 |
312 | for i, agent in enumerate(self.agents):
313 | agent.state.assign = self.bandwidth[i, :]/self.max_band
314 | agent.state.UAV_Rate = self.Rate[i, :]
315 | agent.state.SNRforUser = SNR[i,:]
316 |
317 | # for i, landmark in enumerate(self.landmarks):
318 | # band = self.bandwidth.transpose()
319 | # landmark.assign_index = np.nonzero(band[i])
320 |
321 | # get collision forces for any contact between two entities
322 | def get_collision_force(self, entity_a, entity_b):
323 | if (not entity_a.collide) or (not entity_b.collide):
324 | return [None, None] # not a collider
325 | if (entity_a is entity_b):
326 | return [None, None] # don't collide against itself
327 | # compute actual distance between entities
328 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
329 | dist = np.sqrt(np.sum(np.square(delta_pos)))
330 | # minimum allowable distance
331 | dist_min = entity_a.size + entity_b.size
332 | # softmax penetration
333 | k = self.contact_margin
334 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
335 | force = self.contact_force * delta_pos / dist * penetration
336 | force_a = +force if entity_a.movable else None
337 | force_b = -force if entity_b.movable else None
338 | return [force_a, force_b]
--------------------------------------------------------------------------------
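`update_world_state` above converts each UAV's bandwidth allocation into per-user SNR and rate. Below is a standalone recomputation for a single UAV–user link that mirrors the formulas and the constants hard-coded in `World.__init__`; the allocated bandwidth fraction and the distance are made-up example values:

```python
import numpy as np

# Constants copied from World.__init__ (band-loc variant)
pow_UAV = 100                      # UAV transmit power
gain_UAV = 10 ** (-40 / 10)        # channel gain at unit distance
los_exp = 2                        # path-loss exponent of the UAV link
height = 200                       # UAV flight altitude used in the code
noise_pdf = 10 ** (-167 / 10)      # noise power spectral density
max_band = 10e6                    # total available bandwidth

bw = 0.1 * max_band                # suppose 10% of the band goes to this user (example value)
dist = 0.5 * 1000                  # 0.5 in arena units, scaled by 1000 as in the code

signal_pow = pow_UAV * gain_UAV / (dist ** los_exp + height ** 2)
noise_pow = bw * noise_pdf + 1e-9
snr = signal_pow / noise_pow
rate = bw * np.log2(1 + snr) if snr >= np.sqrt(10) else 0.0   # links below ~5 dB are zeroed

print(f"SNR  = {10 * np.log10(snr):.1f} dB")
print(f"Rate = {rate / 1e6:.2f} Mbit/s")
```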
/scenario-options/band-loc/environment_uav.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.envs.registration import EnvSpec
4 | import numpy as np
5 | from multiagent.multi_discrete import MultiDiscrete
6 |
7 | # environment for all agents in the multiagent world
8 | # currently code assumes that no agents will be created/destroyed at runtime!
9 | class MultiAgentEnv(gym.Env):
10 | metadata = {
11 | 'render.modes' : ['human', 'rgb_array']
12 | }
13 |
14 | def __init__(self, world, reset_callback=None, reward_callback=None,
15 | observation_callback=None, info_callback=None,
16 | done_callback=None, shared_viewer=True):
17 |
18 | self.world = world
19 | self.agents = self.world.policy_agents
20 | # set required vectorized gym env property
21 | self.n = len(world.policy_agents)
22 | # scenario callbacks
23 | self.reset_callback = reset_callback
24 | self.reward_callback = reward_callback
25 | self.observation_callback = observation_callback
26 | self.info_callback = info_callback
27 | self.done_callback = done_callback
28 | # environment parameters
29 | self.discrete_action_space = True
30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
31 | self.discrete_action_input = False
32 | # if true, even if the action is continuous, it will be executed discretely
33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
34 | # if true, every agent has the same reward
35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
36 | self.time = 0
37 |
38 | # configure spaces
39 | self.action_space = []
40 | self.observation_space = []
41 | for agent in self.agents:
42 | total_action_space = []
43 | # physical action space
44 | if self.discrete_action_space:
45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
46 | else:
47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
48 | if agent.movable:
49 | total_action_space.append(u_action_space)
50 | # communication action space
51 | if self.discrete_action_space:
52 | c_action_space = spaces.Discrete(world.dim_c)
53 | else:
54 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
55 | if not agent.silent:
56 | total_action_space.append(c_action_space)
57 | # total action space
58 | if len(total_action_space) > 1:
59 | # all action spaces are discrete, so simplify to MultiDiscrete action space
60 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
61 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
62 | else:
63 | act_space = spaces.Tuple(total_action_space)
64 | self.action_space.append(act_space)
65 | else:
66 | self.action_space.append(total_action_space[0])
67 | # observation space
68 | obs_dim = len(observation_callback(agent, self.world))
69 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
70 | # agent.action.c = np.zeros(self.world.dim_c)
71 |
72 | # rendering
73 | self.shared_viewer = shared_viewer
74 | if self.shared_viewer:
75 | self.viewers = [None]
76 | else:
77 | self.viewers = [None] * self.n
78 | self._reset_render()
79 |
80 | def step(self, action_n):
81 | obs_n = []
82 | reward_n = []
83 | done_n = []
84 | info_n = {'n': []}
85 | self.agents = self.world.policy_agents
86 | # set action for each agent
87 | for i, agent in enumerate(self.agents):
88 | self._set_action(action_n[i], agent, self.action_space[i])
89 |
90 | # advance world state
91 | self.world.step()
92 | # record observation for each agent
93 | for agent in self.agents:
94 | obs_n.append(self._get_obs(agent))
95 | reward_n.append(self._get_reward(agent))
96 | done_n.append(self._get_done(agent))
97 |
98 | info_n['n'].append(self._get_info(agent))
99 |
100 | # all agents get total reward in cooperative case
101 | reward = np.sum(reward_n)
102 | if self.shared_reward:
103 | reward_n = [reward] * self.n
104 |
105 | return obs_n, reward_n, done_n, info_n
106 |
107 | def reset(self):
108 | # reset world
109 | self.reset_callback(self.world)
110 | # reset renderer
111 | self._reset_render()
112 | # record observations for each agent
113 | obs_n = []
114 | self.agents = self.world.policy_agents
115 | for agent in self.agents:
116 | obs_n.append(self._get_obs(agent))
117 | return obs_n
118 |
119 | # get info used for benchmarking
120 | def _get_info(self, agent):
121 | if self.info_callback is None:
122 | return {}
123 | return self.info_callback(agent, self.world)
124 |
125 | # get observation for a particular agent
126 | def _get_obs(self, agent):
127 | if self.observation_callback is None:
128 | return np.zeros(0)
129 | return self.observation_callback(agent, self.world)
130 |
131 | # get dones for a particular agent
132 | # unused right now -- agents are allowed to go beyond the viewing screen
133 | def _get_done(self, agent):
134 | if self.done_callback is None:
135 | return False
136 | return self.done_callback(agent, self.world)
137 |
138 | # get reward for a particular agent
139 | def _get_reward(self, agent):
140 | if self.reward_callback is None:
141 | return 0.0
142 | return self.reward_callback(agent, self.world)
143 |
144 | # set env action for a particular agent
145 | def _set_action(self, action, agent, action_space, time=None):
146 | agent.action.u = np.zeros(self.world.dim_p)
147 | agent.action.c = np.zeros(self.world.dim_c)
148 | # process action
149 | if isinstance(action_space, MultiDiscrete):
150 | act = []
151 | size = action_space.high - action_space.low + 1
152 | index = 0
153 | for s in size:
154 | act.append(action[index:(index+s)])
155 | index += s
156 | action = act
157 | else:
158 | action = [action]
159 |
160 |
161 | if agent.movable:
162 | # physical action
163 | if self.discrete_action_input:
164 | agent.action.u = np.zeros(self.world.dim_p)
165 | # process discrete action
166 | if action[0] == 1: agent.action.u[0] = -1.0
167 | if action[0] == 2: agent.action.u[0] = +1.0
168 | if action[0] == 3: agent.action.u[1] = -1.0
169 | if action[0] == 4: agent.action.u[1] = +1.0
170 | else:
171 | if self.force_discrete_action:
172 | d = np.argmax(action[0])
173 | action[0][:] = 0.0
174 | action[0][d] = 1.0
175 | if self.discrete_action_space:
176 | agent.action.u[0] += action[0][1] - action[0][2]
177 | agent.action.u[1] += action[0][3] - action[0][4]
178 | else:
179 | agent.action.u = action[0]
180 | sensitivity = 5
181 | if agent.accel is not None:
182 | sensitivity = agent.accel
183 | agent.action.u *= sensitivity
184 | action = action[1:]
185 | if not agent.silent:
186 | # communication action
187 | if self.discrete_action_input:
188 | agent.action.c = np.zeros(self.world.dim_c)
189 | agent.action.c[action[0]] = 1.0
190 | else:
191 | agent.action.c = action[0]
192 | action = action[1:]
193 | # make sure we used all elements of action
194 | assert len(action) == 0
195 |
196 | # reset rendering assets
197 | def _reset_render(self):
198 | self.render_geoms = None
199 | self.render_geoms_xform = None
200 | # self.render_geoms_uav = None
201 | # self.render_geoms_xform_uav = None
202 |
203 | # render environment
204 | def render(self, mode='human'):
205 | # if mode == 'human':
206 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
207 | # message = ''
208 | # for agent in self.world.agents:
209 | # comm = []
210 | # for other in self.world.agents:
211 | # if other is agent: continue
212 | # if np.all(other.state.c == 0):
213 | # word = '_'
214 | # else:
215 | # word = alphabet[np.argmax(other.state.c)]
216 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ')
217 | # print(message)
218 |
219 | for i in range(len(self.viewers)):
220 | # create viewers (if necessary)
221 | if self.viewers[i] is None:
222 | # import rendering only if we need it (and don't import for headless machines)
223 | #from gym.envs.classic_control import rendering
224 | from multiagent import rendering
225 | self.viewers[i] = rendering.Viewer(700,700)
226 |
227 | # create rendering geometry
228 | if self.render_geoms is None:
229 | # import rendering only if we need it (and don't import for headless machines)
230 | #from gym.envs.classic_control import rendering
231 | from multiagent import rendering
232 | self.render_geoms = []
233 | self.render_geoms_xform = []
234 | self.render_geoms_uav = []
235 | self.render_geoms_xform_uav = []
236 | self.render_geoms_line = []
237 |
238 |
239 | range_color = [0.25, 0.25, 0.75]
240 | for entity in self.world.entities:
241 | geom = rendering.make_circle(entity.size)
242 | xform = rendering.Transform()
243 |
244 | if 'agent' in entity.name:
245 | geom.set_color(*entity.color, alpha=0.5)
246 |
247 | geom_uav = rendering.make_circle(entity.size*4)
248 | xform_uav = rendering.Transform()
249 | geom_uav.set_color(*range_color, alpha=0.25)
250 | geom_uav.add_attr(xform_uav)
251 |
252 | self.render_geoms_uav.append(geom_uav)
253 | self.render_geoms_xform_uav.append(xform_uav)
254 |
255 | else:
256 | geom.set_color(*entity.color)
257 |
258 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos)
259 | self.render_geoms_line.append(geom_line)
260 |
261 | geom.add_attr(xform)
262 | self.render_geoms.append(geom)
263 | self.render_geoms_xform.append(xform)
264 |
265 | # add geoms to viewer
266 | for viewer in self.viewers:
267 | viewer.geoms = []
268 | for geom in self.render_geoms:
269 | viewer.add_geom(geom)
270 | for geom_uav in self.render_geoms_uav:
271 | viewer.add_geom(geom_uav)
272 | for geom_line in self.render_geoms_line:
273 | viewer.add_geom(geom_line)
274 |
275 | results = []
276 | for i in range(len(self.viewers)):
277 | from multiagent import rendering
278 | # update bounds to center around agent
279 | cam_range = 1
280 | if self.shared_viewer:
281 | pos = np.zeros(self.world.dim_p)
282 | else:
283 | pos = self.agents[i].state.p_pos
284 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range)
285 | # update geometry positions
286 | for e, entity in enumerate(self.world.entities):
287 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos)
288 | for a, agent in enumerate(self.world.policy_agents):
289 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos)
290 | for l, landmark in enumerate(self.world.landmarks):
291 | self.render_geoms_line[l].start = landmark.state.p_pos
292 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos
293 |
294 | # render to display or array
295 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array'))
296 |
297 | return results
298 |
299 | # create receptor field locations in local coordinate frame
300 | def _make_receptor_locations(self, agent):
301 | receptor_type = 'polar'
302 | range_min = 0.05 * 2.0
303 | range_max = 1.00
304 | dx = []
305 | # circular receptive field
306 | if receptor_type == 'polar':
307 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False):
308 | for distance in np.linspace(range_min, range_max, 3):
309 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)]))
310 | # add origin
311 | dx.append(np.array([0.0, 0.0]))
312 | # grid receptive field
313 | if receptor_type == 'grid':
314 | for x in np.linspace(-range_max, +range_max, 5):
315 | for y in np.linspace(-range_max, +range_max, 5):
316 | dx.append(np.array([x,y]))
317 | return dx
--------------------------------------------------------------------------------
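With `discrete_action_space = True` and movable, non-silent agents, `_set_action` above expects each agent's action to be one flat vector: a 5-element movement part (interpreted as no-op, +x, -x, +y, -y weights by the one-hot branch) followed by a `dim_c`-element bandwidth-allocation part. A sketch of how a caller might pack such a vector; the values are illustrative and `dim_c = 10` matches the band-loc scenario:

```python
import numpy as np

dim_c = 10                                # number of users in the band-loc scenario

move = np.zeros(5)
move[1] = 1.0                             # weight on the +x direction (index 0 is the no-op slot)
band = np.full(dim_c, 1.0 / dim_c)        # spread the bandwidth evenly over all users

action = np.concatenate([move, band])     # length 5 + dim_c, split by _set_action via the MultiDiscrete sizes
# env.step([action, action, action])      # one such vector per UAV (3 agents in the scenario)
```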
/scenario-options/band-loc/simple_collaborative_comm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core_uav import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | num_uav = 3
11 | world.collaborative = True # whether agents share rewards
12 | # add agents
13 | world.agents = [Agent() for i in range(num_uav)]
14 | for i, agent in enumerate(world.agents):
15 | agent.name = 'agent %d' % i
16 | agent.collide = True
17 | # add landmarks
18 | world.landmarks = [Landmark() for i in range(world.dim_c)]
19 | for i, landmark in enumerate(world.landmarks):
20 | landmark.name = 'landmark %d' % i
21 | landmark.collide = False
22 | landmark.movable = True
23 |
24 | # make initial conditions
25 | self.reset_world(world)
26 | return world
27 |
28 | def reset_world(self, world):
29 | # set color for landmarks
30 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75])
31 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75])
32 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75])
33 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75])
34 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75])
35 |
36 | # set random initial states
37 | # for i, agent in enumerate(world.agents):
38 | # agent.color = np.array([0.35, 0.35, 0.85])
39 | # # random properties for landmarks
40 | # for i, landmark in enumerate(world.landmarks):
41 | # landmark.color = np.array([0.25, 0.25, 0.25])
42 | # set random initial states
43 | for i, agent in enumerate(world.agents):
44 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i]
45 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i]
46 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i]
47 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
48 | agent.state.p_vel = np.zeros(world.dim_p)
49 | agent.state.c = np.zeros(world.dim_c)
50 | agent.state.assign = np.zeros(world.dim_c)
51 | agent.state.SNRforUser = np.zeros(world.dim_c)
52 |
53 | for i, landmark in enumerate(world.landmarks):
54 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
55 |
56 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629],
57 | # [-0.324932, 0.60295496], [0.62360916, -0.52245444],
58 | # [0.65862443, 0.23426809]])[i]
59 |
60 | landmark.state.p_pos = np.array([[-0.8154612, -0.24502182], [0.89699076, -0.14581629],
61 | [-0.3249325, 0.60295496], [0.62360916, -0.52245444],
62 | [0.65862443, 0.23426809], [0.29590076, 0.54571629],
63 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649],
64 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i]
65 |
66 | # landmark.state.p_pos = np.array([[0.89699076, -0.14581629], [-0.8154612, -0.24502182],
67 | # [0.65862443, 0.23426809], [-0.3249325, 0.60295496],
68 | # [0.62360916, -0.52245444], [0.29590076, 0.54571629],
69 | # [-0.89699076, -0.74682629], [-0.89699076, 0.14581629],
70 | # [0.84629571, -0.98582649], [-0.14699076, -0.75584629]])[i]
71 |
72 | landmark.state.p_vel = np.zeros(world.dim_p)
73 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p)
74 | landmark.color = np.array([0.25, 0.25, 0.25])
75 | landmark.assign_index = None
76 | landmark.switch_cout = 0
77 |
78 | def benchmark_data(self, agent, world):
79 | rew = 0
80 | collisions = 0
81 | occupied_landmarks = 0
82 | min_dists = 0
83 | for l in world.landmarks:
84 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
85 | min_dists += min(dists)
86 | rew -= min(dists)
87 | if min(dists) < 0.1:
88 | occupied_landmarks += 1
89 | if agent.collide:
90 | for a in world.agents:
91 | if self.is_collision(a, agent):
92 | rew -= 1
93 | collisions += 1
94 | return rew, collisions, min_dists, occupied_landmarks
95 |
96 | def is_collision(self, agent1, agent2):
97 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
98 | dist = np.sqrt(np.sum(np.square(delta_pos)))
99 | dist_min = agent1.size + agent2.size
100 | return True if dist < dist_min else False
101 |
102 | def reward(self, agent, world):
103 | p = 0.7
104 | try:
105 |
106 | # reward = 10*np.log(p*np.min(world.Rate[np.nonzero(world.Rate)])+(1-p)*np.average(world.Rate[np.nonzero(world.Rate)]))
107 | # reward = 10 * np.log(np.min(world.Rate[np.nonzero(world.Rate)]))
108 |
109 | # the approach below finds the minimum-rate user more robustly; it avoids the error the commented-out version raises when all rates are zero
110 | min_rate = None
111 | for i in range(world.dim_c):
112 | min_rate = np.max(world.Rate[:,i]) if min_rate is None else min(min_rate, np.max(world.Rate[:,i]))
113 | reward = 10*np.log(min_rate+1e-9)
114 |
115 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)]))
116 | # reward = np.log(np.sum(world.Rate))
117 | except ValueError:
118 | reward = 0
119 |
120 | # reward = np.log(np.sum(world.Rate))
121 |
122 | for i in range(world.dim_c):
123 | if np.max(world.SNR[:, i]) < 15:
124 | reward -= 10
125 | #print('SNR not satisfied')
126 |
127 | for ag in world.agents:  # use a fresh loop name so the 'agent' argument is not shadowed
128 | if not (-1 <= ag.state.p_pos[0] <= 1 and -1 <= ag.state.p_pos[1] <= 1):
129 | reward -= 100
130 | # print(entity.name)
131 | # print(entity.state.p_pos)
132 | # raise Exception('out of bounds')
133 |
134 | # # use the number of user handovers as a negative reward
135 | # for landmark in world.landmarks:
136 | # reward -= landmark.switch_cout
137 | # landmark.switch_cout = 0
138 |
139 | if agent.collide:
140 | for a in world.agents:
141 | if a.name != agent.name:
142 | if self.is_collision(a, agent):
143 | reward -= 100
144 |
145 | return reward
146 |
147 | def observation(self, agent, world):
148 | # get positions of all entities in this agent's reference frame
149 | entity_pos = []
150 | for entity in world.landmarks: # world.entities:
151 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
152 | # entity colors
153 | entity_color = []
154 | for entity in world.landmarks: # world.entities:
155 | entity_color.append(entity.color)
156 | # communication of all other agents
157 | other_comm = []
158 | other_pos = []
159 | assign = []
160 | SNRforUser = []
161 | for other in world.agents:
162 | SNRforUser.append(other.state.SNRforUser)
163 | assign.append(other.state.assign)
164 | if other is agent: continue
165 | other_comm.append(other.state.c)
166 | other_pos.append(other.state.p_pos - agent.state.p_pos)
167 |
168 | maxSNR = np.max(SNRforUser)
169 | normalized_SNR = SNRforUser.copy()
170 | for i, item in enumerate(SNRforUser):
171 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR
172 | # return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_comm) # original code
173 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR)
--------------------------------------------------------------------------------
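The core term of `reward()` above scores the whole team on the worst-served user: each user is credited with the best rate any UAV offers it, and the reward is `10 * ln(min over users + 1e-9)`, before the SNR, boundary and collision penalties are subtracted. A small numeric sketch of that term; the `Rate` matrix is made up and smaller than the real scenario (3 UAVs, 10 users):

```python
import numpy as np

Rate = np.array([[5.1e6, 0.0,   2.3e6],    # UAV 0 -> users 0..2
                 [0.0,   4.2e6, 3.9e6],    # UAV 1
                 [1.0e6, 2.0e6, 0.0  ]])   # UAV 2

per_user_best = Rate.max(axis=0)           # best rate each user can get from any UAV
min_rate = per_user_best.min()             # worst-served user
reward = 10 * np.log(min_rate + 1e-9)
print(per_user_best, min_rate, round(reward, 2))
```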
/scenario-options/pow-band-loc.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wnight963/UAV_Optim_Pytorch/3630ce0dde64b7f6b1311c213b4e962db5d8be31/scenario-options/pow-band-loc.7z
--------------------------------------------------------------------------------
/scenario-options/pow-band-loc/core_uav.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 | self.hold_action = 0
11 |
12 | # state of agents (including communication and internal/mental state)
13 | class AgentState(EntityState):
14 | def __init__(self):
15 | super(AgentState, self).__init__()
16 | # communication allocation
17 | self.c = None
18 | self.UAV_Rate = None
19 | self.SNRforUser = None
20 | self.assign = None
21 |
22 | self.assign_pow = None
23 |
24 | # action of the agent
25 | class Action(object):
26 | def __init__(self):
27 | # physical action
28 | self.u = None
29 | # communication action
30 | self.c = None
31 |
32 | self.pow = None
33 |
34 | # properties and state of physical world entity
35 | class Entity(object):
36 | def __init__(self):
37 | # name
38 | self.name = ''
39 | # properties:
40 | self.size = 0.0250
41 | # entity can move / be pushed
42 | self.movable = False
43 | # entity collides with others
44 | self.collide = True
45 | # material density (affects mass)
46 | self.density = 25.0
47 | # color
48 | self.color = None
49 | # max speed and accel
50 | self.max_speed = 10
51 | self.accel = None
52 | # state
53 | self.state = EntityState()
54 | # mass
55 | self.initial_mass = 1
56 |
57 | @property
58 | def mass(self):
59 | return self.initial_mass
60 |
61 | # properties of landmark entities
62 | class Landmark(Entity):
63 | def __init__(self):
64 | super(Landmark, self).__init__()
65 | self.assign_index = None
66 | self.switch_cout = 0
67 |
68 | # properties of agent entities
69 | class Agent(Entity):
70 | def __init__(self):
71 | super(Agent, self).__init__()
72 | # agents are movable by default
73 | self.movable = True
74 | # cannot send communication signals
75 | self.silent = False
76 | # cannot observe the world
77 | self.blind = False
78 | # physical motor noise amount
79 | self.u_noise = None
80 | # communication noise amount
81 | self.c_noise = None
82 | # control range
83 | self.u_range = 1.0
84 | # state
85 | self.state = AgentState()
86 | # action
87 | self.action = Action()
88 | # script behavior to execute
89 | self.action_callback = None
90 |
91 | # multi-agent world
92 | class World(object):
93 | def __init__(self):
94 | # list of agents and entities (can change at execution-time!)
95 | self.agents = []
96 | self.landmarks = []
97 | # communication channel dimensionality
98 | self.dim_c = 0
99 | # position dimensionality
100 | self.dim_p = 2
101 |
102 | self.dim_pow = 0
103 |
104 | # color dimensionality
105 | self.dim_color = 3
106 | # simulation timestep
107 | self.dt = 0.1
108 | # physical damping
109 | self.damping = 0.25
110 | # contact response parameters
111 | self.contact_force = 1e+2
112 | self.contact_margin = 1e-3
113 | self.collaborative = False
114 |
115 | self.pow_UAVs = 1000 # UAV transmit power
116 | self.pow_GBS = None # ground base station (GBS) transmit power
117 | self.gain_UAV = 10**(-40/10) # channel gain of the UAV link at unit distance, -40 dB (stored as a linear value)
118 | self.gain_GBS = 60 # channel gain of the GBS link at unit distance (dB)
119 | self.los_exp_UAV = 2 # path-loss exponent of the UAV link
120 | self.los_exp_GBS = 3 # path-loss exponent of the GBS link
121 | self.max_band = 10e6 # maximum available bandwidth
122 | self.noise_pdf = 10**(-167/10) # noise power spectral density (dBm)
123 |
124 | self.bandwidth = None # bandwidth
125 | self.tx_pow = None # transmit power
126 | self.Rate = None
127 | self.SNR = None
128 |
129 | # return all entities in the world
130 | @property
131 | def entities(self):
132 | return self.agents + self.landmarks
133 |
134 | # return all agents controllable by external policies
135 | @property
136 | def policy_agents(self):
137 | return [agent for agent in self.agents if agent.action_callback is None]
138 |
139 | # return all agents controlled by world scripts
140 | @property
141 | def scripted_agents(self):
142 | return [agent for agent in self.agents if agent.action_callback is not None]
143 |
144 | # update state of the world
145 | def step(self):
146 | # set actions for scripted agents
147 | for agent in self.scripted_agents:
148 | agent.action = agent.action_callback(agent, self)
149 |
150 | # gather forces applied to entities
151 | p_force = [None] * len(self.entities)
152 | # apply agent physical controls
153 | p_force = self.apply_action_force(p_force)
154 | # apply environment forces
155 | # p_force = self.apply_environment_force(p_force)
156 | # integrate physical state
157 | self.integrate_state(p_force)
158 |
159 | self.update_landmark_pos()
160 |
161 | # self.update_agent_pos()
162 |
163 | # update agent state
164 | for agent in self.agents:
165 | self.update_agent_state(agent)
166 | self.update_world_state()
167 |
168 | # dict_state = []
169 | # for i, landmark in enumerate(self.landmarks):
170 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
171 | # dict_state.append({'User {}'.format(i): landmark.state.p_pos})
172 | #
173 | # for i, agent in enumerate(self.agents):
174 | # # Message = 'Users {} locats in {}'.format(i, landmark.state.p_pos)
175 | # dict_state.append({'UAV {}'.format(i): agent.state.p_pos, 'band_asign': agent.state.c})
176 | # with open('./state_results/states.txt','a') as f:
177 | # f.write(str(dict_state)+'\n')
178 |
179 | # gather agent action forces
180 | def apply_action_force(self, p_force):
181 | # set applied forces
182 | for i,agent in enumerate(self.agents):
183 | if agent.movable:
184 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
185 | p_force[i] = agent.action.u + noise
186 | return p_force
187 |
188 | # gather physical forces acting on entities
189 | def apply_environment_force(self, p_force):
190 | # simple (but inefficient) collision response
191 | for a,entity_a in enumerate(self.entities):
192 | for b,entity_b in enumerate(self.entities):
193 | if(b <= a): continue
194 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
195 | if(f_a is not None):
196 | if(p_force[a] is None): p_force[a] = 0.0
197 | p_force[a] = f_a + p_force[a]
198 | if(f_b is not None):
199 | if(p_force[b] is None): p_force[b] = 0.0
200 | p_force[b] = f_b + p_force[b]
201 | return p_force
202 |
203 | # integrate physical state
204 | def integrate_state(self, p_force):
205 | for i, agent in enumerate(self.agents):
206 | if not agent.movable: continue
207 | agent.state.p_vel = agent.state.p_vel * (1 - self.damping)
208 | if (p_force[i] is not None):
209 | agent.state.p_vel += (p_force[i] / agent.mass) * self.dt
210 | if agent.max_speed is not None:
211 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
212 | if speed > agent.max_speed:
213 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
214 | np.square(agent.state.p_vel[1])) * agent.max_speed
215 | agent.state.p_pos += agent.state.p_vel * self.dt
216 |
217 | def update_landmark_pos(self):
218 | # for landmark in self.landmarks:
219 | # if not landmark.movable: continue
220 | # if landmark.state.hold_action > 0:
221 | # landmark.state.hold_action -= 1
222 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
223 | # else:
224 | # landmark.state.hold_action = np.random.randint(0,10)
225 | #
226 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
227 | # while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
228 | # landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
229 | # temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
230 | # # print(temp_pos)
231 | # landmark.state.p_pos = temp_pos
232 |
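    | # users perform a bounded random walk: resample the velocity until the
    | # next position stays inside the [-1, 1] x [-1, 1] arena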
233 | for landmark in self.landmarks:
234 | if not landmark.movable: continue
235 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
236 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
237 | while not (-1 <= temp_pos[0] <= 1 and -1 <= temp_pos[1] <= 1):
238 | landmark.state.p_vel = np.random.uniform(-1, +1, self.dim_p) * 0.5
239 | temp_pos = landmark.state.p_pos + landmark.state.p_vel * self.dt
240 | # print(temp_pos)
241 | landmark.state.p_pos = temp_pos
242 |
243 |
244 | def update_agent_state(self, agent):
245 | # set communication state (directly for now)
246 | if agent.silent:
247 | agent.state.c = np.zeros(self.dim_c)
248 | else:
249 | # noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
250 | # agent.state.c = agent.action.c + noise
251 | agent.state.c = agent.action.c
252 |
253 | def update_agent_pos(self):
254 | # set communication state (directly for now)
255 | for i, agent in enumerate(self.agents):
256 | if not agent.movable: continue
257 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
258 | agent.state.p_vel = (agent.action.u + noise) * (1 - self.damping)
259 | print(agent.state.p_vel)
260 | if agent.max_speed is not None:
261 | speed = np.sqrt(np.square(agent.state.p_vel[0]) + np.square(agent.state.p_vel[1]))
262 | if speed > agent.max_speed:
263 | print('exceeds maximum speed')
264 | agent.state.p_vel = agent.state.p_vel / np.sqrt(np.square(agent.state.p_vel[0]) +
265 | np.square(agent.state.p_vel[1])) * agent.max_speed
266 | agent.state.p_pos += agent.state.p_vel * self.dt
267 |
268 | def update_world_state(self):
269 | # users make their selection based on the allocation scheme provided by the base stations
270 |
271 | bw = np.zeros((len(self.agents), self.dim_c))
272 | power = np.zeros((len(self.agents), self.dim_pow))
273 | disVec = np.zeros((len(self.agents), self.dim_c))
274 | height = np.ones((len(self.agents), self.dim_c)) * 200 # UAV flight altitude
275 | for index, agent in enumerate(self.agents):
276 | bw[index] = np.array(list(self.max_band * i for i in agent.action.c))
277 | power[index] = np.array(list(self.pow_UAVs * i for i in agent.action.pow))
278 | # compute the Euclidean distance from the UAV to each user
279 | for i, landmark in enumerate(self.landmarks):
280 | disVec[index][i] = np.linalg.norm(agent.state.p_pos - landmark.state.p_pos, 2) * 1000
281 | # if the distance exceeds a fixed threshold, communication is impossible and the allocated bandwidth is set to 0
282 | # bw[index][i] = 0.1 if disVec[index][i] > 0.025*8 else bw[index][i]
283 | # received signal power
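    | # Pr = Pt * G / (d^alpha + h^2), combining the horizontal UAV-user distance
    | # with the fixed flight altitude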
284 | signal_pow = power * self.gain_UAV / (disVec ** self.los_exp_UAV + height ** 2)
285 |
286 | # noise power at the receiver
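    | # noise power = bandwidth * noise PSD; the 1e-9 floor avoids division by
    | # zero for users that are allocated no bandwidth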
287 | noise_pow = bw * self.noise_pdf + 1e-9
288 | # signal-to-noise ratio at the receiver
289 | SNR = signal_pow / noise_pow
290 |
291 | SNR[SNR < np.sqrt(10)] = 0 # below 5 dB the link is unusable, so force the SNR to zero
292 |
293 |
294 | # for i in range(self.dim_c):
295 | # for j in range(len(self.agents)):
296 | # if 10*np.log10(SNR[j][i]) < 15:
297 | # bw[j][i] = 0
298 |
299 | # information transmission rate
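    | # Shannon capacity per UAV-user pair: R = B * log2(1 + SNR)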
300 | Rate = bw * np.log2(np.ones((len(self.agents), self.dim_c)) + SNR)
301 |
302 | ## ensure each user keeps a communication link with only one UAV
303 | mask = np.zeros((len(self.agents), self.dim_c)) # masked entries are 0
304 | # mask = -1 * np.ones((len(self.agents), self.dim_c)) # masked entries are negative
305 |
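    | # each user attaches to the UAV offering the highest rate; changing the
    | # serving UAV between steps is recorded as a handover via switch_cout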
306 | for i, landmark in enumerate(self.landmarks):
307 | if (landmark.assign_index is not None) and (landmark.assign_index != np.argmax(Rate[:, i])):
308 | landmark.switch_cout = 1
309 | landmark.assign_index = np.argmax(Rate[:, i])
310 | else:
311 | landmark.assign_index = np.argmax(Rate[:, i])
312 |
313 | mask[landmark.assign_index][i] = 1
314 |
315 |
316 | self.bandwidth = mask * bw
317 | self.tx_pow = mask * power
318 | self.Rate = mask * Rate
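    | # convert the masked SNR to dB; the 1e-10 floor keeps log10 finite for
    | # masked-out links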
319 | self.SNR = 10*np.log10(mask * SNR + 1e-10)
320 |
321 | for i, agent in enumerate(self.agents):
322 | agent.state.assign = self.bandwidth[i, :]/self.max_band
323 | agent.state.UAV_Rate = self.Rate[i, :]
324 | agent.state.SNRforUser = SNR[i, :]
325 | agent.state.assign_pow = self.tx_pow[i, :]/self.pow_UAVs
326 |
327 | # for i, landmark in enumerate(self.landmarks):
328 | # band = self.bandwidth.transpose()
329 | # landmark.assign_index = np.nonzero(band[i])
330 |
331 |
332 |
333 | # get collision forces for any contact between two entities
334 | def get_collision_force(self, entity_a, entity_b):
335 | if (not entity_a.collide) or (not entity_b.collide):
336 | return [None, None] # not a collider
337 | if (entity_a is entity_b):
338 | return [None, None] # don't collide against itself
339 | # compute actual distance between entities
340 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
341 | dist = np.sqrt(np.sum(np.square(delta_pos)))
342 | # minimum allowable distance
343 | dist_min = entity_a.size + entity_b.size
344 | # softmax penetration
345 | k = self.contact_margin
346 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
347 | force = self.contact_force * delta_pos / dist * penetration
348 | force_a = +force if entity_a.movable else None
349 | force_b = -force if entity_b.movable else None
350 | return [force_a, force_b]
--------------------------------------------------------------------------------
/scenario-options/pow-band-loc/environment_uav.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.envs.registration import EnvSpec
4 | import numpy as np
5 | from multiagent.multi_discrete import MultiDiscrete
6 |
7 | # environment for all agents in the multiagent world
8 | # currently code assumes that no agents will be created/destroyed at runtime!
9 | class MultiAgentEnv(gym.Env):
10 | metadata = {
11 | 'render.modes' : ['human', 'rgb_array']
12 | }
13 |
14 | def __init__(self, world, reset_callback=None, reward_callback=None,
15 | observation_callback=None, info_callback=None,
16 | done_callback=None, shared_viewer=True):
17 |
18 | self.world = world
19 | self.agents = self.world.policy_agents
20 | # set required vectorized gym env property
21 | self.n = len(world.policy_agents)
22 | # scenario callbacks
23 | self.reset_callback = reset_callback
24 | self.reward_callback = reward_callback
25 | self.observation_callback = observation_callback
26 | self.info_callback = info_callback
27 | self.done_callback = done_callback
28 | # environment parameters
29 | self.discrete_action_space = True
30 | # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
31 | self.discrete_action_input = False
32 | # if true, even continuous actions are discretised (via argmax) before being applied
33 | self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
34 | # if true, every agent has the same reward
35 | self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
36 | self.time = 0
37 |
38 | # configure spaces
39 | self.action_space = []
40 | self.observation_space = []
41 | for agent in self.agents:
42 | total_action_space = []
43 | # physical action space
44 | if self.discrete_action_space:
45 | u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
46 | else:
47 | u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
48 | if agent.movable:
49 | total_action_space.append(u_action_space)
50 | # communication action space
51 | if self.discrete_action_space:
52 | c_action_space = spaces.Discrete(world.dim_c)
53 | pow_action_space = spaces.Discrete(world.dim_pow)
54 | else:
55 | c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
56 | pow_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_pow,), dtype=np.float32)
57 | if not agent.silent:
58 | total_action_space.append(c_action_space)
59 | total_action_space.append(pow_action_space)
60 | # total action space
61 | if len(total_action_space) > 1:
62 | # all action spaces are discrete, so simplify to MultiDiscrete action space
63 | if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
64 | act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
65 | else:
66 | act_space = spaces.Tuple(total_action_space)
67 | self.action_space.append(act_space)
68 | else:
69 | self.action_space.append(total_action_space[0])
70 | # observation space
71 | obs_dim = len(observation_callback(agent, self.world))
72 | self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
73 | # agent.action.c = np.zeros(self.world.dim_c)
74 | # agent.action.pow = np.zeros(self.world.dim_pow)
75 |
76 | # rendering
77 | self.shared_viewer = shared_viewer
78 | if self.shared_viewer:
79 | self.viewers = [None]
80 | else:
81 | self.viewers = [None] * self.n
82 | self._reset_render()
83 |
84 | def step(self, action_n):
85 | obs_n = []
86 | reward_n = []
87 | done_n = []
88 | info_n = {'n': []}
89 | self.agents = self.world.policy_agents
90 | # set action for each agent
91 | for i, agent in enumerate(self.agents):
92 | self._set_action(action_n[i], agent, self.action_space[i])
93 |
94 | # advance world state
95 | self.world.step()
96 | # record observation for each agent
97 | for agent in self.agents:
98 | obs_n.append(self._get_obs(agent))
99 | reward_n.append(self._get_reward(agent))
100 | done_n.append(self._get_done(agent))
101 |
102 | info_n['n'].append(self._get_info(agent))
103 |
104 | # all agents get total reward in cooperative case
105 | reward = np.sum(reward_n)
106 | if self.shared_reward:
107 | reward_n = [reward] * self.n
108 |
109 | return obs_n, reward_n, done_n, info_n
110 |
111 | def reset(self):
112 | # reset world
113 | self.reset_callback(self.world)
114 | # reset renderer
115 | self._reset_render()
116 | # record observations for each agent
117 | obs_n = []
118 | self.agents = self.world.policy_agents
119 | for agent in self.agents:
120 | obs_n.append(self._get_obs(agent))
121 | return obs_n
122 |
123 | # get info used for benchmarking
124 | def _get_info(self, agent):
125 | if self.info_callback is None:
126 | return {}
127 | return self.info_callback(agent, self.world)
128 |
129 | # get observation for a particular agent
130 | def _get_obs(self, agent):
131 | if self.observation_callback is None:
132 | return np.zeros(0)
133 | return self.observation_callback(agent, self.world)
134 |
135 | # get dones for a particular agent
136 | # unused right now -- agents are allowed to go beyond the viewing screen
137 | def _get_done(self, agent):
138 | if self.done_callback is None:
139 | return False
140 | return self.done_callback(agent, self.world)
141 |
142 | # get reward for a particular agent
143 | def _get_reward(self, agent):
144 | if self.reward_callback is None:
145 | return 0.0
146 | return self.reward_callback(agent, self.world)
147 |
148 | # set env action for a particular agent
149 | def _set_action(self, action, agent, action_space, time=None):
150 | agent.action.u = np.zeros(self.world.dim_p)
151 | agent.action.c = np.zeros(self.world.dim_c)
152 | # process action
153 | if isinstance(action_space, MultiDiscrete):
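    | # split the flat action vector into one chunk per sub-space (movement,
    | # then channel/bandwidth and power for non-silent agents), using each
    | # sub-space's size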
154 | act = []
155 | size = action_space.high - action_space.low + 1
156 | index = 0
157 | for s in size:
158 | act.append(action[index:(index+s)])
159 | index += s
160 | action = act
161 | else:
162 | action = [action]
163 |
164 | if agent.movable:
165 | # physical action
166 | if self.discrete_action_input:
167 | agent.action.u = np.zeros(self.world.dim_p)
168 | # process discrete action
169 | if action[0] == 1: agent.action.u[0] = -1.0
170 | if action[0] == 2: agent.action.u[0] = +1.0
171 | if action[0] == 3: agent.action.u[1] = -1.0
172 | if action[0] == 4: agent.action.u[1] = +1.0
173 | else:
174 | if self.force_discrete_action:
175 | d = np.argmax(action[0])
176 | action[0][:] = 0.0
177 | action[0][d] = 1.0
178 | if self.discrete_action_space:
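    | # entry 0 of the 5-dim movement action is a no-op; the remaining entries
    | # weight movement along +/-x and +/-y, and the force is the signed
    | # difference of each opposing pair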
179 | agent.action.u[0] += action[0][1] - action[0][2]
180 | agent.action.u[1] += action[0][3] - action[0][4]
181 | else:
182 | agent.action.u = action[0]
183 | sensitivity = 5
184 | if agent.accel is not None:
185 | sensitivity = agent.accel
186 | agent.action.u *= sensitivity
187 | action = action[1:]
188 | if not agent.silent:
189 | # communication action
190 | if self.discrete_action_input:
191 | agent.action.c = np.zeros(self.world.dim_c)
192 | agent.action.pow = np.zeros(self.world.dim_pow)
193 | agent.action.c[action[0]] = 1.0
194 | agent.action.pow[action[1]] = 1.0
195 | else:
196 | agent.action.c = action[0]
197 | agent.action.pow = action[1] # the second chunk holds the power allocation
198 |
199 | action = action[2:] # two sub-actions (c and pow) were consumed above, so slice past both of them
200 | # make sure we used all elements of action
201 | assert len(action) == 0
202 |
203 | # reset rendering assets
204 | def _reset_render(self):
205 | self.render_geoms = None
206 | self.render_geoms_xform = None
207 | # self.render_geoms_uav = None
208 | # self.render_geoms_xform_uav = None
209 |
210 | # render environment
211 | def render(self, mode='human'):
212 | # if mode == 'human':
213 | # alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
214 | # message = ''
215 | # for agent in self.world.agents:
216 | # comm = []
217 | # for other in self.world.agents:
218 | # if other is agent: continue
219 | # if np.all(other.state.c == 0):
220 | # word = '_'
221 | # else:
222 | # word = alphabet[np.argmax(other.state.c)]
223 | # message += (other.name + ' to ' + agent.name + ': ' + word + ' ')
224 | # print(message)
225 |
226 | for i in range(len(self.viewers)):
227 | # create viewers (if necessary)
228 | if self.viewers[i] is None:
229 | # import rendering only if we need it (and don't import for headless machines)
230 | #from gym.envs.classic_control import rendering
231 | from multiagent import rendering
232 | self.viewers[i] = rendering.Viewer(700,700)
233 |
234 | # create rendering geometry
235 | if self.render_geoms is None:
236 | # import rendering only if we need it (and don't import for headless machines)
237 | #from gym.envs.classic_control import rendering
238 | from multiagent import rendering
239 | self.render_geoms = []
240 | self.render_geoms_xform = []
241 | self.render_geoms_uav = []
242 | self.render_geoms_xform_uav = []
243 | self.render_geoms_line = []
244 |
245 |
246 | range_color = [0.25, 0.25, 0.75]
247 | for entity in self.world.entities:
248 | geom = rendering.make_circle(entity.size)
249 | xform = rendering.Transform()
250 |
251 | if 'agent' in entity.name:
252 | geom.set_color(*entity.color, alpha=0.5)
253 |
254 | geom_uav = rendering.make_circle(entity.size*4)
255 | xform_uav = rendering.Transform()
256 | geom_uav.set_color(*range_color, alpha=0.25)
257 | geom_uav.add_attr(xform_uav)
258 |
259 | self.render_geoms_uav.append(geom_uav)
260 | self.render_geoms_xform_uav.append(xform_uav)
261 |
262 | else:
263 | geom.set_color(*entity.color)
264 |
265 | geom_line = rendering.Line(entity.state.p_pos, self.world.agents[entity.assign_index].state.p_pos)
266 | self.render_geoms_line.append(geom_line)
267 |
268 | geom.add_attr(xform)
269 | self.render_geoms.append(geom)
270 | self.render_geoms_xform.append(xform)
271 |
272 | # add geoms to viewer
273 | for viewer in self.viewers:
274 | viewer.geoms = []
275 | for geom in self.render_geoms:
276 | viewer.add_geom(geom)
277 | for geom_uav in self.render_geoms_uav:
278 | viewer.add_geom(geom_uav)
279 | for geom_line in self.render_geoms_line:
280 | viewer.add_geom(geom_line)
281 |
282 | results = []
283 | for i in range(len(self.viewers)):
284 | from multiagent import rendering
285 | # update bounds to center around agent
286 | cam_range = 1
287 | if self.shared_viewer:
288 | pos = np.zeros(self.world.dim_p)
289 | else:
290 | pos = self.agents[i].state.p_pos
291 | self.viewers[i].set_bounds(pos[0]-cam_range, pos[0]+cam_range, pos[1]-cam_range, pos[1]+cam_range)
292 | # update geometry positions
293 | for e, entity in enumerate(self.world.entities):
294 | self.render_geoms_xform[e].set_translation(*entity.state.p_pos)
295 | for a, agent in enumerate(self.world.policy_agents):
296 | self.render_geoms_xform_uav[a].set_translation(*agent.state.p_pos)
297 | for l, landmark in enumerate(self.world.landmarks):
298 | self.render_geoms_line[l].start = landmark.state.p_pos
299 | self.render_geoms_line[l].end = self.world.agents[landmark.assign_index].state.p_pos
300 |
301 | # render to display or array
302 | results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array'))
303 |
304 | return results
305 |
306 | # create receptor field locations in local coordinate frame
307 | def _make_receptor_locations(self, agent):
308 | receptor_type = 'polar'
309 | range_min = 0.05 * 2.0
310 | range_max = 1.00
311 | dx = []
312 | # circular receptive field
313 | if receptor_type == 'polar':
314 | for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False):
315 | for distance in np.linspace(range_min, range_max, 3):
316 | dx.append(distance * np.array([np.cos(angle), np.sin(angle)]))
317 | # add origin
318 | dx.append(np.array([0.0, 0.0]))
319 | # grid receptive field
320 | if receptor_type == 'grid':
321 | for x in np.linspace(-range_max, +range_max, 5):
322 | for y in np.linspace(-range_max, +range_max, 5):
323 | dx.append(np.array([x,y]))
324 | return dx
--------------------------------------------------------------------------------
/scenario-options/pow-band-loc/simple_collaborative_comm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core_uav import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | world.dim_pow = 10
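    | # dim_c is both the number of ground users and the size of the bandwidth-
    | # allocation action; dim_pow is the size of the transmit-power allocation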
11 | num_uav = 3
12 | world.collaborative = True # whether agents share rewards
13 | # add agents
14 | world.agents = [Agent() for i in range(num_uav)]
15 | for i, agent in enumerate(world.agents):
16 | agent.name = 'agent %d' % i
17 | agent.collide = True
18 | # add landmarks
19 | world.landmarks = [Landmark() for i in range(world.dim_c)]
20 | for i, landmark in enumerate(world.landmarks):
21 | landmark.name = 'landmark %d' % i
22 | landmark.collide = False
23 | landmark.movable = True
24 |
25 | # make initial conditions
26 | self.reset_world(world)
27 | return world
28 |
29 | def reset_world(self, world):
30 | # set color for landmarks
31 | # world.landmarks[0].color = np.array([0.25, 0.25, 0.75])
32 | # world.landmarks[1].color = np.array([0.25, 0.25, 0.75])
33 | # world.landmarks[2].color = np.array([0.25, 0.25, 0.75])
34 | # world.landmarks[3].color = np.array([0.25, 0.25, 0.75])
35 | # world.landmarks[4].color = np.array([0.25, 0.25, 0.75])
36 |
37 | # set random initial states
38 | # for i, agent in enumerate(world.agents):
39 | # agent.color = np.array([0.35, 0.35, 0.85])
40 | # # random properties for landmarks
41 | # for i, landmark in enumerate(world.landmarks):
42 | # landmark.color = np.array([0.25, 0.25, 0.25])
43 | # set random initial states
44 | for i, agent in enumerate(world.agents):
45 | agent.color = np.array([[0.35, 0.35, 0.85], [0.25, 0.75, 0.25], [0.5, 0.15, 0.35]])[i]
46 | # agent.state.p_pos = np.array([[0.35, 0.35], [-0.35, -0.35]])[i]
47 | agent.state.p_pos = np.array([[0., 0.5], [-0.43, -0.25], [0.43, -0.25]])[i]
48 | # agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 |
52 | agent.state.assign = np.zeros(world.dim_c)
53 | agent.state.SNRforUser = np.zeros(world.dim_c)
54 | agent.state.assign_pow = np.zeros(world.dim_c)
55 |
56 | for i, landmark in enumerate(world.landmarks):
57 | # landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
58 | # landmark.state.p_pos = np.array([[-0.8154612 , -0.24502182], [0.89699076, -0.14581629],
59 | # [-0.324932, 0.60295496], [0.62360916, -0.52245444],
60 | # [0.65862443, 0.23426809]])[i]
61 |
62 | landmark.state.p_pos = np.array([[-0.8154612, -0.24502182], [0.89699076, -0.14581629],
63 | [-0.3249325, 0.60295496], [0.62360916, -0.52245444],
64 | [0.65862443, 0.23426809], [0.29590076, 0.54571629],
65 | [-0.89699076, -0.74682629], [0.84629571, -0.98582649],
66 | [-0.89699076, 0.14581629], [-0.14699076, -0.75584629]])[i]
67 |
68 | landmark.state.p_vel = np.zeros(world.dim_p)
69 | # landmark.state.p_vel = np.random.uniform(-1, +1, world.dim_p)
70 | landmark.color = np.array([0.25, 0.25, 0.25])
71 | landmark.assign_index = None
72 | landmark.switch_cout = 0
73 |
74 | def benchmark_data(self, agent, world):
75 | rew = 0
76 | collisions = 0
77 | occupied_landmarks = 0
78 | min_dists = 0
79 | for l in world.landmarks:
80 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
81 | min_dists += min(dists)
82 | rew -= min(dists)
83 | if min(dists) < 0.1:
84 | occupied_landmarks += 1
85 | if agent.collide:
86 | for a in world.agents:
87 | if self.is_collision(a, agent):
88 | rew -= 1
89 | collisions += 1
90 | return rew, collisions, min_dists, occupied_landmarks
91 |
92 | def is_collision(self, agent1, agent2):
93 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
94 | dist = np.sqrt(np.sum(np.square(delta_pos)))
95 | dist_min = agent1.size + agent2.size
96 | return True if dist < dist_min else False
97 |
98 | def reward(self, agent, world):
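    | # max-min rate objective: reward the (log of the) worst per-user best rate,
    | # then subtract penalties for low SNR, leaving the arena, user handovers
    | # and UAV collisions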
99 | try:
100 | min_rate = None
101 | for i in range(world.dim_c):
102 | min_rate = np.max(world.Rate[:, i]) if min_rate is None else min(min_rate, np.max(world.Rate[:, i]))
103 | reward = 10*np.log(min_rate+1e-9)
104 | # reward = 10*np.log(np.min(world.Rate[np.nonzero(world.Rate)]))
105 | # reward = np.log(np.min(agent.state.UAV_Rate[np.nonzero(agent.state.UAV_Rate)]))
106 | # reward = np.log(np.sum(world.Rate))
107 | except ValueError:
108 | reward = 0
109 |
110 | # reward = np.log(np.sum(world.Rate))
111 |
112 | for i in range(world.dim_c):
113 | if np.max(world.SNR[:, i]) < 15:
114 | reward -= 10
115 | # print('SNR not satisfied')
116 |
117 | for ag in world.agents: # use a distinct loop variable so the 'agent' argument is not shadowed
118 | if not (-1 <= ag.state.p_pos[0] <= 1 and -1 <= ag.state.p_pos[1] <= 1):
119 | reward -= 100
120 | # print(entity.name)
121 | # print(entity.state.p_pos)
122 | # raise Exception('out of bounds')
123 |
124 | # penalise the number of user handovers
125 | for landmark in world.landmarks:
126 | reward -= landmark.switch_cout
127 | landmark.switch_cout = 0
128 |
129 | if agent.collide:
130 | for a in world.agents:
131 | if a.name != agent.name:
132 | if self.is_collision(a, agent):
133 | reward -= 100
134 |
135 | return reward
136 |
137 | def observation(self, agent, world):
138 | # get positions of all entities in this agent's reference frame
139 | entity_pos = []
140 | for entity in world.landmarks: # world.entities:
141 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
142 | # entity colors
143 | entity_color = []
144 | for entity in world.landmarks: # world.entities:
145 | entity_color.append(entity.color)
146 | # communication of all other agents
147 | comm = []
148 | other_pos = []
149 | assign = []
150 | SNRforUser = []
151 | assign_pow = []
152 | for other in world.agents:
153 | assign.append(other.state.assign)
154 | SNRforUser.append(other.state.SNRforUser)
155 | assign_pow.append(other.state.assign_pow)
156 | if other is agent: continue
157 | comm.append(other.state.c)
158 | other_pos.append(other.state.p_pos - agent.state.p_pos)
159 | # print(assign)
160 | # print(assign_pow)
161 | # return np.concatenate(
162 | # [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
163 |
164 | # return np.concatenate(
165 | # [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign)
166 |
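    | # normalise the per-user SNRs by the current maximum so the SNR part of the
    | # observation is scale-free (guarding against an all-zero maximum)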
167 | maxSNR = np.max(SNRforUser)
168 | normalized_SNR = SNRforUser.copy()
169 | for i, item in enumerate(SNRforUser):
170 | if maxSNR != 0: normalized_SNR[i] = item / maxSNR
171 |
172 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + assign + normalized_SNR + assign_pow)
173 |
--------------------------------------------------------------------------------