├── MADDPG
├── .idea
│ ├── encodings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── openai-maddpg.iml
│ ├── vcs.xml
│ └── workspace.xml
├── maddpg
│ ├── .DS_Store
│ ├── .vscode
│ │ └── settings.json
│ ├── LICENSE.txt
│ ├── README.md
│ ├── checkpoint
│ ├── maddpg.egg-info
│ │ ├── PKG-INFO
│ │ ├── SOURCES.txt
│ │ ├── dependency_links.txt
│ │ ├── not-zip-safe
│ │ ├── requires.txt
│ │ └── top_level.txt
│ ├── maddpg
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ └── __init__.cpython-35.pyc
│ │ ├── common
│ │ │ ├── __pycache__
│ │ │ │ ├── distributions.cpython-35.pyc
│ │ │ │ └── tf_util.cpython-35.pyc
│ │ │ ├── distributions.py
│ │ │ └── tf_util.py
│ │ └── trainer
│ │ │ ├── __pycache__
│ │ │ │ ├── maddpg.cpython-35.pyc
│ │ │ │ └── replay_buffer.cpython-35.pyc
│ │ │ ├── maddpg.py
│ │ │ └── replay_buffer.py
│ └── setup.py
├── multiagent-particle-envs
│ ├── .DS_Store
│ ├── .vscode
│ │ └── settings.json
│ ├── LICENSE.txt
│ ├── README.md
│ ├── bin
│ │ ├── __init__.py
│ │ └── interactive.py
│ ├── multiagent.egg-info
│ │ ├── PKG-INFO
│ │ ├── SOURCES.txt
│ │ ├── dependency_links.txt
│ │ ├── not-zip-safe
│ │ ├── requires.txt
│ │ └── top_level.txt
│ ├── multiagent
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── core.cpython-35.pyc
│ │ │ ├── environment.cpython-35.pyc
│ │ │ ├── multi_discrete.cpython-35.pyc
│ │ │ ├── rendering.cpython-35.pyc
│ │ │ └── scenario.cpython-35.pyc
│ │ ├── core.py
│ │ ├── environment-tmp.py
│ │ ├── environment.py
│ │ ├── multi_discrete.py
│ │ ├── policy.py
│ │ ├── rendering.py
│ │ ├── scenario.py
│ │ └── scenarios
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-35.pyc
│ │ │ │ ├── competition_3v3.cpython-35.pyc
│ │ │ │ ├── simple.cpython-35.pyc
│ │ │ │ ├── simple_tag_v1.cpython-35.pyc
│ │ │ │ └── simple_tag_yuan_v2.cpython-35.pyc
│ │ │ ├── angle_3v3.py
│ │ │ ├── competition_3v3-tmp.py
│ │ │ ├── competition_3v3.py
│ │ │ ├── simple.py
│ │ │ ├── simple_adversary.py
│ │ │ ├── simple_crypto.py
│ │ │ ├── simple_push.py
│ │ │ ├── simple_reference.py
│ │ │ ├── simple_speaker_listener.py
│ │ │ ├── simple_spread.py
│ │ │ ├── simple_tag_v1.py
│ │ │ └── simple_world_comm.py
│ └── setup.py
└── reward setting
├── MADQN
├── .gitignore
├── LICENSE.txt
├── README.md
├── bin
│ ├── __init__.py
│ └── interactive.py
├── dqn.py
├── dqn_tag.py
├── make_env.py
├── multiagent
│ ├── __init__.py
│ ├── core.py
│ ├── environment.py
│ ├── multi_discrete.py
│ ├── policy.py
│ ├── rendering.py
│ ├── scenario.py
│ └── scenarios
│ │ ├── __init__.py
│ │ ├── simple.py
│ │ ├── simple_adversary.py
│ │ ├── simple_crypto.py
│ │ ├── simple_push.py
│ │ ├── simple_reference.py
│ │ ├── simple_speaker_listener.py
│ │ ├── simple_spread.py
│ │ ├── simple_tag.py
│ │ ├── simple_tag_v1.py
│ │ └── simple_world_comm.py
├── readme.txt
├── setup.py
└── test
│ └── results
│ └── dqn_1v2
│ └── save
│ ├── run_parameters.json
│ ├── tag-dqn_21500_0.h5
│ ├── tag-dqn_21500_1.h5
│ └── tag-dqn_21500_2.h5
├── README.md
├── Rule-coupled vs Random.gif
└── Rule-coupled vs Selfplay.gif
/MADDPG/.idea/encodings.xml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/.idea/openai-maddpg.iml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/.idea/workspace.xml:
--------------------------------------------------------------------------------
(XML content not preserved in this export)
--------------------------------------------------------------------------------
/MADDPG/maddpg/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/.DS_Store
--------------------------------------------------------------------------------
/MADDPG/maddpg/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "/Users/xunyunliu/anaconda3/envs/tf/bin/python"
3 | }
--------------------------------------------------------------------------------
/MADDPG/maddpg/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "/home/airc/python_code/openai-maddpg/maddpg/experiments"
2 | all_model_checkpoint_paths: "/home/airc/python_code/openai-maddpg/maddpg/experiments"
3 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: maddpg
3 | Version: 0.0.1
4 | Summary: Multi-Agent Deep Deterministic Policy Gradient
5 | Home-page: https://github.com/openai/maddpg
6 | Author: Igor Mordatch
7 | Author-email: mordatch@openai.com
8 | License: UNKNOWN
9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | maddpg/__init__.py
4 | maddpg.egg-info/PKG-INFO
5 | maddpg.egg-info/SOURCES.txt
6 | maddpg.egg-info/dependency_links.txt
7 | maddpg.egg-info/not-zip-safe
8 | maddpg.egg-info/requires.txt
9 | maddpg.egg-info/top_level.txt
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/not-zip-safe:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | gym
2 | numpy-stl
3 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | maddpg
2 |
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/__init__.py:
--------------------------------------------------------------------------------
1 | class AgentTrainer(object):
2 |     def __init__(self, name, model, obs_shape, act_space, args):
3 |         raise NotImplementedError()
4 |
5 |     def action(self, obs):
6 |         raise NotImplementedError()
7 |
8 |     def process_experience(self, obs, act, rew, new_obs, done, terminal):
9 |         raise NotImplementedError()
10 |
11 |     def preupdate(self):
12 |         raise NotImplementedError()
13 |
14 |     def update(self, agents):
15 |         raise NotImplementedError()
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/common/__pycache__/distributions.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/common/__pycache__/distributions.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/common/__pycache__/tf_util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/common/__pycache__/tf_util.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/__pycache__/maddpg.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/trainer/__pycache__/maddpg.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/__pycache__/replay_buffer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/trainer/__pycache__/replay_buffer.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/maddpg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import tensorflow as tf
4 | import maddpg.common.tf_util as U
5 |
6 | from maddpg.common.distributions import make_pdtype
7 | from maddpg import AgentTrainer
8 | from maddpg.trainer.replay_buffer import ReplayBuffer
9 |
10 |
11 | def discount_with_dones(rewards, dones, gamma):
12 | discounted = []
13 | r = 0
14 | for reward, done in zip(rewards[::-1], dones[::-1]):
15 | r = reward + gamma*r
16 | r = r*(1.-done)
17 | discounted.append(r)
18 | return discounted[::-1]
19 |
20 | def make_update_exp(vals, target_vals):
21 | polyak = 1.0 - 1e-2
22 | expression = []
23 | for var, var_target in zip(sorted(vals, key=lambda v: v.name), sorted(target_vals, key=lambda v: v.name)):
24 | expression.append(var_target.assign(polyak * var_target + (1.0-polyak) * var))
25 | expression = tf.group(*expression)
26 | return U.function([], [], updates=[expression])
27 |
28 | def p_train(make_obs_ph_n, act_space_n, p_index, p_func, q_func, optimizer, grad_norm_clipping=None, local_q_func=False, num_units=64, scope="trainer", reuse=None):
29 | with tf.variable_scope(scope, reuse=reuse):
30 | # create distributions
31 | act_pdtype_n = [make_pdtype(act_space) for act_space in act_space_n]
32 |
33 | # set up placeholders
34 | obs_ph_n = make_obs_ph_n
35 | act_ph_n = [act_pdtype_n[i].sample_placeholder([None], name="action"+str(i)) for i in range(len(act_space_n))]
36 |
37 | p_input = obs_ph_n[p_index]
38 |
39 | p = p_func(p_input, int(act_pdtype_n[p_index].param_shape()[0]), scope="p_func", num_units=num_units)
40 | p_func_vars = U.scope_vars(U.absolute_scope_name("p_func"))
41 |
42 | # wrap parameters in distribution
43 | act_pd = act_pdtype_n[p_index].pdfromflat(p)
44 |
45 | act_sample = act_pd.sample()
46 | p_reg = tf.reduce_mean(tf.square(act_pd.flatparam()))
47 |
48 | act_input_n = act_ph_n + []
49 | act_input_n[p_index] = act_pd.sample()
50 | q_input = tf.concat(obs_ph_n + act_input_n, 1)
51 | if local_q_func:
52 | q_input = tf.concat([obs_ph_n[p_index], act_input_n[p_index]], 1)
53 | q = q_func(q_input, 1, scope="q_func", reuse=True, num_units=num_units)[:,0]
54 | pg_loss = -tf.reduce_mean(q)
55 |
56 | loss = pg_loss + p_reg * 1e-3
57 |
58 | optimize_expr = U.minimize_and_clip(optimizer, loss, p_func_vars, grad_norm_clipping)
59 |
60 | # Create callable functions
61 | train = U.function(inputs=obs_ph_n + act_ph_n, outputs=loss, updates=[optimize_expr])
62 | act = U.function(inputs=[obs_ph_n[p_index]], outputs=act_sample)
63 |
64 | act_test = U.function(inputs=[obs_ph_n[p_index]], outputs=p)
65 |
66 | p_values = U.function([obs_ph_n[p_index]], p)
67 |
68 | # target network
69 | target_p = p_func(p_input, int(act_pdtype_n[p_index].param_shape()[0]), scope="target_p_func", num_units=num_units)
70 | target_p_func_vars = U.scope_vars(U.absolute_scope_name("target_p_func"))
71 | update_target_p = make_update_exp(p_func_vars, target_p_func_vars)
72 |
73 | target_act_sample = act_pdtype_n[p_index].pdfromflat(target_p).sample()
74 | target_act = U.function(inputs=[obs_ph_n[p_index]], outputs=target_act_sample)
75 |
76 | return act_test, act, train, update_target_p, {'p_values': p_values, 'target_act': target_act, 'p_vars': p_func_vars, 'target_p_vars': target_p_func_vars}
77 |
78 | def q_train(make_obs_ph_n, act_space_n, q_index, q_func, optimizer, grad_norm_clipping=None, local_q_func=False, scope="trainer", reuse=None, num_units=64):
79 | with tf.variable_scope(scope, reuse=reuse):
81 | # create distributions
81 | act_pdtype_n = [make_pdtype(act_space) for act_space in act_space_n]
82 |
83 | # set up placeholders
84 | obs_ph_n = make_obs_ph_n
85 | act_ph_n = [act_pdtype_n[i].sample_placeholder([None], name="action"+str(i)) for i in range(len(act_space_n))]
86 | target_ph = tf.placeholder(tf.float32, [None], name="target")
87 |
88 | q_input = tf.concat(obs_ph_n + act_ph_n, 1)
89 | if local_q_func:
90 | q_input = tf.concat([obs_ph_n[q_index], act_ph_n[q_index]], 1)
91 | q = q_func(q_input, 1, scope="q_func", num_units=num_units)[:,0]
92 | q_func_vars = U.scope_vars(U.absolute_scope_name("q_func"))
93 |
94 | q_loss = tf.reduce_mean(tf.square(q - target_ph))
95 |
96 | # viscosity solution to Bellman differential equation in place of an initial condition
97 | q_reg = tf.reduce_mean(tf.square(q))
98 | loss = q_loss #+ 1e-3 * q_reg
99 |
100 | optimize_expr = U.minimize_and_clip(optimizer, loss, q_func_vars, grad_norm_clipping)
101 |
102 | # Create callable functions
103 | train = U.function(inputs=obs_ph_n + act_ph_n + [target_ph], outputs=loss, updates=[optimize_expr])
104 | q_values = U.function(obs_ph_n + act_ph_n, q)
105 |
106 | # target network
107 | target_q = q_func(q_input, 1, scope="target_q_func", num_units=num_units)[:,0]
108 | target_q_func_vars = U.scope_vars(U.absolute_scope_name("target_q_func"))
109 | update_target_q = make_update_exp(q_func_vars, target_q_func_vars)
110 |
111 | target_q_values = U.function(obs_ph_n + act_ph_n, target_q)
112 |
113 | return train, update_target_q, {'q_values': q_values, 'target_q_values': target_q_values, 'q_vars': q_func_vars, 'target_q_vars': target_q_func_vars}
114 |
115 | class MADDPGAgentTrainer(AgentTrainer):
116 | def __init__(self, name, model, obs_shape_n, act_space_n, agent_index, args, local_q_func=False):
117 | self.name = name
118 | self.n = len(obs_shape_n)
119 | #self.n = args.num_adversaries########
120 |
121 | self.agent_index = agent_index
122 | self.args = args
123 | obs_ph_n = []
124 | for i in range(self.n):
125 | obs_ph_n.append(U.BatchInput(obs_shape_n[i], name="observation"+str(i)).get())
126 |
127 | # Create all the functions necessary to train the model
128 | self.q_train, self.q_update, self.q_debug = q_train(
129 | scope=self.name,
130 | make_obs_ph_n=obs_ph_n,
131 | act_space_n=act_space_n,
132 | q_index=agent_index,
133 | q_func=model,
134 | optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
135 | grad_norm_clipping=0.5,
136 | local_q_func=local_q_func,
137 | num_units=args.num_units
138 | )
139 | self.act_test, self.act, self.p_train, self.p_update, self.p_debug = p_train(
140 | scope=self.name,
141 | make_obs_ph_n=obs_ph_n,
142 | act_space_n=act_space_n,
143 | p_index=agent_index,
144 | p_func=model,
145 | q_func=model,
146 | optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
147 | grad_norm_clipping=0.5,
148 | local_q_func=local_q_func,
149 | num_units=args.num_units
150 | )
151 | # Create experience buffer
152 | self.replay_buffer = ReplayBuffer(1e6)
153 | self.max_replay_buffer_len = args.batch_size * args.max_episode_len
154 | self.replay_sample_index = None
155 |
156 | def action(self, obs):
157 | return self.act(obs[None])[0]
158 |
159 | def action_test(self, obs):
160 | return self.act_test(obs[None])[0]
161 |
162 | def experience(self, obs, act, rew, new_obs, done, terminal):
163 | # Store transition in the replay buffer.
164 | self.replay_buffer.add(obs, act, rew, new_obs, float(done))
165 |
166 | def preupdate(self):
167 | self.replay_sample_index = None
168 |
169 | def update(self, agents, t):
170 | if len(self.replay_buffer) < self.max_replay_buffer_len: # replay buffer is not large enough
171 | return
172 | if not t % 100 == 0: # only update every 100 steps
173 | return
174 |
175 | self.replay_sample_index = self.replay_buffer.make_index(self.args.batch_size)
176 | # collect replay sample from all agents
177 | obs_n = []
178 | obs_next_n = []
179 | act_n = []
180 | index = self.replay_sample_index
181 |
182 | for i in range(self.n):
183 | ####changed by liyuan
184 | #tmp_index = agents[i].replay_buffer.make_index(self.args.batch_size)
185 | #obs, act, rew, obs_next, done = agents[i].replay_buffer.sample_index(tmp_index)
186 |
187 | obs, act, rew, obs_next, done = agents[i].replay_buffer.sample_index(index)
188 | obs_n.append(obs)
189 | obs_next_n.append(obs_next)
190 | act_n.append(act)
191 | obs, act, rew, obs_next, done = self.replay_buffer.sample_index(index)
192 |
193 | # train q network
194 | num_sample = 1
195 | target_q = 0.0
196 | for j in range(num_sample):
197 | '''
198 | #####green nodes take dqn
199 | target_act_next_n = []
200 | for i in range(self.n):
201 | if i
[... the remainder of maddpg.py (the rest of the update() method) was lost in this export ...]
--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/replay_buffer.py:
--------------------------------------------------------------------------------
[... the opening of replay_buffer.py (imports and the start of the ReplayBuffer class) was lost in this export ...]
28 | if self._next_idx >= len(self._storage):
29 | self._storage.append(data)
30 | else:
31 | self._storage[self._next_idx] = data
32 | self._next_idx = (self._next_idx + 1) % self._maxsize
33 |
34 | def _encode_sample(self, idxes):
35 | obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
36 | for i in idxes:
37 | data = self._storage[i]
38 | obs_t, action, reward, obs_tp1, done = data
39 | obses_t.append(np.array(obs_t, copy=False))
40 | actions.append(np.array(action, copy=False))
41 | rewards.append(reward)
42 | obses_tp1.append(np.array(obs_tp1, copy=False))
43 | dones.append(done)
44 | return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)
45 |
46 | def make_index(self, batch_size):
47 | return [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
48 |
49 | def make_latest_index(self, batch_size):
50 | idx = [(self._next_idx - 1 - i) % self._maxsize for i in range(batch_size)]
51 | np.random.shuffle(idx)
52 | return idx
53 |
54 | def sample_index(self, idxes):
55 | return self._encode_sample(idxes)
56 |
57 | def sample(self, batch_size):
58 | """Sample a batch of experiences.
59 |
60 | Parameters
61 | ----------
62 | batch_size: int
63 | How many transitions to sample.
64 |
65 | Returns
66 | -------
67 | obs_batch: np.array
68 | batch of observations
69 | act_batch: np.array
70 | batch of actions executed given obs_batch
71 | rew_batch: np.array
72 | rewards received as results of executing act_batch
73 | next_obs_batch: np.array
74 | next set of observations seen after executing act_batch
75 | done_mask: np.array
76 | done_mask[i] = 1 if executing act_batch[i] resulted in
77 | the end of an episode and 0 otherwise.
78 | """
79 | if batch_size > 0:
80 | idxes = self.make_index(batch_size)
81 | else:
82 | idxes = range(0, len(self._storage))
83 | return self._encode_sample(idxes)
84 |
85 | def collect(self):
86 | return self.sample(-1)
87 |
--------------------------------------------------------------------------------
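A usage sketch for the ReplayBuffer API documented above (illustrative only, not a file from this repository; the 4-d observations and 2-d actions are made-up shapes):

    import numpy as np
    from maddpg.trainer.replay_buffer import ReplayBuffer

    buf = ReplayBuffer(1e6)            # same capacity MADDPGAgentTrainer passes in maddpg.py
    for _ in range(1000):
        obs, act = np.random.rand(4), np.random.rand(2)   # dummy transition
        buf.add(obs, act, 0.0, np.random.rand(4), False)  # (obs_t, action, reward, obs_tp1, done)

    # MADDPG draws one index set and reuses it across all agents' buffers,
    # so the per-agent batches stay aligned (see update() in trainer/maddpg.py).
    idx = buf.make_index(batch_size=32)
    obs_b, act_b, rew_b, next_obs_b, done_b = buf.sample_index(idx)
    print(obs_b.shape, done_b.shape)   # (32, 4) (32,)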
/MADDPG/maddpg/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='maddpg',
4 | version='0.0.1',
5 | description='Multi-Agent Deep Deterministic Policy Gradient',
6 | url='https://github.com/openai/maddpg',
7 | author='Igor Mordatch',
8 | author_email='mordatch@openai.com',
9 | packages=find_packages(),
10 | include_package_data=True,
11 | zip_safe=False,
12 | install_requires=['gym', 'numpy-stl']
13 | )
14 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/.DS_Store
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "/Users/xunyunliu/anaconda3/envs/tf/bin/python"
3 | }
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/README.md:
--------------------------------------------------------------------------------
1 | **Status:** Archive (code is provided as-is, no updates expected)
2 |
3 | # Multi-Agent Particle Environment
4 |
5 | A simple multi-agent particle world with a continuous observation and discrete action space, along with some basic simulated physics.
6 | Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments](https://arxiv.org/pdf/1706.02275.pdf).
7 |
8 | ## Getting started:
9 |
10 | - To install, `cd` into the root directory and type `pip install -e .`
11 |
12 | - To interactively view moving to landmark scenario (see others in ./scenarios/):
13 | `bin/interactive.py --scenario simple.py`
14 |
15 | - Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
16 |
17 | - To use the environments, look at the code for importing them in `make_env.py`.
18 |
19 |
20 |
--------------------------------------------------------------------------------
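The README's last bullet points to make_env.py for programmatic use. As a minimal sketch, mirroring the construction in bin/interactive.py below (the repository's own make_env.py, listed under MADQN, is the authoritative version; this snippet is not a file from the dump):

    from multiagent.environment import MultiAgentEnv
    import multiagent.scenarios as scenarios

    def make_env(scenario_name):
        # scenario_name is e.g. 'simple' or 'simple_tag_v1' (see multiagent/scenarios/)
        scenario = scenarios.load(scenario_name + '.py').Scenario()
        world = scenario.make_world()
        # the scenario object supplies the reset/reward/observation callbacks
        return MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)

    env = make_env('simple')
    obs_n = env.reset()   # one observation per agent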
/MADDPG/multiagent-particle-envs/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/bin/__init__.py
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/bin/interactive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os,sys
3 | sys.path.insert(1, os.path.join(sys.path[0], '..'))
4 | import argparse
5 |
6 | from multiagent.environment import MultiAgentEnv
7 | from multiagent.policy import InteractivePolicy
8 | import multiagent.scenarios as scenarios
9 |
10 | if __name__ == '__main__':
11 | # parse arguments
12 | parser = argparse.ArgumentParser(description=None)
13 | parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.')
14 | args = parser.parse_args()
15 |
16 | # load scenario from script
17 | scenario = scenarios.load(args.scenario).Scenario()
18 | # create world
19 | world = scenario.make_world()
20 | # create multiagent environment
21 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False)
22 | # render call to create viewer window (necessary only for interactive policies)
23 | env.render()
24 | # create interactive policies for each agent
25 | policies = [InteractivePolicy(env,i) for i in range(env.n)]
26 | # execution loop
27 | obs_n = env.reset()
28 | while True:
29 | # query for action from each agent's policy
30 | act_n = []
31 | for i, policy in enumerate(policies):
32 | act_n.append(policy.action(obs_n[i]))
33 | # step environment
34 | obs_n, reward_n, done_n, _ = env.step(act_n)
35 | # render all agent views
36 | env.render()
37 | # display rewards
38 | #for agent in env.world.agents:
39 | # print(agent.name + " reward: %0.3f" % env._get_reward(agent))
40 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: multiagent
3 | Version: 0.0.1
4 | Summary: Multi-Agent Goal-Driven Communication Environment
5 | Home-page: https://github.com/openai/multiagent-public
6 | Author: Igor Mordatch
7 | Author-email: mordatch@openai.com
8 | License: UNKNOWN
9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | bin/__init__.py
4 | bin/interactive.py
5 | multiagent/__init__.py
6 | multiagent/core.py
7 | multiagent/environment.py
8 | multiagent/multi_discrete.py
9 | multiagent/policy.py
10 | multiagent/rendering.py
11 | multiagent/scenario.py
12 | multiagent.egg-info/PKG-INFO
13 | multiagent.egg-info/SOURCES.txt
14 | multiagent.egg-info/dependency_links.txt
15 | multiagent.egg-info/not-zip-safe
16 | multiagent.egg-info/requires.txt
17 | multiagent.egg-info/top_level.txt
18 | multiagent/scenarios/__init__.py
19 | multiagent/scenarios/angle_3v3.py
20 | multiagent/scenarios/competition_3v3.py
21 | multiagent/scenarios/simple.py
22 | multiagent/scenarios/simple_adversary.py
23 | multiagent/scenarios/simple_crypto.py
24 | multiagent/scenarios/simple_push.py
25 | multiagent/scenarios/simple_reference.py
26 | multiagent/scenarios/simple_speaker_listener.py
27 | multiagent/scenarios/simple_spread.py
28 | multiagent/scenarios/simple_tag_v1.py
29 | multiagent/scenarios/simple_world_comm.py
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/not-zip-safe:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | gym
2 | numpy-stl
3 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | bin
2 | multiagent
3 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/.DS_Store
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 | # Multiagent envs
4 | # ----------------------------------------
5 |
6 | register(
7 | id='MultiagentSimple-v0',
8 | entry_point='multiagent.envs:SimpleEnv',
9 | # FIXME(cathywu) currently has to be exactly max_path_length parameters in
10 | # rllab run script
11 | max_episode_steps=100,
12 | )
13 |
14 | register(
15 | id='MultiagentSimpleSpeakerListener-v0',
16 | entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
17 | max_episode_steps=100,
18 | )
19 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/core.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/core.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/environment.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/environment.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/multi_discrete.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/multi_discrete.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/rendering.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/rendering.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/scenario.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/scenario.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 |
11 | # state of agents (including communication and internal/mental state)
12 | class AgentState(EntityState):
13 | def __init__(self):
14 | super(AgentState, self).__init__()
15 | # communication utterance
16 | self.c = None
17 |
18 | # action of the agent
19 | class Action(object):
20 | def __init__(self):
21 | # physical action
22 | self.u = None
23 | # communication action
24 | self.c = None
25 |
26 | # properties and state of physical world entity
27 | class Entity(object):
28 | def __init__(self):
29 | # name
30 | self.name = ''
31 | # properties:
32 | self.size = 0.050
33 | # entity can move / be pushed
34 | self.movable = False
35 | # entity collides with others
36 | self.collide = True
37 | # material density (affects mass)
38 | self.density = 25.0
39 | # color
40 | self.color = None
41 | # max speed and accel
42 | self.max_speed = None
43 | self.accel = None
44 | # state
45 | self.state = EntityState()
46 | # mass
47 | self.initial_mass = 1.0
48 |
49 | @property
50 | def mass(self):
51 | return self.initial_mass
52 |
53 | # properties of landmark entities
54 | class Landmark(Entity):
55 | def __init__(self):
56 | super(Landmark, self).__init__()
57 |
58 | # properties of agent entities
59 | class Agent(Entity):
60 | def __init__(self):
61 | super(Agent, self).__init__()
62 | # agents are movable by default
63 | self.movable = True
64 | # cannot send communication signals
65 | self.silent = False
66 | # cannot observe the world
67 | self.blind = False
68 | # physical motor noise amount
69 | self.u_noise = None
70 | # communication noise amount
71 | self.c_noise = None
72 | # control range
73 | self.u_range = 1.0
74 | # state
75 | self.state = AgentState()
76 | # action
77 | self.action = Action()
78 | # script behavior to execute
79 | self.action_callback = None
80 |
81 | self.death = False
82 |
83 | # multi-agent world
84 | class World(object):
85 | def __init__(self):
86 | # list of agents and entities (can change at execution-time!)
87 | self.agents = []
88 | self.landmarks = []
89 | # communication channel dimensionality
90 | self.dim_c = 0
91 | # position dimensionality
92 | self.dim_p = 2
93 | # color dimensionality
94 | self.dim_color = 3
95 | # simulation timestep
96 | self.dt = 0.1
97 | # physical damping
98 | self.damping = 0.25
99 | # contact response parameters
100 | self.contact_force = 1e+2
101 | self.contact_margin = 1e-3
102 |
103 | # return all entities in the world
104 | @property
105 | def entities(self):
106 | return self.agents + self.landmarks
107 |
108 | # return all agents controllable by external policies
109 | @property
110 | def policy_agents(self):
111 | return [agent for agent in self.agents if agent.action_callback is None]
112 |
113 | # return all agents controlled by world scripts
114 | @property
115 | def scripted_agents(self):
116 | return [agent for agent in self.agents if agent.action_callback is not None]
117 |
118 | # update state of the world
119 | def step(self):
120 | # set actions for scripted agents
121 | for agent in self.scripted_agents:
122 | agent.action = agent.action_callback(agent, self)
123 | # gather forces applied to entities
124 | p_force = [None] * len(self.entities)
125 | # apply agent physical controls
126 | p_force = self.apply_action_force(p_force)
127 | # apply environment forces
128 | p_force = self.apply_environment_force(p_force)
129 | # integrate physical state
130 | self.integrate_state(p_force)
131 | # update agent state
132 | for agent in self.agents:
133 | self.update_agent_state(agent)
134 |
135 | # gather agent action forces
136 | def apply_action_force(self, p_force):
137 | # set applied forces
138 | for i,agent in enumerate(self.agents):
139 | if agent.movable:
140 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
141 | p_force[i] = agent.action.u + noise
142 | return p_force
143 |
144 | # gather physical forces acting on entities
145 | def apply_environment_force(self, p_force):
146 | # simple (but inefficient) collision response
147 | for a,entity_a in enumerate(self.entities):
148 | for b,entity_b in enumerate(self.entities):
149 | if(b <= a): continue
150 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
151 | if(f_a is not None):
152 | if(p_force[a] is None): p_force[a] = 0.0
153 | p_force[a] = f_a + p_force[a]
154 | if(f_b is not None):
155 | if(p_force[b] is None): p_force[b] = 0.0
156 | p_force[b] = f_b + p_force[b]
157 | return p_force
158 |
159 | # integrate physical state
160 | def integrate_state(self, p_force):
161 | for i,entity in enumerate(self.entities):
162 | if not entity.movable: continue
163 | entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
164 | if (p_force[i] is not None):
165 | entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
166 | if entity.max_speed is not None:
167 | speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
168 | if speed > entity.max_speed:
169 | entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
170 | np.square(entity.state.p_vel[1])) * entity.max_speed
171 | entity.state.p_pos += entity.state.p_vel * self.dt
172 |
173 | def update_agent_state(self, agent):
174 | # set communication state (directly for now)
175 | if agent.silent:
176 | agent.state.c = np.zeros(self.dim_c)
177 | else:
178 | noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
179 | agent.state.c = agent.action.c + noise
180 |
181 | # get collision forces for any contact between two entities
182 | def get_collision_force(self, entity_a, entity_b):
183 | if (not entity_a.collide) or (not entity_b.collide):
184 | return [None, None] # not a collider
185 | if (entity_a is entity_b):
186 | return [None, None] # don't collide against itself
187 | # compute actual distance between entities
188 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
189 | dist = np.sqrt(np.sum(np.square(delta_pos)))
190 | # minimum allowable distance
191 | dist_min = entity_a.size + entity_b.size
192 | # softmax penetration
193 | k = self.contact_margin
194 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
195 | force = self.contact_force * delta_pos / dist * penetration
196 | force_a = +force if entity_a.movable else None
197 | force_b = -force if entity_b.movable else None
198 | return [force_a, force_b]
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/multi_discrete.py:
--------------------------------------------------------------------------------
1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
3 |
4 | import numpy as np
5 |
6 | import gym
7 | from gym.spaces import prng
8 |
9 | class MultiDiscrete(gym.Space):
10 | """
11 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters
12 | - It can be adapted to both a Discrete action space or a continuous (Box) action space
13 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
14 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
15 | where the discrete action space can take any integers from `min` to `max` (both inclusive)
16 | Note: A value of 0 always needs to represent the NOOP action.
17 | e.g. Nintendo Game Controller
18 | - Can be conceptualized as 3 discrete action spaces:
19 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
20 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
21 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
22 | - Can be initialized as
23 | MultiDiscrete([ [0,4], [0,1], [0,1] ])
24 | """
25 | def __init__(self, array_of_param_array):
26 | self.low = np.array([x[0] for x in array_of_param_array])
27 | self.high = np.array([x[1] for x in array_of_param_array])
28 | self.num_discrete_space = self.low.shape[0]
29 |
30 | def sample(self):
31 | """ Returns a array with one sample from each discrete action space """
32 | # For each row: round(random .* (max - min) + min, 0)
33 | random_array = prng.np_random.rand(self.num_discrete_space)
34 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
35 | def contains(self, x):
36 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
37 |
38 | @property
39 | def shape(self):
40 | return self.num_discrete_space
41 | def __repr__(self):
42 | return "MultiDiscrete" + str(self.num_discrete_space)
43 | def __eq__(self, other):
44 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
--------------------------------------------------------------------------------
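A small illustrative example of this space (not part of the repository), following the controller example in the docstring above:

    from multiagent.multi_discrete import MultiDiscrete

    space = MultiDiscrete([[0, 4], [0, 1], [0, 1]])   # arrow keys, button A, button B
    print(space.num_discrete_space)                   # 3
    print(space.contains([0, 0, 0]))                  # True: all-NOOP is valid
    print(space.contains([5, 0, 0]))                  # False: 5 exceeds the max of 4
    # space.sample() additionally needs the old gym.spaces.prng module
    # (gym 0.10.5, per the README above), so it is omitted here.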
/MADDPG/multiagent-particle-envs/multiagent/policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pyglet.window import key
3 |
4 | # individual agent policy
5 | class Policy(object):
6 | def __init__(self):
7 | pass
8 | def action(self, obs):
9 | raise NotImplementedError()
10 |
11 | # interactive policy based on keyboard input
12 | # hard-coded to deal only with movement, not communication
13 | class InteractivePolicy(Policy):
14 | def __init__(self, env, agent_index):
15 | super(InteractivePolicy, self).__init__()
16 | self.env = env
17 | # hard-coded keyboard events
18 | self.move = [False for i in range(4)]
19 | self.comm = [False for i in range(env.world.dim_c)]
20 | # register keyboard events with this environment's window
21 | env.viewers[agent_index].window.on_key_press = self.key_press
22 | env.viewers[agent_index].window.on_key_release = self.key_release
23 |
24 | def action(self, obs):
25 | # ignore observation and just act based on keyboard events
26 | if self.env.discrete_action_input:
27 | u = 0
28 | if self.move[0]: u = 1
29 | if self.move[1]: u = 2
30 | if self.move[2]: u = 4
31 | if self.move[3]: u = 3
32 | else:
33 | u = np.zeros(5) # 5-d because of no-move action
34 | if self.move[0]: u[1] += 1.0
35 | if self.move[1]: u[2] += 1.0
36 | if self.move[3]: u[3] += 1.0
37 | if self.move[2]: u[4] += 1.0
38 | if True not in self.move:
39 | u[0] += 1.0
40 | return np.concatenate([u, np.zeros(self.env.world.dim_c)])
41 |
42 | # keyboard event callbacks
43 | def key_press(self, k, mod):
44 | if k==key.LEFT: self.move[0] = True
45 | if k==key.RIGHT: self.move[1] = True
46 | if k==key.UP: self.move[2] = True
47 | if k==key.DOWN: self.move[3] = True
48 | def key_release(self, k, mod):
49 | if k==key.LEFT: self.move[0] = False
50 | if k==key.RIGHT: self.move[1] = False
51 | if k==key.UP: self.move[2] = False
52 | if k==key.DOWN: self.move[3] = False
53 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/rendering.py:
--------------------------------------------------------------------------------
1 | """
2 | 2D rendering framework
3 | """
4 | from __future__ import division
5 | import os
6 | import six
7 | import sys
8 |
9 | if "Apple" in sys.version:
10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
13 |
14 | from gym.utils import reraise
15 | from gym import error
16 |
17 | try:
18 | import pyglet
19 | except ImportError as e:
20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
21 |
22 | try:
23 | from pyglet.gl import *
24 | except ImportError as e:
25 | reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
26 |
27 | import math
28 | import numpy as np
29 |
30 | RAD2DEG = 57.29577951308232
31 |
32 | def get_display(spec):
33 | """Convert a display specification (such as :0) into an actual Display
34 | object.
35 |
36 | Pyglet only supports multiple Displays on Linux.
37 | """
38 | if spec is None:
39 | return None
40 | elif isinstance(spec, six.string_types):
41 | return pyglet.canvas.Display(spec)
42 | else:
43 | raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
44 |
45 | class Viewer(object):
46 | def __init__(self, width, height, display=None):
47 | display = get_display(display)
48 |
49 | self.width = width
50 | self.height = height
51 |
52 | self.window = pyglet.window.Window(width=width, height=height, display=display)
53 | self.window.on_close = self.window_closed_by_user
54 | self.geoms = []
55 | self.onetime_geoms = []
56 | self.transform = Transform()
57 |
58 | glEnable(GL_BLEND)
59 | # glEnable(GL_MULTISAMPLE)
60 | glEnable(GL_LINE_SMOOTH)
61 | # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
62 | glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
63 | glLineWidth(2.0)
64 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
65 |
66 | def close(self):
67 | self.window.close()
68 |
69 | def window_closed_by_user(self):
70 | self.close()
71 |
72 | def set_bounds(self, left, right, bottom, top):
73 | assert right > left and top > bottom
74 | scalex = self.width/(right-left)
75 | scaley = self.height/(top-bottom)
76 | self.transform = Transform(
77 | translation=(-left*scalex, -bottom*scaley),
78 | scale=(scalex, scaley))
79 |
80 | def add_geom(self, geom):
81 | self.geoms.append(geom)
82 |
83 | def add_onetime(self, geom):
84 | self.onetime_geoms.append(geom)
85 |
86 | def render(self, return_rgb_array=False):
87 | glClearColor(1,1,1,1)
88 | self.window.clear()
89 | self.window.switch_to()
90 | self.window.dispatch_events()
91 | self.transform.enable()
92 | for geom in self.geoms:
93 | geom.render()
94 | for geom in self.onetime_geoms:
95 | geom.render()
96 | self.transform.disable()
97 | arr = None
98 | if return_rgb_array:
99 | buffer = pyglet.image.get_buffer_manager().get_color_buffer()
100 | image_data = buffer.get_image_data()
101 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
102 | # In https://github.com/openai/gym-http-api/issues/2, we
103 | # discovered that someone using Xmonad on Arch was having
104 | # a window of size 598 x 398, though a 600 x 400 window
105 | # was requested. (Guess Xmonad was preserving a pixel for
106 | # the boundary.) So we use the buffer height/width rather
107 | # than the requested one.
108 | arr = arr.reshape(buffer.height, buffer.width, 4)
109 | arr = arr[::-1,:,0:3]
110 | self.window.flip()
111 | self.onetime_geoms = []
112 | return arr
113 |
114 | # Convenience
115 | def draw_circle(self, radius=10, res=30, filled=True, **attrs):
116 | geom = make_circle(radius=radius, res=res, filled=filled)
117 | _add_attrs(geom, attrs)
118 | self.add_onetime(geom)
119 | return geom
120 |
121 | def draw_polygon(self, v, filled=True, **attrs):
122 | geom = make_polygon(v=v, filled=filled)
123 | _add_attrs(geom, attrs)
124 | self.add_onetime(geom)
125 | return geom
126 |
127 | def draw_polyline(self, v, **attrs):
128 | geom = make_polyline(v=v)
129 | _add_attrs(geom, attrs)
130 | self.add_onetime(geom)
131 | return geom
132 |
133 | def draw_line(self, start, end, **attrs):
134 | geom = Line(start, end)
135 | _add_attrs(geom, attrs)
136 | self.add_onetime(geom)
137 | return geom
138 |
139 | def get_array(self):
140 | self.window.flip()
141 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
142 | self.window.flip()
143 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
144 | arr = arr.reshape(self.height, self.width, 4)
145 | return arr[::-1,:,0:3]
146 |
147 | def _add_attrs(geom, attrs):
148 | if "color" in attrs:
149 | geom.set_color(*attrs["color"])
150 | if "linewidth" in attrs:
151 | geom.set_linewidth(attrs["linewidth"])
152 |
153 | class Geom(object):
154 | def __init__(self):
155 | self._color=Color((0, 0, 0, 1.0))
156 | self.attrs = [self._color]
157 | def render(self):
158 | for attr in reversed(self.attrs):
159 | attr.enable()
160 | self.render1()
161 | for attr in self.attrs:
162 | attr.disable()
163 | def render1(self):
164 | raise NotImplementedError
165 | def add_attr(self, attr):
166 | self.attrs.append(attr)
167 | def set_color(self, r, g, b, alpha=1):
168 | self._color.vec4 = (r, g, b, alpha)
169 |
170 | class Attr(object):
171 | def enable(self):
172 | raise NotImplementedError
173 | def disable(self):
174 | pass
175 |
176 | class Transform(Attr):
177 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
178 | self.set_translation(*translation)
179 | self.set_rotation(rotation)
180 | self.set_scale(*scale)
181 | def enable(self):
182 | glPushMatrix()
183 | glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc point
184 | glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
185 | glScalef(self.scale[0], self.scale[1], 1)
186 | def disable(self):
187 | glPopMatrix()
188 | def set_translation(self, newx, newy):
189 | self.translation = (float(newx), float(newy))
190 | def set_rotation(self, new):
191 | self.rotation = float(new)
192 | def set_scale(self, newx, newy):
193 | self.scale = (float(newx), float(newy))
194 |
195 | class Color(Attr):
196 | def __init__(self, vec4):
197 | self.vec4 = vec4
198 | def enable(self):
199 | glColor4f(*self.vec4)
200 |
201 | class LineStyle(Attr):
202 | def __init__(self, style):
203 | self.style = style
204 | def enable(self):
205 | glEnable(GL_LINE_STIPPLE)
206 | glLineStipple(1, self.style)
207 | def disable(self):
208 | glDisable(GL_LINE_STIPPLE)
209 |
210 | class LineWidth(Attr):
211 | def __init__(self, stroke):
212 | self.stroke = stroke
213 | def enable(self):
214 | glLineWidth(self.stroke)
215 |
216 | class Point(Geom):
217 | def __init__(self):
218 | Geom.__init__(self)
219 | def render1(self):
220 | glBegin(GL_POINTS) # draw point
221 | glVertex3f(0.0, 0.0, 0.0)
222 | glEnd()
223 |
224 | class FilledPolygon(Geom):
225 | def __init__(self, v):
226 | Geom.__init__(self)
227 | self.v = v
228 | def render1(self):
229 | if len(self.v) == 4 : glBegin(GL_QUADS)
230 | elif len(self.v) > 4 : glBegin(GL_POLYGON)
231 | else: glBegin(GL_TRIANGLES)
232 | for p in self.v:
233 | glVertex3f(p[0], p[1],0) # draw each vertex
234 | glEnd()
235 |
236 | color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
237 | glColor4f(*color)
238 | glBegin(GL_LINE_LOOP)
239 | for p in self.v:
240 | glVertex3f(p[0], p[1],0) # draw each vertex
241 | glEnd()
242 |
243 | def make_circle(radius=10, res=30, filled=True):
244 | points = []
245 | for i in range(res):
246 | ang = 2*math.pi*i / res
247 | points.append((math.cos(ang)*radius, math.sin(ang)*radius))
248 | if filled:
249 | return FilledPolygon(points)
250 | else:
251 | return PolyLine(points, True)
252 |
253 | def make_polygon(v, filled=True):
254 | if filled: return FilledPolygon(v)
255 | else: return PolyLine(v, True)
256 |
257 | def make_polyline(v):
258 | return PolyLine(v, False)
259 |
260 | def make_line(start,end):
261 | return Line(start,end)
262 |
263 | def make_capsule(length, width):
264 | l, r, t, b = 0, length, width/2, -width/2
265 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
266 | circ0 = make_circle(width/2)
267 | circ1 = make_circle(width/2)
268 | circ1.add_attr(Transform(translation=(length, 0)))
269 | geom = Compound([box, circ0, circ1])
270 | return geom
271 |
272 | class Compound(Geom):
273 | def __init__(self, gs):
274 | Geom.__init__(self)
275 | self.gs = gs
276 | for g in self.gs:
277 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
278 | def render1(self):
279 | for g in self.gs:
280 | g.render()
281 |
282 | class PolyLine(Geom):
283 | def __init__(self, v, close):
284 | Geom.__init__(self)
285 | self.v = v
286 | self.close = close
287 | self.linewidth = LineWidth(1)
288 | self.add_attr(self.linewidth)
289 | def render1(self):
290 | glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
291 | for p in self.v:
292 | glVertex3f(p[0], p[1],0) # draw each vertex
293 | glEnd()
294 | def set_linewidth(self, x):
295 | self.linewidth.stroke = x
296 |
297 | class Line(Geom):
298 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
299 | Geom.__init__(self)
300 | self.start = start
301 | self.end = end
302 | self.linewidth = LineWidth(1)
303 | self.add_attr(self.linewidth)
304 |
305 | def render1(self):
306 | glBegin(GL_LINES)
307 | glVertex2f(*self.start)
308 | glVertex2f(*self.end)
309 | glEnd()
310 |
311 | class Image(Geom):
312 | def __init__(self, fname, width, height):
313 | Geom.__init__(self)
314 | self.width = width
315 | self.height = height
316 | img = pyglet.image.load(fname)
317 | self.img = img
318 | self.flip = False
319 | def render1(self):
320 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
321 |
322 | # ================================================================
323 |
324 | class SimpleImageViewer(object):
325 | def __init__(self, display=None):
326 | self.window = None
327 | self.isopen = False
328 | self.display = display
329 | def imshow(self, arr):
330 | if self.window is None:
331 | height, width, channels = arr.shape
332 | self.window = pyglet.window.Window(width=width, height=height, display=self.display)
333 | self.width = width
334 | self.height = height
335 | self.isopen = True
336 |         assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong shape"
337 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
338 | self.window.clear()
339 | self.window.switch_to()
340 | self.window.dispatch_events()
341 | image.blit(0,0)
342 | self.window.flip()
343 | def close(self):
344 | if self.isopen:
345 | self.window.close()
346 | self.isopen = False
347 | def __del__(self):
348 | self.close()
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenario.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # defines scenario upon which the world is built
4 | class BaseScenario(object):
5 | # create elements of the world
6 | def make_world(self):
7 | raise NotImplementedError()
8 | # create initial conditions of the world
9 | def reset_world(self, world):
10 | raise NotImplementedError()
11 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | import imp
2 | import os.path as osp
3 |
4 |
5 | def load(name):
6 | pathname = osp.join(osp.dirname(__file__), name)
7 | return imp.load_source('', pathname)
8 |
--------------------------------------------------------------------------------
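The `load` helper above returns the scenario module itself; callers then instantiate its `Scenario` class and hand the resulting world to `MultiAgentEnv`, as `bin/interactive.py` does later in this repository. A minimal sketch of that pattern (the scenario file name is only an example):

```python
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv

# load a scenario module by file name and instantiate its Scenario class
scenario = scenarios.load('simple_tag_v1.py').Scenario()

# build the world it defines and wrap it in a multi-agent environment
world = scenario.make_world()
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
```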
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/competition_3v3.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/competition_3v3.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_v1.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_v1.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_yuan_v2.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_yuan_v2.cpython-35.pyc
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/angle_3v3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 | import math
5 |
6 | attack_angle = 90
7 | defense_angle = 90
8 | fire_range = 0.1
9 |
10 |
11 | class Scenario(BaseScenario):
12 | def make_world(self):
13 | world = World()
14 | # set any world properties first
15 | world.dim_c = 2
16 | num_good_agents = 2
17 | num_adversaries = 2
18 | num_agents = num_adversaries + num_good_agents
19 | num_landmarks = 0
20 | # add agents
21 | world.agents = [Agent() for i in range(num_agents)]
22 | for i, agent in enumerate(world.agents):
23 | agent.name = 'agent %d' % i
24 | agent.collide = True
25 | agent.silent = True
26 | agent.adversary = True if i < num_adversaries else False
27 | agent.size = 0.03 if agent.adversary else 0.03
28 | agent.accel = 3.0 if agent.adversary else 4.0
29 | #agent.accel = 20.0 if agent.adversary else 25.0
30 | agent.max_speed = 1.3 if agent.adversary else 1.0
31 | #agent.max_speed = 1.0 if agent.adversary else 0.0 ###changed by liyuan
32 | agent.death = False
33 | # add landmarks
34 | world.landmarks = [Landmark() for i in range(num_landmarks)]
35 | for i, landmark in enumerate(world.landmarks):
36 | landmark.name = 'landmark %d' % i
37 | landmark.collide = True
38 | landmark.movable = False
39 | landmark.size = 0.2
40 | landmark.boundary = False
41 | # make initial conditions
42 | self.reset_world(world)
43 | return world
44 |
45 |
46 | def reset_world(self, world):
47 | # random properties for agents
48 | for i, agent in enumerate(world.agents):
49 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
50 | # random properties for landmarks
51 | for i, landmark in enumerate(world.landmarks):
52 | landmark.color = np.array([0.25, 0.25, 0.25])
53 | # set random initial states
54 | for agent in world.agents:
55 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 | agent.state.p_vel = np.zeros(world.dim_p)
57 | agent.state.c = np.zeros(world.dim_c)
58 | agent.death = False
59 | for i, landmark in enumerate(world.landmarks):
60 | if not landmark.boundary:
61 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
62 | landmark.state.p_vel = np.zeros(world.dim_p)
63 |
64 |
65 | def benchmark_data(self, agent, world):
66 | # returns data for benchmarking purposes
67 | if agent.adversary:
68 | collisions = 0
69 | for a in self.good_agents(world):
70 |                 if self.is_collision(a, agent)[0] and a.death == False:
71 | collisions += 1
72 | return collisions
73 | else:
74 | return 0
75 |
76 | '''
77 | def is_collision(self, agent1, agent2):
78 | if agent1.death or agent2.death:
79 | return False
80 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
81 | dist = np.sqrt(np.sum(np.square(delta_pos)))
82 | dist_min = agent1.size + agent2.size
83 | return True if dist < dist_min else False
84 | '''
85 |     ##liyuan: compute the locking count of the agent against each opponent
86 | def compute_lock_num(self, agent, world):
87 | opponent = []
88 | if agent.adversary:
89 | opponent = self.good_agents(world)
90 | else:
91 | opponent = self.adversaries(world)
92 |
93 | for i, opp in enumerate(opponent):
94 | capture,flag = self.is_collision(opp,agent)
95 | if flag == 1:
96 | agent.lock_num[i] += 1
97 | else:
98 | agent.lock_num[i] = 0
99 |
100 | def is_collision(self, agent1, agent2):
101 | if agent1.death or agent2.death:
102 | return False,0
103 |
104 | ###liyuan:judged by angle
105 | delta_pos = agent2.state.p_pos - agent1.state.p_pos
106 | distance = np.sqrt(np.sum(np.square(delta_pos)))
107 | if distance <= 0.0001:
108 | return False,0
109 |
110 | agent1_cross = (delta_pos[0]*agent1.state.p_vel[0]+delta_pos[1]*agent1.state.p_vel[1])/(distance)
111 | if agent1_cross < -1:
112 | agent1_cross = -1
113 | if agent1_cross > 1:
114 | agent1_cross = 1
115 | agent1_angle = math.acos(agent1_cross)
116 |
117 | agent2_cross = (-delta_pos[0]*agent2.state.p_vel[0]-delta_pos[1]*agent2.state.p_vel[1])/(distance)
118 | if agent2_cross < -1:
119 | agent2_cross = -1
120 | if agent2_cross > 1:
121 | agent2_cross = 1
122 | agent2_angle = math.acos(agent2_cross)
123 |
124 |         if distance < fire_range and agent2_angle*180/math.pi>defense_angle and agent1_angle*180/math.pi<attack_angle:
125 |             return True,1
126 |         elif distance < fire_range and agent2_angle*180/math.pi>defense_angle:
127 |             return True,2
128 | else:
129 | return False,0
130 |
131 | # return all agents that are not adversaries
132 | def good_agents(self, world):
133 | return [agent for agent in world.agents if not agent.adversary]
134 |
135 | # return all adversarial agents
136 | def adversaries(self, world):
137 | return [agent for agent in world.agents if agent.adversary]
138 |
139 |
140 | def reward(self, agent, world):
141 | # Agents are rewarded based on minimum agent distance to each landmark
142 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
143 | return main_reward
144 |
145 | def agent_reward(self, agent, world):
146 | ####added by liyuan
147 | if agent.death == True:
148 | return 0
149 | # Agents are negatively rewarded if caught by adversaries
150 | rew = 0
151 | #shape = False
152 | shape = True
153 | adversaries = self.adversaries(world)
154 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary)
155 | for adv in adversaries:
156 | ###changed by liyuan
157 | if adv.death == True:
158 | continue
159 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
160 | if agent.collide:
161 | for a in adversaries:
162 | ###changed by liyuan
163 |                 if self.is_collision(a, agent)[0] and a.death == False:
164 | rew -= 10
165 | agent.death = True
166 |
167 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries
168 | def bound(x):
169 | if x < 0.9:
170 | return 0
171 | if x < 1.0:
172 | return (x - 0.9) * 10
173 | return min(np.exp(2 * x - 2), 10)
174 | for p in range(world.dim_p):
175 | x = abs(agent.state.p_pos[p])
176 | rew -= bound(x)
177 |
178 | for p in range(world.dim_p):
179 | x = abs(agent.state.p_pos[p])
180 | if (x > 1.0):
181 | rew -= 5
182 | break
183 |
184 | return rew
185 |
186 | def adversary_reward(self, agent, world):
187 | ####added by liyuan
188 | if agent.death == True:
189 | return 0
190 | # Adversaries are rewarded for collisions with agents
191 | rew = 0
192 | #shape = False
193 | shape = True
194 | agents = self.good_agents(world)
195 | adversaries = self.adversaries(world)
196 |
197 | '''
198 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents)
199 | for adv in adversaries:
200 | ###rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
201 | if adv.death == False:
202 | dis = []
203 | for a in agents:
204 | if a.death == False:
205 | dis.append(np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))))
206 | if len(dis) > 0:
207 | rew -= 0.1 * min(dis)
208 | '''
209 | if shape:
210 | dis = []
211 | for a in agents:
212 | if a.death == False:
213 | dis.append(np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))))
214 | if len(dis) > 0:
215 | rew -= 0.1 * min(dis)
216 |
217 |
218 | if agent.collide:
219 | for ag in agents:
220 | for adv in adversaries:
221 | ###changed by liyuan
222 |                     if self.is_collision(ag, adv)[0] and ag.death == False and adv.death == False:
223 | if adv is agent:
224 | rew += 50
225 | else:
226 | rew += 30
227 | break
228 |
229 |
230 | for adv in adversaries:
231 | if adv.death == False:
232 | exceed = False
233 | for p in range(world.dim_p):
234 | x = abs(adv.state.p_pos[p])
235 | if (x > 1.0):
236 | exceed = True
237 | break
238 | if exceed == True:
239 | if adv is agent:
240 | rew -= 20
241 | else:
242 | rew -=10
243 | break
244 |
245 | return rew
246 |
247 | def observation(self, agent, world):
248 | # get positions of all entities in this agent's reference frame
249 | entity_pos = []
250 | for entity in world.landmarks:
251 | if not entity.boundary:
252 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
253 | # communication of all other agents
254 | comm = []
255 | other_pos = []
256 | other_vel = []
257 | for other in world.agents:
258 | if other is agent: continue
259 | ###changed by liyuan
260 | if other.death:
261 | comm.append(np.zeros(world.dim_c))
262 | other_pos.append(np.zeros(world.dim_p))
263 | other_vel.append(np.zeros(world.dim_p))
264 | else:
265 | comm.append(other.state.c)
266 | other_pos.append(other.state.p_pos - agent.state.p_pos)
267 | #if not other.adversary:
268 | other_vel.append(other.state.p_vel)
269 |
270 | #comm.append(other.state.c)
271 | #other_pos.append(other.state.p_pos - agent.state.p_pos)
272 | #if not other.adversary:
273 | #other_vel.append(other.state.p_vel)
274 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
275 |
276 |     ##added by liyuan: if all green agents die, this episode is over.
277 | def done(self, agent, world):
278 | allDie = True
279 | agents = self.good_agents(world)
280 | for agent in agents:
281 | if agent.death == False:
282 | allDie = False
283 | break
284 | return allDie
285 |
--------------------------------------------------------------------------------
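To make the angle-based lock test in `is_collision` concrete, the following standalone check evaluates it for hypothetical positions and velocities (unit-speed agents, so normalising by speed coincides with the file's division by distance alone); the thresholds are copied from the top of `angle_3v3.py`:

```python
import math
import numpy as np

attack_angle, defense_angle, fire_range = 90, 90, 0.1  # thresholds from angle_3v3.py

# hypothetical states: agent1 sits just behind agent2, both flying along +x
p1, v1 = np.array([0.00, 0.0]), np.array([1.0, 0.0])
p2, v2 = np.array([0.05, 0.0]), np.array([1.0, 0.0])

delta = p2 - p1
distance = np.linalg.norm(delta)
# angle between agent1's heading and its line of sight to agent2
a1 = math.degrees(math.acos(np.clip(np.dot(delta, v1) / (distance * np.linalg.norm(v1)), -1, 1)))
# angle between agent2's heading and the line of sight back to agent1
a2 = math.degrees(math.acos(np.clip(np.dot(-delta, v2) / (distance * np.linalg.norm(v2)), -1, 1)))

locked = distance < fire_range and a2 > defense_angle and a1 < attack_angle
print(distance, a1, a2, locked)  # ~0.05, 0.0, 180.0, True: agent1 locks agent2 from behind
```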
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # add agents
9 | world.agents = [Agent() for i in range(1)]
10 | for i, agent in enumerate(world.agents):
11 | agent.name = 'agent %d' % i
12 | agent.collide = False
13 | agent.silent = True
14 | # add landmarks
15 | world.landmarks = [Landmark() for i in range(1)]
16 | for i, landmark in enumerate(world.landmarks):
17 | landmark.name = 'landmark %d' % i
18 | landmark.collide = False
19 | landmark.movable = False
20 | # make initial conditions
21 | self.reset_world(world)
22 | return world
23 |
24 | def reset_world(self, world):
25 | # random properties for agents
26 | for i, agent in enumerate(world.agents):
27 | agent.color = np.array([0.25,0.25,0.25])
28 | # random properties for landmarks
29 | for i, landmark in enumerate(world.landmarks):
30 | landmark.color = np.array([0.75,0.75,0.75])
31 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
32 | # set random initial states
33 | for agent in world.agents:
34 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
35 | agent.state.p_vel = np.zeros(world.dim_p)
36 | agent.state.c = np.zeros(world.dim_c)
37 | for i, landmark in enumerate(world.landmarks):
38 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
39 | landmark.state.p_vel = np.zeros(world.dim_p)
40 |
41 | def reward(self, agent, world):
42 | dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
43 | return -dist2
44 |
45 | def observation(self, agent, world):
46 | # get positions of all entities in this agent's reference frame
47 | entity_pos = []
48 | for entity in world.landmarks:
49 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
50 | return np.concatenate([agent.state.p_vel] + entity_pos)
51 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_adversary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 |
8 | def make_world(self):
9 | world = World()
10 | # set any world properties first
11 | world.dim_c = 2
12 | num_agents = 3
13 | world.num_agents = num_agents
14 | num_adversaries = 1
15 | num_landmarks = num_agents - 1
16 | # add agents
17 | world.agents = [Agent() for i in range(num_agents)]
18 | for i, agent in enumerate(world.agents):
19 | agent.name = 'agent %d' % i
20 | agent.collide = False
21 | agent.silent = True
22 | agent.adversary = True if i < num_adversaries else False
23 | agent.size = 0.15
24 | # add landmarks
25 | world.landmarks = [Landmark() for i in range(num_landmarks)]
26 | for i, landmark in enumerate(world.landmarks):
27 | landmark.name = 'landmark %d' % i
28 | landmark.collide = False
29 | landmark.movable = False
30 | landmark.size = 0.08
31 | # make initial conditions
32 | self.reset_world(world)
33 | return world
34 |
35 | def reset_world(self, world):
36 | # random properties for agents
37 | world.agents[0].color = np.array([0.85, 0.35, 0.35])
38 | for i in range(1, world.num_agents):
39 | world.agents[i].color = np.array([0.35, 0.35, 0.85])
40 | # random properties for landmarks
41 | for i, landmark in enumerate(world.landmarks):
42 | landmark.color = np.array([0.15, 0.15, 0.15])
43 | # set goal landmark
44 | goal = np.random.choice(world.landmarks)
45 | goal.color = np.array([0.15, 0.65, 0.15])
46 | for agent in world.agents:
47 | agent.goal_a = goal
48 | # set random initial states
49 | for agent in world.agents:
50 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
51 | agent.state.p_vel = np.zeros(world.dim_p)
52 | agent.state.c = np.zeros(world.dim_c)
53 | for i, landmark in enumerate(world.landmarks):
54 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
55 | landmark.state.p_vel = np.zeros(world.dim_p)
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
61 | else:
62 | dists = []
63 | for l in world.landmarks:
64 | dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
65 | dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
66 | return tuple(dists)
67 |
68 | # return all agents that are not adversaries
69 | def good_agents(self, world):
70 | return [agent for agent in world.agents if not agent.adversary]
71 |
72 | # return all adversarial agents
73 | def adversaries(self, world):
74 | return [agent for agent in world.agents if agent.adversary]
75 |
76 | def reward(self, agent, world):
77 | # Agents are rewarded based on minimum agent distance to each landmark
78 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
79 |
80 | def agent_reward(self, agent, world):
81 | # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
82 | shaped_reward = True
83 | shaped_adv_reward = True
84 |
85 | # Calculate negative reward for adversary
86 | adversary_agents = self.adversaries(world)
87 | if shaped_adv_reward: # distance-based adversary reward
88 | adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
89 | else: # proximity-based adversary reward (binary)
90 | adv_rew = 0
91 | for a in adversary_agents:
92 | if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
93 | adv_rew -= 5
94 |
95 | # Calculate positive reward for agents
96 | good_agents = self.good_agents(world)
97 | if shaped_reward: # distance-based agent reward
98 | pos_rew = -min(
99 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
100 | else: # proximity-based agent reward (binary)
101 | pos_rew = 0
102 | if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
103 | < 2 * agent.goal_a.size:
104 | pos_rew += 5
105 | pos_rew -= min(
106 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
107 | return pos_rew + adv_rew
108 |
109 | def adversary_reward(self, agent, world):
110 | # Rewarded based on proximity to the goal landmark
111 | shaped_reward = True
112 | if shaped_reward: # distance-based reward
113 | return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
114 | else: # proximity-based reward (binary)
115 | adv_rew = 0
116 | if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
117 | adv_rew += 5
118 | return adv_rew
119 |
120 |
121 | def observation(self, agent, world):
122 | # get positions of all entities in this agent's reference frame
123 | entity_pos = []
124 | for entity in world.landmarks:
125 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
126 | # entity colors
127 | entity_color = []
128 | for entity in world.landmarks:
129 | entity_color.append(entity.color)
130 | # communication of all other agents
131 | other_pos = []
132 | for other in world.agents:
133 | if other is agent: continue
134 | other_pos.append(other.state.p_pos - agent.state.p_pos)
135 |
136 | if not agent.adversary:
137 | return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
138 | else:
139 | return np.concatenate(entity_pos + other_pos)
140 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_crypto.py:
--------------------------------------------------------------------------------
1 | """
2 | Scenario:
3 | 1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from
4 | adversary to goal. Adversary is rewarded for its distance to the goal.
5 | """
6 |
7 |
8 | import numpy as np
9 | from multiagent.core import World, Agent, Landmark
10 | from multiagent.scenario import BaseScenario
11 | import random
12 |
13 |
14 | class CryptoAgent(Agent):
15 | def __init__(self):
16 | super(CryptoAgent, self).__init__()
17 | self.key = None
18 |
19 | class Scenario(BaseScenario):
20 |
21 | def make_world(self):
22 | world = World()
23 | # set any world properties first
24 | num_agents = 3
25 | num_adversaries = 1
26 | num_landmarks = 2
27 | world.dim_c = 4
28 | # add agents
29 | world.agents = [CryptoAgent() for i in range(num_agents)]
30 | for i, agent in enumerate(world.agents):
31 | agent.name = 'agent %d' % i
32 | agent.collide = False
33 | agent.adversary = True if i < num_adversaries else False
34 | agent.speaker = True if i == 2 else False
35 | agent.movable = False
36 | # add landmarks
37 | world.landmarks = [Landmark() for i in range(num_landmarks)]
38 | for i, landmark in enumerate(world.landmarks):
39 | landmark.name = 'landmark %d' % i
40 | landmark.collide = False
41 | landmark.movable = False
42 | # make initial conditions
43 | self.reset_world(world)
44 | return world
45 |
46 |
47 | def reset_world(self, world):
48 | # random properties for agents
49 | for i, agent in enumerate(world.agents):
50 | agent.color = np.array([0.25, 0.25, 0.25])
51 | if agent.adversary:
52 | agent.color = np.array([0.75, 0.25, 0.25])
53 | agent.key = None
54 | # random properties for landmarks
55 | color_list = [np.zeros(world.dim_c) for i in world.landmarks]
56 | for i, color in enumerate(color_list):
57 | color[i] += 1
58 | for color, landmark in zip(color_list, world.landmarks):
59 | landmark.color = color
60 | # set goal landmark
61 | goal = np.random.choice(world.landmarks)
62 | world.agents[1].color = goal.color
63 | world.agents[2].key = np.random.choice(world.landmarks).color
64 |
65 | for agent in world.agents:
66 | agent.goal_a = goal
67 |
68 | # set random initial states
69 | for agent in world.agents:
70 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
71 | agent.state.p_vel = np.zeros(world.dim_p)
72 | agent.state.c = np.zeros(world.dim_c)
73 | for i, landmark in enumerate(world.landmarks):
74 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
75 | landmark.state.p_vel = np.zeros(world.dim_p)
76 |
77 |
78 | def benchmark_data(self, agent, world):
79 | # returns data for benchmarking purposes
80 | return (agent.state.c, agent.goal_a.color)
81 |
82 | # return all agents that are not adversaries
83 | def good_listeners(self, world):
84 | return [agent for agent in world.agents if not agent.adversary and not agent.speaker]
85 |
86 | # return all agents that are not adversaries
87 | def good_agents(self, world):
88 | return [agent for agent in world.agents if not agent.adversary]
89 |
90 | # return all adversarial agents
91 | def adversaries(self, world):
92 | return [agent for agent in world.agents if agent.adversary]
93 |
94 | def reward(self, agent, world):
95 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
96 |
97 | def agent_reward(self, agent, world):
98 | # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot
99 | good_listeners = self.good_listeners(world)
100 | adversaries = self.adversaries(world)
101 | good_rew = 0
102 | adv_rew = 0
103 | for a in good_listeners:
104 | if (a.state.c == np.zeros(world.dim_c)).all():
105 | continue
106 | else:
107 | good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color))
108 | for a in adversaries:
109 | if (a.state.c == np.zeros(world.dim_c)).all():
110 | continue
111 | else:
112 | adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color))
113 | adv_rew += adv_l1
114 | return adv_rew + good_rew
115 |
116 | def adversary_reward(self, agent, world):
117 | # Adversary (Eve) is rewarded if it can reconstruct original goal
118 | rew = 0
119 | if not (agent.state.c == np.zeros(world.dim_c)).all():
120 | rew -= np.sum(np.square(agent.state.c - agent.goal_a.color))
121 | return rew
122 |
123 |
124 | def observation(self, agent, world):
125 | # goal color
126 | goal_color = np.zeros(world.dim_color)
127 | if agent.goal_a is not None:
128 | goal_color = agent.goal_a.color
129 |
130 | # get positions of all entities in this agent's reference frame
131 | entity_pos = []
132 | for entity in world.landmarks:
133 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
134 | # communication of all other agents
135 | comm = []
136 | for other in world.agents:
137 | if other is agent or (other.state.c is None) or not other.speaker: continue
138 | comm.append(other.state.c)
139 |
140 | confer = np.array([0])
141 |
142 | if world.agents[2].key is None:
143 | confer = np.array([1])
144 | key = np.zeros(world.dim_c)
145 | goal_color = np.zeros(world.dim_c)
146 | else:
147 | key = world.agents[2].key
148 |
149 | prnt = False
150 | # speaker
151 | if agent.speaker:
152 | if prnt:
153 | print('speaker')
154 | print(agent.state.c)
155 | print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)]))
156 | return np.concatenate([goal_color] + [key])
157 | # listener
158 | if not agent.speaker and not agent.adversary:
159 | if prnt:
160 | print('listener')
161 | print(agent.state.c)
162 | print(np.concatenate([key] + comm + [confer]))
163 | return np.concatenate([key] + comm)
164 | if not agent.speaker and agent.adversary:
165 | if prnt:
166 | print('adversary')
167 | print(agent.state.c)
168 | print(np.concatenate(comm + [confer]))
169 | return np.concatenate(comm)
170 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_push.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 2
10 | num_agents = 2
11 | num_adversaries = 1
12 | num_landmarks = 2
13 | # add agents
14 | world.agents = [Agent() for i in range(num_agents)]
15 | for i, agent in enumerate(world.agents):
16 | agent.name = 'agent %d' % i
17 | agent.collide = True
18 | agent.silent = True
19 | if i < num_adversaries:
20 | agent.adversary = True
21 | else:
22 | agent.adversary = False
23 | # add landmarks
24 | world.landmarks = [Landmark() for i in range(num_landmarks)]
25 | for i, landmark in enumerate(world.landmarks):
26 | landmark.name = 'landmark %d' % i
27 | landmark.collide = False
28 | landmark.movable = False
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # random properties for landmarks
35 | for i, landmark in enumerate(world.landmarks):
36 | landmark.color = np.array([0.1, 0.1, 0.1])
37 | landmark.color[i + 1] += 0.8
38 | landmark.index = i
39 | # set goal landmark
40 | goal = np.random.choice(world.landmarks)
41 | for i, agent in enumerate(world.agents):
42 | agent.goal_a = goal
43 | agent.color = np.array([0.25, 0.25, 0.25])
44 | if agent.adversary:
45 | agent.color = np.array([0.75, 0.25, 0.25])
46 | else:
47 | j = goal.index
48 | agent.color[j + 1] += 0.5
49 | # set random initial states
50 | for agent in world.agents:
51 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
52 | agent.state.p_vel = np.zeros(world.dim_p)
53 | agent.state.c = np.zeros(world.dim_c)
54 | for i, landmark in enumerate(world.landmarks):
55 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 | landmark.state.p_vel = np.zeros(world.dim_p)
57 |
58 | def reward(self, agent, world):
59 | # Agents are rewarded based on minimum agent distance to each landmark
60 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
61 |
62 | def agent_reward(self, agent, world):
63 | # the distance to the goal
64 | return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
65 |
66 | def adversary_reward(self, agent, world):
67 | # keep the nearest good agents away from the goal
68 | agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary]
69 | pos_rew = min(agent_dist)
70 | #nearest_agent = world.good_agents[np.argmin(agent_dist)]
71 | #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos)))
72 | neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)))
73 | #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents])
74 | return pos_rew - neg_rew
75 |
76 | def observation(self, agent, world):
77 | # get positions of all entities in this agent's reference frame
78 | entity_pos = []
79 | for entity in world.landmarks: # world.entities:
80 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
81 | # entity colors
82 | entity_color = []
83 | for entity in world.landmarks: # world.entities:
84 | entity_color.append(entity.color)
85 | # communication of all other agents
86 | comm = []
87 | other_pos = []
88 | for other in world.agents:
89 | if other is agent: continue
90 | comm.append(other.state.c)
91 | other_pos.append(other.state.p_pos - agent.state.p_pos)
92 | if not agent.adversary:
93 | return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos)
94 | else:
95 | #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network
96 | return np.concatenate([agent.state.p_vel] + entity_pos + other_pos)
97 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_reference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | world.collaborative = True # whether agents share rewards
11 | # add agents
12 | world.agents = [Agent() for i in range(2)]
13 | for i, agent in enumerate(world.agents):
14 | agent.name = 'agent %d' % i
15 | agent.collide = False
16 | # add landmarks
17 | world.landmarks = [Landmark() for i in range(3)]
18 | for i, landmark in enumerate(world.landmarks):
19 | landmark.name = 'landmark %d' % i
20 | landmark.collide = False
21 | landmark.movable = False
22 | # make initial conditions
23 | self.reset_world(world)
24 | return world
25 |
26 | def reset_world(self, world):
27 | # assign goals to agents
28 | for agent in world.agents:
29 | agent.goal_a = None
30 | agent.goal_b = None
31 | # want other agent to go to the goal landmark
32 | world.agents[0].goal_a = world.agents[1]
33 | world.agents[0].goal_b = np.random.choice(world.landmarks)
34 | world.agents[1].goal_a = world.agents[0]
35 | world.agents[1].goal_b = np.random.choice(world.landmarks)
36 | # random properties for agents
37 | for i, agent in enumerate(world.agents):
38 | agent.color = np.array([0.25,0.25,0.25])
39 | # random properties for landmarks
40 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
41 | world.landmarks[1].color = np.array([0.25,0.75,0.25])
42 | world.landmarks[2].color = np.array([0.25,0.25,0.75])
43 | # special colors for goals
44 | world.agents[0].goal_a.color = world.agents[0].goal_b.color
45 | world.agents[1].goal_a.color = world.agents[1].goal_b.color
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | landmark.state.p_vel = np.zeros(world.dim_p)
54 |
55 | def reward(self, agent, world):
56 | if agent.goal_a is None or agent.goal_b is None:
57 | return 0.0
58 | dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
59 | return -dist2
60 |
61 | def observation(self, agent, world):
62 | # goal color
63 | goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
64 | if agent.goal_b is not None:
65 | goal_color[1] = agent.goal_b.color
66 |
67 | # get positions of all entities in this agent's reference frame
68 | entity_pos = []
69 | for entity in world.landmarks:
70 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
71 | # entity colors
72 | entity_color = []
73 | for entity in world.landmarks:
74 | entity_color.append(entity.color)
75 | # communication of all other agents
76 | comm = []
77 | for other in world.agents:
78 | if other is agent: continue
79 | comm.append(other.state.c)
80 | return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
81 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_speaker_listener.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 3
10 | num_landmarks = 3
11 | world.collaborative = True
12 | # add agents
13 | world.agents = [Agent() for i in range(2)]
14 | for i, agent in enumerate(world.agents):
15 | agent.name = 'agent %d' % i
16 | agent.collide = False
17 | agent.size = 0.075
18 | # speaker
19 | world.agents[0].movable = False
20 | # listener
21 | world.agents[1].silent = True
22 | # add landmarks
23 | world.landmarks = [Landmark() for i in range(num_landmarks)]
24 | for i, landmark in enumerate(world.landmarks):
25 | landmark.name = 'landmark %d' % i
26 | landmark.collide = False
27 | landmark.movable = False
28 | landmark.size = 0.04
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # assign goals to agents
35 | for agent in world.agents:
36 | agent.goal_a = None
37 | agent.goal_b = None
38 | # want listener to go to the goal landmark
39 | world.agents[0].goal_a = world.agents[1]
40 | world.agents[0].goal_b = np.random.choice(world.landmarks)
41 | # random properties for agents
42 | for i, agent in enumerate(world.agents):
43 | agent.color = np.array([0.25,0.25,0.25])
44 | # random properties for landmarks
45 | world.landmarks[0].color = np.array([0.65,0.15,0.15])
46 | world.landmarks[1].color = np.array([0.15,0.65,0.15])
47 | world.landmarks[2].color = np.array([0.15,0.15,0.65])
48 | # special colors for goals
49 | world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45])
50 | # set random initial states
51 | for agent in world.agents:
52 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | agent.state.p_vel = np.zeros(world.dim_p)
54 | agent.state.c = np.zeros(world.dim_c)
55 | for i, landmark in enumerate(world.landmarks):
56 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
57 | landmark.state.p_vel = np.zeros(world.dim_p)
58 |
59 | def benchmark_data(self, agent, world):
60 | # returns data for benchmarking purposes
61 |         return self.reward(agent, world)
62 |
63 | def reward(self, agent, world):
64 | # squared distance from listener to landmark
65 | a = world.agents[0]
66 | dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos))
67 | return -dist2
68 |
69 | def observation(self, agent, world):
70 | # goal color
71 | goal_color = np.zeros(world.dim_color)
72 | if agent.goal_b is not None:
73 | goal_color = agent.goal_b.color
74 |
75 | # get positions of all entities in this agent's reference frame
76 | entity_pos = []
77 | for entity in world.landmarks:
78 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
79 |
80 | # communication of all other agents
81 | comm = []
82 | for other in world.agents:
83 | if other is agent or (other.state.c is None): continue
84 | comm.append(other.state.c)
85 |
86 | # speaker
87 | if not agent.movable:
88 | return np.concatenate([goal_color])
89 | # listener
90 | if agent.silent:
91 | return np.concatenate([agent.state.p_vel] + entity_pos + comm)
92 |
93 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_spread.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_agents = 3
12 | num_landmarks = 3
13 | world.collaborative = True
14 | # add agents
15 | world.agents = [Agent() for i in range(num_agents)]
16 | for i, agent in enumerate(world.agents):
17 | agent.name = 'agent %d' % i
18 | agent.collide = True
19 | agent.silent = True
20 | agent.size = 0.15
21 | # add landmarks
22 | world.landmarks = [Landmark() for i in range(num_landmarks)]
23 | for i, landmark in enumerate(world.landmarks):
24 | landmark.name = 'landmark %d' % i
25 | landmark.collide = False
26 | landmark.movable = False
27 | # make initial conditions
28 | self.reset_world(world)
29 | return world
30 |
31 | def reset_world(self, world):
32 | # random properties for agents
33 | for i, agent in enumerate(world.agents):
34 | agent.color = np.array([0.35, 0.35, 0.85])
35 | # random properties for landmarks
36 | for i, landmark in enumerate(world.landmarks):
37 | landmark.color = np.array([0.25, 0.25, 0.25])
38 | # set random initial states
39 | for agent in world.agents:
40 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
41 | agent.state.p_vel = np.zeros(world.dim_p)
42 | agent.state.c = np.zeros(world.dim_c)
43 | for i, landmark in enumerate(world.landmarks):
44 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
45 | landmark.state.p_vel = np.zeros(world.dim_p)
46 |
47 | def benchmark_data(self, agent, world):
48 | rew = 0
49 | collisions = 0
50 | occupied_landmarks = 0
51 | min_dists = 0
52 | for l in world.landmarks:
53 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
54 | min_dists += min(dists)
55 | rew -= min(dists)
56 | if min(dists) < 0.1:
57 | occupied_landmarks += 1
58 | if agent.collide:
59 | for a in world.agents:
60 | if self.is_collision(a, agent):
61 | rew -= 1
62 | collisions += 1
63 | return (rew, collisions, min_dists, occupied_landmarks)
64 |
65 |
66 | def is_collision(self, agent1, agent2):
67 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
68 | dist = np.sqrt(np.sum(np.square(delta_pos)))
69 | dist_min = agent1.size + agent2.size
70 | return True if dist < dist_min else False
71 |
72 | def reward(self, agent, world):
73 | # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
74 | rew = 0
75 | for l in world.landmarks:
76 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
77 | rew -= min(dists)
78 | if agent.collide:
79 | for a in world.agents:
80 | if self.is_collision(a, agent):
81 | rew -= 1
82 | return rew
83 |
84 | def observation(self, agent, world):
85 | # get positions of all entities in this agent's reference frame
86 | entity_pos = []
87 | for entity in world.landmarks: # world.entities:
88 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
89 | # entity colors
90 | entity_color = []
91 | for entity in world.landmarks: # world.entities:
92 | entity_color.append(entity.color)
93 | # communication of all other agents
94 | comm = []
95 | other_pos = []
96 | for other in world.agents:
97 | if other is agent: continue
98 | comm.append(other.state.c)
99 | other_pos.append(other.state.p_pos - agent.state.p_pos)
100 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
101 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_tag_v1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_good_agents = 1
12 | num_adversaries = 2
13 | num_agents = num_adversaries + num_good_agents
14 | num_landmarks = 0
15 | # add agents
16 | world.agents = [Agent() for i in range(num_agents)]
17 | for i, agent in enumerate(world.agents):
18 | agent.name = 'agent %d' % i
19 | agent.collide = True
20 | agent.silent = True
21 | agent.adversary = True if i < num_adversaries else False
22 | agent.size = 0.075 if agent.adversary else 0.05
23 | agent.accel = 3.0 if agent.adversary else 4.0
24 | #agent.accel = 20.0 if agent.adversary else 25.0
25 | agent.max_speed = 1.0 if agent.adversary else 1.3
26 | # add landmarks
27 | world.landmarks = [Landmark() for i in range(num_landmarks)]
28 | for i, landmark in enumerate(world.landmarks):
29 | landmark.name = 'landmark %d' % i
30 | landmark.collide = True
31 | landmark.movable = False
32 | landmark.size = 0.2
33 | landmark.boundary = False
34 | # make initial conditions
35 | self.reset_world(world)
36 | return world
37 |
38 |
39 | def reset_world(self, world):
40 | # random properties for agents
41 | for i, agent in enumerate(world.agents):
42 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
43 | # random properties for landmarks
44 | for i, landmark in enumerate(world.landmarks):
45 | landmark.color = np.array([0.25, 0.25, 0.25])
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | if not landmark.boundary:
53 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
54 | landmark.state.p_vel = np.zeros(world.dim_p)
55 |
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | collisions = 0
61 | for a in self.good_agents(world):
62 | if self.is_collision(a, agent):
63 | collisions += 1
64 | return collisions
65 | else:
66 | return 0
67 |
68 |
69 | def is_collision(self, agent1, agent2):
70 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
71 | dist = np.sqrt(np.sum(np.square(delta_pos)))
72 | dist_min = agent1.size + agent2.size
73 | return True if dist < dist_min else False
74 |
75 | # return all agents that are not adversaries
76 | def good_agents(self, world):
77 | return [agent for agent in world.agents if not agent.adversary]
78 |
79 | # return all adversarial agents
80 | def adversaries(self, world):
81 | return [agent for agent in world.agents if agent.adversary]
82 |
83 |
84 | def reward(self, agent, world):
85 | # Agents are rewarded based on minimum agent distance to each landmark
86 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
87 | return main_reward
88 |
89 | def agent_reward(self, agent, world):
90 | # Agents are negatively rewarded if caught by adversaries
91 | rew = 0
92 | #shape = False
93 | shape = True
94 | adversaries = self.adversaries(world)
95 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary)
96 | for adv in adversaries:
97 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
98 | if agent.collide:
99 | for a in adversaries:
100 | if self.is_collision(a, agent):
101 | rew -= 10
102 |
103 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries
104 | def bound(x):
105 | if x < 0.9:
106 | return 0
107 | if x < 1.0:
108 | return (x - 0.9) * 10
109 | return min(np.exp(2 * x - 2), 10)
110 | for p in range(world.dim_p):
111 | x = abs(agent.state.p_pos[p])
112 | rew -= bound(x)
113 |
114 | return rew
115 |
116 | def adversary_reward(self, agent, world):
117 | # Adversaries are rewarded for collisions with agents
118 | rew = 0
119 | #shape = False
120 | shape = True
121 | agents = self.good_agents(world)
122 | adversaries = self.adversaries(world)
123 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents)
124 | for adv in adversaries:
125 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
126 | if agent.collide:
127 | for ag in agents:
128 | for adv in adversaries:
129 | if self.is_collision(ag, adv):
130 | rew += 10
131 | return rew
132 |
133 | def observation(self, agent, world):
134 | # get positions of all entities in this agent's reference frame
135 | entity_pos = []
136 | for entity in world.landmarks:
137 | if not entity.boundary:
138 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
139 | # communication of all other agents
140 | comm = []
141 | other_pos = []
142 | other_vel = []
143 | for other in world.agents:
144 | if other is agent: continue
145 | comm.append(other.state.c)
146 | other_pos.append(other.state.p_pos - agent.state.p_pos)
147 | if not other.adversary:
148 | other_vel.append(other.state.p_vel)
149 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
150 |
--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='multiagent',
4 | version='0.0.1',
5 | description='Multi-Agent Goal-Driven Communication Environment',
6 | url='https://github.com/openai/multiagent-public',
7 | author='Igor Mordatch',
8 | author_email='mordatch@openai.com',
9 | packages=find_packages(),
10 | include_package_data=True,
11 | zip_safe=False,
12 | install_requires=['gym', 'numpy-stl']
13 | )
14 |
--------------------------------------------------------------------------------
/MADDPG/reward setting:
--------------------------------------------------------------------------------
1 | red reward
2 | 1 distance -0.1*min_dis
3 | 2 any red agent catches a blue agent +10
4 | 3 a red agent leaves the screen -50
5 | 4 all blue agents caught +100
6 |
7 | blue reward
8 | 1. distance +0.1 * distance to all red agents
9 | 2. caught by a red agent -10
10 | 3. leaves the screen -5
--------------------------------------------------------------------------------
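Read as pseudocode for the scenario reward callbacks, the table above maps onto something like the sketch below. `is_caught` and `left_screen` are hypothetical helpers standing in for the collision and boundary tests used elsewhere in this repository; only the numbers come from the table.

```python
import numpy as np

def red_reward(agent, world, is_caught, left_screen):
    # agent is a red (adversary) agent; is_caught/left_screen are hypothetical helpers
    blues = [a for a in world.agents if not a.adversary]
    reds = [a for a in world.agents if a.adversary]
    # 1. distance shaping: -0.1 * distance to the nearest blue agent
    rew = -0.1 * min(np.linalg.norm(b.state.p_pos - agent.state.p_pos) for b in blues)
    # 2. any red agent catches a blue agent: +10
    rew += 10 * sum(is_caught(r, b) for r in reds for b in blues)
    # 3. this red agent leaves the screen: -50
    if left_screen(agent):
        rew -= 50
    # 4. every blue agent is caught by some red agent: +100
    if all(any(is_caught(r, b) for r in reds) for b in blues):
        rew += 100
    return rew

def blue_reward(agent, world, is_caught, left_screen):
    # agent is a blue (good) agent
    reds = [a for a in world.agents if a.adversary]
    # 1. distance shaping: +0.1 * summed distance to all red agents
    rew = 0.1 * sum(np.linalg.norm(r.state.p_pos - agent.state.p_pos) for r in reds)
    # 2. caught by any red agent: -10
    if any(is_caught(r, agent) for r in reds):
        rew -= 10
    # 3. leaves the screen: -5
    if left_screen(agent):
        rew -= 5
    return rew
```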
/MADQN/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.egg-info/
3 | *.pyc
4 | .vscode/settings.json
5 | multiagent/.DS_Store
6 | .DS_Store
7 |
--------------------------------------------------------------------------------
/MADQN/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MADQN/README.md:
--------------------------------------------------------------------------------
1 | **Status:** Archive (code is provided as-is, no updates expected)
2 |
3 | # Multi-Agent Particle Environment
4 |
5 | A simple multi-agent particle world with a continuous observation and discrete action space, along with some basic simulated physics.
6 | Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments](https://arxiv.org/pdf/1706.02275.pdf).
7 |
8 | ## Getting started:
9 |
10 | - To install, `cd` into the root directory and type `pip install -e .`
11 |
12 | - To interactively view moving to landmark scenario (see others in ./scenarios/):
13 | `bin/interactive.py --scenario simple.py`
14 |
15 | - Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
16 |
17 | - To use the environments, look at the code for importing them in `make_env.py`.
18 |
19 |
--------------------------------------------------------------------------------
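For a programmatic (non-interactive) loop, the same environments can be driven with random one-hot actions in the style of `dqn_tag.py`; this is a sketch that assumes the `make_env(scenario_name)` helper in this folder and small discrete per-agent action spaces, not a verbatim excerpt:

```python
import numpy as np
from make_env import make_env

env = make_env('simple_tag_v1')  # scenario name is only an example
obs_n = env.reset()
for _ in range(100):
    act_n = []
    for i in range(env.n):
        # one random one-hot action per agent, as dqn_tag.py does
        onehot = np.zeros(env.action_space[i].n)
        onehot[np.random.randint(env.action_space[i].n)] = 1.0
        act_n.append(onehot)
    obs_n, reward_n, done_n, _ = env.step(act_n)
    if any(done_n):
        obs_n = env.reset()
```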
/MADQN/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/bin/__init__.py
--------------------------------------------------------------------------------
/MADQN/bin/interactive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os,sys
3 | sys.path.insert(1, os.path.join(sys.path[0], '..'))
4 | import argparse
5 |
6 | from multiagent.environment import MultiAgentEnv
7 | from multiagent.policy import InteractivePolicy
8 | import multiagent.scenarios as scenarios
9 |
10 | if __name__ == '__main__':
11 | # parse arguments
12 | parser = argparse.ArgumentParser(description=None)
13 | parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.')
14 | args = parser.parse_args()
15 |
16 | # load scenario from script
17 | scenario = scenarios.load(args.scenario).Scenario()
18 | # create world
19 | world = scenario.make_world()
20 | # create multiagent environment
21 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False)
22 | # render call to create viewer window (necessary only for interactive policies)
23 | env.render()
24 | # create interactive policies for each agent
25 | policies = [InteractivePolicy(env,i) for i in range(env.n)]
26 | # execution loop
27 | obs_n = env.reset()
28 | while True:
29 | # query for action from each agent's policy
30 | act_n = []
31 | for i, policy in enumerate(policies):
32 | act_n.append(policy.action(obs_n[i]))
33 | # step environment
34 | obs_n, reward_n, done_n, _ = env.step(act_n)
35 | # render all agent views
36 | env.render()
37 | # display rewards
38 | #for agent in env.world.agents:
39 | # print(agent.name + " reward: %0.3f" % env._get_reward(agent))
40 |
--------------------------------------------------------------------------------
/MADQN/dqn.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | from keras.models import Sequential
4 | from keras.layers import Dense
5 | from keras.optimizers import Adam
6 |
7 |
8 | class DQN:
9 | def __init__(self, n_actions, state_size, gamma=0.9, learning_rate=0.001,
10 | eps_greedy=0.5, eps_increment=1e-5, replace_target_freq=2000):
11 | self.n_actions = n_actions
12 | self.state_size = state_size
13 | self.gamma = gamma
14 | self.learning_rate = learning_rate
15 | self.eps_greedy = eps_greedy
16 | self.eps_increment = eps_increment
17 | self.learning_step = 0
18 | self.replace_target_freq = replace_target_freq
19 | self.eval_network = self.build_network()
20 | self.target_network = self.build_network()
21 | self.update_target_weights()
22 |
23 | def build_network(self):
24 | model = Sequential()
25 | model.add(Dense(50, input_dim=self.state_size, activation='relu'))
26 | model.add(Dense(50, activation='relu'))
27 | model.add(Dense(self.n_actions, activation='linear'))
28 | model.compile(loss='mse', optimizer=Adam(self.learning_rate))
29 |
30 | return model
31 |
32 | def update_target_weights(self):
33 | self.target_network.set_weights(self.eval_network.get_weights())
34 |
35 | def choose_action(self, state):
36 | p = np.random.random()
37 | if p < self.eps_greedy:
38 | action_probs = self.eval_network.predict(state[np.newaxis, :])
39 | return np.argmax(action_probs[0])
40 | else:
41 | return random.randrange(self.n_actions)
42 |
43 | def learn(self, states, actions, rewards, states_next, done):
44 | if self.learning_step % self.replace_target_freq == 0:
45 | self.update_target_weights()
46 |
47 | rows = np.arange(done.shape[0])
48 | not_done = np.logical_not(done)
49 |
50 | eval_next = self.eval_network.predict(states_next)
51 | target_next = self.target_network.predict(states_next)
52 | discounted_rewards = self.gamma * \
53 | target_next[rows, np.argmax(eval_next, axis=1)]
54 |
55 | y = self.eval_network.predict(states)
56 | y[rows, actions] = rewards
57 | y[not_done, actions[not_done]] += discounted_rewards[not_done]
58 |
59 | history = self.eval_network.fit(states, y, epochs=1, verbose=0)
60 | self.learning_step += 1
61 | if self.eps_greedy < 0.9:
62 | self.eps_greedy += self.eps_increment
63 |
64 | return history
65 |
66 | def load(self, name):
67 | self.eval_network.load_weights(name)
68 | self.update_target_weights()
69 |
70 | def save(self, name):
71 | self.eval_network.save_weights(name)
72 |
--------------------------------------------------------------------------------
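The DQN class above uses a Double-DQN-style target in learn(): the next action is selected by the eval network but evaluated by the target network, and the target network is refreshed every replace_target_freq learning steps. The following is a hypothetical driving sketch, not part of the repository; the transition batch is random data chosen only to match the shapes learn() expects.

# Hypothetical usage sketch for the DQN class above; the batch is random data.
import numpy as np
from dqn import DQN

state_size, n_actions, batch = 8, 5, 32
agent = DQN(n_actions=n_actions, state_size=state_size)

states      = np.random.randn(batch, state_size)
actions     = np.random.randint(n_actions, size=batch)   # integer action indices
rewards     = np.random.randn(batch)
states_next = np.random.randn(batch, state_size)
done        = np.random.rand(batch) < 0.1                # boolean terminal flags

history = agent.learn(states, actions, rewards, states_next, done)
print("loss:", history.history["loss"][0])
print("greedy prob:", agent.eps_greedy)                  # anneals slowly toward 0.9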
/MADQN/dqn_tag.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | import tensorflow as tf
4 | import argparse
5 | import itertools
6 | import time
7 | import os
8 | import pickle
9 | import code
10 | import random
11 |
12 | from dqn import DQN
13 | from memory import Memory
14 | from make_env import make_env
15 | import general_utilities
16 | import simple_tag_utilities
17 |
18 |
19 | def play(episodes, is_render, is_testing, checkpoint_interval,
20 |          weights_filename_prefix, csv_filename_prefix, batch_size, env):
21 | # init statistics. NOTE: simple tag specific!
22 | statistics_header = ["episode"]
23 | statistics_header.append("steps")
24 | statistics_header.extend(["reward_{}".format(i) for i in range(env.n)])
25 | statistics_header.extend(["loss_{}".format(i) for i in range(env.n)])
26 | statistics_header.extend(["eps_greedy_{}".format(i) for i in range(env.n)])
27 | statistics_header.extend(["collisions_{}".format(i) for i in range(env.n)])
28 | print("Collecting statistics {}:".format(" ".join(statistics_header)))
29 | statistics = general_utilities.Time_Series_Statistics_Store(
30 | statistics_header)
31 |
32 |     for episode in range(episodes):
33 | states = env.reset()
34 | episode_losses = np.zeros(env.n)
35 | episode_rewards = np.zeros(env.n)
36 | collision_count = np.zeros(env.n)
37 | steps = 0
38 |
39 | while True:
40 | steps += 1
41 |             mysteps = steps
42 |             # cap each episode at 50 environment steps
43 |             terminal = (mysteps >= 50)
44 |
45 | # render
46 |             if is_render:
47 | env.render()
48 |
49 | # act
50 | actions = []
51 | actions_onehot = []
52 | for i in range(env.n):
53 | action = dqns[i].choose_action(states[i])
54 | speed = 0.9 if env.agents[i].adversary else 1
55 |
56 | onehot_action = np.zeros(n_actions[i])
57 | onehot_action[action] = speed
58 | actions_onehot.append(onehot_action)
59 | actions.append(action)
60 |
61 | # step
62 | states_next, rewards, done, info = env.step(actions_onehot)
63 |
64 | # learn
65 |             if not is_testing:
66 | size = memories[0].pointer
67 | batch = random.sample(range(size), size) if size < batch_size else random.sample(
68 | range(size), batch_size)
69 |
70 | for i in range(env.n):
71 | if done[i]:
72 | rewards[i] -= 50
73 |
74 | memories[i].remember(states[i], actions[i],
75 | rewards[i], states_next[i], done[i])
76 |
77 | if memories[i].pointer > batch_size * 10:
78 | history = dqns[i].learn(*memories[i].sample(batch))
79 | episode_losses[i] += history.history["loss"][0]
80 | else:
81 | episode_losses[i] = -1
82 |
83 | states = states_next
84 | episode_rewards += rewards
85 | collision_count += np.array(
86 | simple_tag_utilities.count_agent_collisions(env))
87 |
88 | # reset states if done
89 | if (any(done) or terminal):
90 | mysteps=0
91 | episode_rewards = episode_rewards / steps
92 | episode_losses = episode_losses / steps
93 |
94 | statistic = [episode]
95 | statistic.append(steps)
96 | statistic.extend([episode_rewards[i] for i in range(env.n)])
97 | statistic.extend([episode_losses[i] for i in range(env.n)])
98 | statistic.extend([dqns[i].eps_greedy for i in range(env.n)])
99 | statistic.extend(collision_count.tolist())
100 | statistics.add_statistics(statistic)
101 |
102 | if episode % 25 == 0:
103 | print(statistics.summarize_last())
104 | break
105 |
106 | if episode % checkpoint_interval == 0:
107 | statistics.dump("{}_{}.csv".format(csv_filename_prefix,
108 | episode))
109 | general_utilities.save_dqn_weights(dqns,
110 | "{}_{}_".format(weights_filename_prefix, episode))
111 | if episode >= checkpoint_interval:
112 | os.remove("{}_{}.csv".format(csv_filename_prefix,
113 | episode - checkpoint_interval))
114 |
115 | return statistics
116 |
117 |
118 | if __name__ == '__main__':
119 |     print("Entering main")
120 | parser = argparse.ArgumentParser()
121 | parser.add_argument('--env', default='simple_tag_guided', type=str)
122 | parser.add_argument('--learning_rate', default=0.001, type=float)
123 | parser.add_argument('--episodes', default=60000, type=int)
124 | parser.add_argument('--render', default=False, action="store_true")
125 | parser.add_argument('--benchmark', default=False, action="store_true")
126 | parser.add_argument('--experiment_prefix', default=".",
127 | help="directory to store all experiment data")
128 | parser.add_argument('--weights_filename_prefix', default='/save/tag-dqn',
129 | help="where to store/load network weights")
130 | parser.add_argument('--csv_filename_prefix', default='/save/statistics-dqn',
131 | help="where to store statistics")
132 |     parser.add_argument('--checkpoint_frequency', default=500, type=int,
133 | help="how often to checkpoint")
134 | parser.add_argument('--testing', default=False, action="store_true",
135 | help="reduces exploration substantially")
136 | parser.add_argument('--random_seed', default=2, type=int)
137 | parser.add_argument('--memory_size', default=10000, type=int)
138 | parser.add_argument('--batch_size', default=128, type=int)
139 | parser.add_argument('--epsilon_greedy', nargs='+', type=float,
140 | help="Epsilon greedy parameter for each agent")
141 | args = parser.parse_args()
142 |
143 | general_utilities.dump_dict_as_json(vars(args),
144 | args.experiment_prefix + "/save/run_parameters.json")
145 | # init env
146 |     print("Initializing environment")
147 | env = make_env(args.env, args.benchmark)
148 |
149 | if args.epsilon_greedy is not None:
150 | if len(args.epsilon_greedy) == env.n:
151 | epsilon_greedy = args.epsilon_greedy
152 | else:
153 | raise ValueError("Must have enough epsilon_greedy for all agents")
154 | else:
155 | epsilon_greedy = [0.5 for i in range(env.n)]
156 |
157 | # set random seed
158 |     print("Setting random seeds")
159 | env.seed(args.random_seed)
160 | random.seed(args.random_seed)
161 | np.random.seed(args.random_seed)
162 | tf.set_random_seed(args.random_seed)
163 |
164 | # init DQNs
165 |     print("Initializing DQNs")
166 | n_actions = [env.action_space[i].n for i in range(env.n)]
167 | state_sizes = [env.observation_space[i].shape[0] for i in range(env.n)]
168 | memories = [Memory(args.memory_size) for i in range(env.n)]
169 | dqns = [DQN(n_actions[i], state_sizes[i], eps_greedy=epsilon_greedy[i])
170 | for i in range(env.n)]
171 |
172 | general_utilities.load_dqn_weights_if_exist(
173 | dqns, args.experiment_prefix + args.weights_filename_prefix)
174 |
175 | start_time = time.time()
176 |
177 | # play
178 |     print("Starting play loop")
179 | statistics = play(args.episodes, args.render, args.testing,
180 | args.checkpoint_frequency,
181 | args.experiment_prefix + args.weights_filename_prefix,
182 | args.experiment_prefix + args.csv_filename_prefix,
183 |                       args.batch_size, env)
184 |
185 | # bookkeeping
186 | print("Finished {} episodes in {} seconds".format(args.episodes,
187 | time.time() - start_time))
188 | general_utilities.save_dqn_weights(
189 | dqns, args.experiment_prefix + args.weights_filename_prefix)
190 | statistics.dump(args.experiment_prefix + args.csv_filename_prefix + ".csv")
191 |
--------------------------------------------------------------------------------
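dqn_tag.py imports a Memory class from memory.py, which is not reproduced in this listing. Its interface can be inferred from the calls above: a pointer counting stored transitions, remember() taking one transition per agent, and sample() taking a list of indices drawn with random.sample. The class below is a hypothetical stand-in consistent with that usage, not the repository's actual implementation.

# Hypothetical stand-in for the Memory class used by dqn_tag.py (memory.py is
# not shown in this listing); the interface is inferred from the calls above.
import numpy as np

class Memory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.data = [None] * capacity
        self.pointer = 0        # number of transitions currently stored
        self._write = 0         # next slot to overwrite once the buffer is full

    def remember(self, state, action, reward, state_next, done):
        self.data[self._write] = (state, action, reward, state_next, done)
        self._write = (self._write + 1) % self.capacity
        self.pointer = min(self.pointer + 1, self.capacity)

    def sample(self, indices):
        # dqn_tag.py passes a list of indices drawn from range(self.pointer)
        batch = [self.data[i] for i in indices]
        states, actions, rewards, states_next, done = map(np.array, zip(*batch))
        return states, actions, rewards, states_next, done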
/MADQN/make_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Code for creating a multiagent environment with one of the scenarios listed
3 | in ./scenarios/.
4 | Can be called by using, for example:
5 | env = make_env('simple_speaker_listener')
6 | After producing the env object, can be used similarly to an OpenAI gym
7 | environment.
8 |
9 | A policy using this environment must output actions in the form of a list
10 | for all agents. Each element of the list should be a numpy array,
11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede
12 | communication actions in this array. See environment.py for more details.
13 | """
14 |
15 | def make_env(scenario_name, benchmark=False):
16 | '''
17 | Creates a MultiAgentEnv object as env. This can be used similar to a gym
18 | environment by calling env.reset() and env.step().
19 | Use env.render() to view the environment on the screen.
20 |
21 | Input:
22 |         scenario_name   :   name of the scenario from ./scenarios/ to be loaded
23 | (without the .py extension)
24 | benchmark : whether you want to produce benchmarking data
25 | (usually only done during evaluation)
26 |
27 | Some useful env properties (see environment.py):
28 | .observation_space : Returns the observation space for each agent
29 | .action_space : Returns the action space for each agent
30 | .n : Returns the number of Agents
31 | '''
32 | from multiagent.environment import MultiAgentEnv
33 | import multiagent.scenarios as scenarios
34 |
35 | # load scenario from script
36 | scenario = scenarios.load(scenario_name + ".py").Scenario()
37 | # create world
38 | world = scenario.make_world()
39 | # create multiagent environment
40 | if benchmark:
41 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data)
42 | else:
43 | env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)# done_callback=scenario.done)
44 | return env
45 |
--------------------------------------------------------------------------------
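A minimal usage sketch of make_env, following the docstring above. It assumes the bundled simple_spread scenario and the same length-n action vectors that dqn_tag.py feeds to env.step; it is illustrative only and not part of the repository.

# Illustrative only: random actions in a bundled scenario.
import numpy as np
from make_env import make_env

env = make_env('simple_spread')
obs_n = env.reset()
for _ in range(10):
    # one action vector per agent; each entry weights one discrete move direction
    act_n = [np.random.rand(env.action_space[i].n) for i in range(env.n)]
    obs_n, reward_n, done_n, info_n = env.step(act_n)
print(env.n, [o.shape for o in obs_n])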
/MADQN/multiagent/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 | # Multiagent envs
4 | # ----------------------------------------
5 |
6 | register(
7 | id='MultiagentSimple-v0',
8 | entry_point='multiagent.envs:SimpleEnv',
9 | # FIXME(cathywu) currently has to be exactly max_path_length parameters in
10 | # rllab run script
11 | max_episode_steps=100,
12 | )
13 |
14 | register(
15 | id='MultiagentSimpleSpeakerListener-v0',
16 | entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
17 | max_episode_steps=100,
18 | )
19 |
--------------------------------------------------------------------------------
/MADQN/multiagent/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # physical/external base state of all entities
4 | class EntityState(object):
5 | def __init__(self):
6 | # physical position
7 | self.p_pos = None
8 | # physical velocity
9 | self.p_vel = None
10 |
11 | # state of agents (including communication and internal/mental state)
12 | class AgentState(EntityState):
13 | def __init__(self):
14 | super(AgentState, self).__init__()
15 | # communication utterance
16 | self.c = None
17 |
18 | # action of the agent
19 | class Action(object):
20 | def __init__(self):
21 | # physical action
22 | self.u = None
23 | # communication action
24 | self.c = None
25 |
26 | # properties and state of physical world entity
27 | class Entity(object):
28 | def __init__(self):
29 | # name
30 | self.name = ''
31 | # properties:
32 | self.size = 0.050
33 | # entity can move / be pushed
34 | self.movable = False
35 | # entity collides with others
36 | self.collide = True
37 | # material density (affects mass)
38 | self.density = 25.0
39 | # color
40 | self.color = None
41 | # max speed and accel
42 | self.max_speed = None
43 | self.accel = None
44 | # state
45 | self.state = EntityState()
46 | # mass
47 | self.initial_mass = 1.0
48 |
49 | @property
50 | def mass(self):
51 | return self.initial_mass
52 |
53 | # properties of landmark entities
54 | class Landmark(Entity):
55 | def __init__(self):
56 | super(Landmark, self).__init__()
57 |
58 | # properties of agent entities
59 | class Agent(Entity):
60 | def __init__(self):
61 | super(Agent, self).__init__()
62 | # agents are movable by default
63 | self.movable = True
64 | # cannot send communication signals
65 | self.silent = False
66 | # cannot observe the world
67 | self.blind = False
68 | # physical motor noise amount
69 | self.u_noise = None
70 | # communication noise amount
71 | self.c_noise = None
72 | # control range
73 | self.u_range = 1.0
74 | # state
75 | self.state = AgentState()
76 | # action
77 | self.action = Action()
78 | # script behavior to execute
79 | self.action_callback = None
80 |
81 | # multi-agent world
82 | class World(object):
83 | def __init__(self):
84 | # list of agents and entities (can change at execution-time!)
85 | self.agents = []
86 | self.landmarks = []
87 | # communication channel dimensionality
88 | self.dim_c = 0
89 | # position dimensionality
90 | self.dim_p = 2
91 | # color dimensionality
92 | self.dim_color = 3
93 | # simulation timestep
94 | self.dt = 0.1
95 | # physical damping
96 | self.damping = 0.25
97 | # contact response parameters
98 | self.contact_force = 1e+2
99 | self.contact_margin = 1e-3
100 |
101 | # return all entities in the world
102 | @property
103 | def entities(self):
104 | return self.agents + self.landmarks
105 |
106 | # return all agents controllable by external policies
107 | @property
108 | def policy_agents(self):
109 | return [agent for agent in self.agents if agent.action_callback is None]
110 |
111 | # return all agents controlled by world scripts
112 | @property
113 | def scripted_agents(self):
114 | return [agent for agent in self.agents if agent.action_callback is not None]
115 |
116 | # update state of the world
117 | def step(self):
118 | # set actions for scripted agents
119 | for agent in self.scripted_agents:
120 | agent.action = agent.action_callback(agent, self)
121 | # gather forces applied to entities
122 | p_force = [None] * len(self.entities)
123 | # apply agent physical controls
124 | p_force = self.apply_action_force(p_force)
125 | # apply environment forces
126 | p_force = self.apply_environment_force(p_force)
127 | # integrate physical state
128 | self.integrate_state(p_force)
129 | # update agent state
130 | for agent in self.agents:
131 | self.update_agent_state(agent)
132 |
133 | # gather agent action forces
134 | def apply_action_force(self, p_force):
135 | # set applied forces
136 | for i,agent in enumerate(self.agents):
137 | if agent.movable:
138 | noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
139 | p_force[i] = agent.action.u + noise
140 | return p_force
141 |
142 | # gather physical forces acting on entities
143 | def apply_environment_force(self, p_force):
144 | # simple (but inefficient) collision response
145 | for a,entity_a in enumerate(self.entities):
146 | for b,entity_b in enumerate(self.entities):
147 | if(b <= a): continue
148 | [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
149 | if(f_a is not None):
150 | if(p_force[a] is None): p_force[a] = 0.0
151 | p_force[a] = f_a + p_force[a]
152 | if(f_b is not None):
153 | if(p_force[b] is None): p_force[b] = 0.0
154 | p_force[b] = f_b + p_force[b]
155 | return p_force
156 |
157 | # integrate physical state
158 | def integrate_state(self, p_force):
159 | for i,entity in enumerate(self.entities):
160 | if not entity.movable: continue
161 | entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
162 | if (p_force[i] is not None):
163 | entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
164 | if entity.max_speed is not None:
165 | speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
166 | if speed > entity.max_speed:
167 | entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
168 | np.square(entity.state.p_vel[1])) * entity.max_speed
169 | entity.state.p_pos += entity.state.p_vel * self.dt
170 |
171 | def update_agent_state(self, agent):
172 | # set communication state (directly for now)
173 | if agent.silent:
174 | agent.state.c = np.zeros(self.dim_c)
175 | else:
176 | noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
177 | agent.state.c = agent.action.c + noise
178 |
179 | # get collision forces for any contact between two entities
180 | def get_collision_force(self, entity_a, entity_b):
181 | if (not entity_a.collide) or (not entity_b.collide):
182 | return [None, None] # not a collider
183 | if (entity_a is entity_b):
184 | return [None, None] # don't collide against itself
185 | # compute actual distance between entities
186 | delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
187 | dist = np.sqrt(np.sum(np.square(delta_pos)))
188 | # minimum allowable distance
189 | dist_min = entity_a.size + entity_b.size
190 | # softmax penetration
191 | k = self.contact_margin
192 | penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
193 | force = self.contact_force * delta_pos / dist * penetration
194 | force_a = +force if entity_a.movable else None
195 | force_b = -force if entity_b.movable else None
196 | return [force_a, force_b]
--------------------------------------------------------------------------------
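World.step above damps velocities, Euler-integrates the applied forces, and resolves contacts in get_collision_force with a softplus penetration penalty. The snippet below is a small numeric illustration of that contact force using the default constants from World and Entity; the positions are made up.

# Numeric illustration of the soft contact force in get_collision_force above.
import numpy as np

contact_force, contact_margin = 1e+2, 1e-3
size_a = size_b = 0.050                      # default Entity.size
delta_pos = np.array([0.08, 0.0])            # centres 0.08 apart -> overlapping
dist = np.linalg.norm(delta_pos)
dist_min = size_a + size_b                   # 0.10

# softplus penetration: ~0 when dist > dist_min, grows linearly once overlapping
penetration = np.logaddexp(0, -(dist - dist_min) / contact_margin) * contact_margin
force = contact_force * delta_pos / dist * penetration
print(penetration, force)                    # ~0.02, force pushes entity_a along +x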
/MADQN/multiagent/multi_discrete.py:
--------------------------------------------------------------------------------
1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
3 |
4 | import numpy as np
5 |
6 | import gym
7 | from gym.spaces import prng
8 |
9 | class MultiDiscrete(gym.Space):
10 | """
11 | - The multi-discrete action space consists of a series of discrete action spaces with different parameters
12 | - It can be adapted to both a Discrete action space or a continuous (Box) action space
13 | - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
14 | - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
15 | where the discrete action space can take any integers from `min` to `max` (both inclusive)
16 |     Note: A value of 0 always needs to represent the NOOP action.
17 | e.g. Nintendo Game Controller
18 | - Can be conceptualized as 3 discrete action spaces:
19 | 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
20 | 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
21 | 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
22 | - Can be initialized as
23 | MultiDiscrete([ [0,4], [0,1], [0,1] ])
24 | """
25 | def __init__(self, array_of_param_array):
26 | self.low = np.array([x[0] for x in array_of_param_array])
27 | self.high = np.array([x[1] for x in array_of_param_array])
28 | self.num_discrete_space = self.low.shape[0]
29 |
30 | def sample(self):
31 |         """ Returns an array with one sample from each discrete action space """
32 | # For each row: round(random .* (max - min) + min, 0)
33 | random_array = prng.np_random.rand(self.num_discrete_space)
34 | return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
35 | def contains(self, x):
36 | return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
37 |
38 | @property
39 | def shape(self):
40 | return self.num_discrete_space
41 | def __repr__(self):
42 | return "MultiDiscrete" + str(self.num_discrete_space)
43 | def __eq__(self, other):
44 | return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)
--------------------------------------------------------------------------------
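A short usage sketch mirroring the game-controller example in the docstring above. Note that sample() relies on gym.spaces.prng, so this assumes the older Gym version this file was pinned against, as its header comment explains.

# Mirrors the controller example in the MultiDiscrete docstring above
# (assumes an older gym that still provides gym.spaces.prng).
from multiagent.multi_discrete import MultiDiscrete

space = MultiDiscrete([[0, 4], [0, 1], [0, 1]])
print(space.shape)                 # 3 discrete sub-spaces
print(space.sample())              # e.g. [2, 0, 1]
print(space.contains([4, 1, 0]))   # True: every entry within its [min, max]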
/MADQN/multiagent/policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pyglet.window import key
3 |
4 | # individual agent policy
5 | class Policy(object):
6 | def __init__(self):
7 | pass
8 | def action(self, obs):
9 | raise NotImplementedError()
10 |
11 | # interactive policy based on keyboard input
12 | # hard-coded to deal only with movement, not communication
13 | class InteractivePolicy(Policy):
14 | def __init__(self, env, agent_index):
15 | super(InteractivePolicy, self).__init__()
16 | self.env = env
17 | # hard-coded keyboard events
18 | self.move = [False for i in range(4)]
19 | self.comm = [False for i in range(env.world.dim_c)]
20 | # register keyboard events with this environment's window
21 | env.viewers[agent_index].window.on_key_press = self.key_press
22 | env.viewers[agent_index].window.on_key_release = self.key_release
23 |
24 | def action(self, obs):
25 | # ignore observation and just act based on keyboard events
26 | if self.env.discrete_action_input:
27 | u = 0
28 | if self.move[0]: u = 1
29 | if self.move[1]: u = 2
30 | if self.move[2]: u = 4
31 | if self.move[3]: u = 3
32 | else:
33 | u = np.zeros(5) # 5-d because of no-move action
34 | if self.move[0]: u[1] += 1.0
35 | if self.move[1]: u[2] += 1.0
36 | if self.move[3]: u[3] += 1.0
37 | if self.move[2]: u[4] += 1.0
38 | if True not in self.move:
39 | u[0] += 1.0
40 | return np.concatenate([u, np.zeros(self.env.world.dim_c)])
41 |
42 | # keyboard event callbacks
43 | def key_press(self, k, mod):
44 | if k==key.LEFT: self.move[0] = True
45 | if k==key.RIGHT: self.move[1] = True
46 | if k==key.UP: self.move[2] = True
47 | if k==key.DOWN: self.move[3] = True
48 | def key_release(self, k, mod):
49 | if k==key.LEFT: self.move[0] = False
50 | if k==key.RIGHT: self.move[1] = False
51 | if k==key.UP: self.move[2] = False
52 | if k==key.DOWN: self.move[3] = False
53 |
--------------------------------------------------------------------------------
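For reference, this is the 5-d action vector that InteractivePolicy.action builds in the continuous (non-discrete_action_input) branch above; the key state below is made up (UP held) and the snippet simply recomputes that branch outside the class.

# Recomputes the continuous-branch action vector from InteractivePolicy.action
# for a made-up key state (UP held); index 0 is the no-move component.
import numpy as np

move = [False, False, True, False]   # [LEFT, RIGHT, UP, DOWN]
u = np.zeros(5)
if move[0]: u[1] += 1.0
if move[1]: u[2] += 1.0
if move[3]: u[3] += 1.0
if move[2]: u[4] += 1.0
if True not in move:
    u[0] += 1.0                      # no key pressed -> explicit no-move
print(u)                             # [0. 0. 0. 0. 1.]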
/MADQN/multiagent/rendering.py:
--------------------------------------------------------------------------------
1 | """
2 | 2D rendering framework
3 | """
4 | from __future__ import division
5 | import os
6 | import six
7 | import sys
8 |
9 | if "Apple" in sys.version:
10 | if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
11 | os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
12 | # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
13 |
14 | from gym.utils import reraise
15 | from gym import error
16 |
17 | try:
18 | import pyglet
19 | except ImportError as e:
20 | reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
21 |
22 | try:
23 | from pyglet.gl import *
24 | except ImportError as e:
25 |     reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '")
26 |
27 | import math
28 | import numpy as np
29 |
30 | RAD2DEG = 57.29577951308232
31 |
32 | def get_display(spec):
33 | """Convert a display specification (such as :0) into an actual Display
34 | object.
35 |
36 | Pyglet only supports multiple Displays on Linux.
37 | """
38 | if spec is None:
39 | return None
40 | elif isinstance(spec, six.string_types):
41 | return pyglet.canvas.Display(spec)
42 | else:
43 | raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
44 |
45 | class Viewer(object):
46 | def __init__(self, width, height, display=None):
47 | display = get_display(display)
48 |
49 | self.width = width
50 | self.height = height
51 |
52 | self.window = pyglet.window.Window(width=width, height=height, display=display)
53 | self.window.on_close = self.window_closed_by_user
54 | self.geoms = []
55 | self.onetime_geoms = []
56 | self.transform = Transform()
57 |
58 | glEnable(GL_BLEND)
59 | # glEnable(GL_MULTISAMPLE)
60 | glEnable(GL_LINE_SMOOTH)
61 | # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
62 | glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
63 | glLineWidth(2.0)
64 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
65 |
66 | def close(self):
67 | self.window.close()
68 |
69 | def window_closed_by_user(self):
70 | self.close()
71 |
72 | def set_bounds(self, left, right, bottom, top):
73 | assert right > left and top > bottom
74 | scalex = self.width/(right-left)
75 | scaley = self.height/(top-bottom)
76 | self.transform = Transform(
77 | translation=(-left*scalex, -bottom*scaley),
78 | scale=(scalex, scaley))
79 |
80 | def add_geom(self, geom):
81 | self.geoms.append(geom)
82 |
83 | def add_onetime(self, geom):
84 | self.onetime_geoms.append(geom)
85 |
86 | def render(self, return_rgb_array=False):
87 | glClearColor(1,1,1,1)
88 | self.window.clear()
89 | self.window.switch_to()
90 | self.window.dispatch_events()
91 | self.transform.enable()
92 | for geom in self.geoms:
93 | geom.render()
94 | for geom in self.onetime_geoms:
95 | geom.render()
96 | self.transform.disable()
97 | arr = None
98 | if return_rgb_array:
99 | buffer = pyglet.image.get_buffer_manager().get_color_buffer()
100 | image_data = buffer.get_image_data()
101 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
102 | # In https://github.com/openai/gym-http-api/issues/2, we
103 | # discovered that someone using Xmonad on Arch was having
104 | # a window of size 598 x 398, though a 600 x 400 window
105 | # was requested. (Guess Xmonad was preserving a pixel for
106 | # the boundary.) So we use the buffer height/width rather
107 | # than the requested one.
108 | arr = arr.reshape(buffer.height, buffer.width, 4)
109 | arr = arr[::-1,:,0:3]
110 | self.window.flip()
111 | self.onetime_geoms = []
112 | return arr
113 |
114 | # Convenience
115 | def draw_circle(self, radius=10, res=30, filled=True, **attrs):
116 | geom = make_circle(radius=radius, res=res, filled=filled)
117 | _add_attrs(geom, attrs)
118 | self.add_onetime(geom)
119 | return geom
120 |
121 | def draw_polygon(self, v, filled=True, **attrs):
122 | geom = make_polygon(v=v, filled=filled)
123 | _add_attrs(geom, attrs)
124 | self.add_onetime(geom)
125 | return geom
126 |
127 | def draw_polyline(self, v, **attrs):
128 | geom = make_polyline(v=v)
129 | _add_attrs(geom, attrs)
130 | self.add_onetime(geom)
131 | return geom
132 |
133 | def draw_line(self, start, end, **attrs):
134 | geom = Line(start, end)
135 | _add_attrs(geom, attrs)
136 | self.add_onetime(geom)
137 | return geom
138 |
139 | def get_array(self):
140 | self.window.flip()
141 | image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
142 | self.window.flip()
143 | arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
144 | arr = arr.reshape(self.height, self.width, 4)
145 | return arr[::-1,:,0:3]
146 |
147 | def _add_attrs(geom, attrs):
148 | if "color" in attrs:
149 | geom.set_color(*attrs["color"])
150 | if "linewidth" in attrs:
151 | geom.set_linewidth(attrs["linewidth"])
152 |
153 | class Geom(object):
154 | def __init__(self):
155 | self._color=Color((0, 0, 0, 1.0))
156 | self.attrs = [self._color]
157 | def render(self):
158 | for attr in reversed(self.attrs):
159 | attr.enable()
160 | self.render1()
161 | for attr in self.attrs:
162 | attr.disable()
163 | def render1(self):
164 | raise NotImplementedError
165 | def add_attr(self, attr):
166 | self.attrs.append(attr)
167 | def set_color(self, r, g, b, alpha=1):
168 | self._color.vec4 = (r, g, b, alpha)
169 |
170 | class Attr(object):
171 | def enable(self):
172 | raise NotImplementedError
173 | def disable(self):
174 | pass
175 |
176 | class Transform(Attr):
177 | def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
178 | self.set_translation(*translation)
179 | self.set_rotation(rotation)
180 | self.set_scale(*scale)
181 | def enable(self):
182 | glPushMatrix()
183 |         glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL location
184 | glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
185 | glScalef(self.scale[0], self.scale[1], 1)
186 | def disable(self):
187 | glPopMatrix()
188 | def set_translation(self, newx, newy):
189 | self.translation = (float(newx), float(newy))
190 | def set_rotation(self, new):
191 | self.rotation = float(new)
192 | def set_scale(self, newx, newy):
193 | self.scale = (float(newx), float(newy))
194 |
195 | class Color(Attr):
196 | def __init__(self, vec4):
197 | self.vec4 = vec4
198 | def enable(self):
199 | glColor4f(*self.vec4)
200 |
201 | class LineStyle(Attr):
202 | def __init__(self, style):
203 | self.style = style
204 | def enable(self):
205 | glEnable(GL_LINE_STIPPLE)
206 | glLineStipple(1, self.style)
207 | def disable(self):
208 | glDisable(GL_LINE_STIPPLE)
209 |
210 | class LineWidth(Attr):
211 | def __init__(self, stroke):
212 | self.stroke = stroke
213 | def enable(self):
214 | glLineWidth(self.stroke)
215 |
216 | class Point(Geom):
217 | def __init__(self):
218 | Geom.__init__(self)
219 | def render1(self):
220 | glBegin(GL_POINTS) # draw point
221 | glVertex3f(0.0, 0.0, 0.0)
222 | glEnd()
223 |
224 | class FilledPolygon(Geom):
225 | def __init__(self, v):
226 | Geom.__init__(self)
227 | self.v = v
228 | def render1(self):
229 | if len(self.v) == 4 : glBegin(GL_QUADS)
230 | elif len(self.v) > 4 : glBegin(GL_POLYGON)
231 | else: glBegin(GL_TRIANGLES)
232 | for p in self.v:
233 | glVertex3f(p[0], p[1],0) # draw each vertex
234 | glEnd()
235 |
236 | color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
237 | glColor4f(*color)
238 | glBegin(GL_LINE_LOOP)
239 | for p in self.v:
240 | glVertex3f(p[0], p[1],0) # draw each vertex
241 | glEnd()
242 |
243 | def make_circle(radius=10, res=30, filled=True):
244 | points = []
245 | for i in range(res):
246 | ang = 2*math.pi*i / res
247 | points.append((math.cos(ang)*radius, math.sin(ang)*radius))
248 | if filled:
249 | return FilledPolygon(points)
250 | else:
251 | return PolyLine(points, True)
252 |
253 | def make_polygon(v, filled=True):
254 | if filled: return FilledPolygon(v)
255 | else: return PolyLine(v, True)
256 |
257 | def make_polyline(v):
258 | return PolyLine(v, False)
259 |
260 | def make_capsule(length, width):
261 | l, r, t, b = 0, length, width/2, -width/2
262 | box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
263 | circ0 = make_circle(width/2)
264 | circ1 = make_circle(width/2)
265 | circ1.add_attr(Transform(translation=(length, 0)))
266 | geom = Compound([box, circ0, circ1])
267 | return geom
268 |
269 | class Compound(Geom):
270 | def __init__(self, gs):
271 | Geom.__init__(self)
272 | self.gs = gs
273 | for g in self.gs:
274 | g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
275 | def render1(self):
276 | for g in self.gs:
277 | g.render()
278 |
279 | class PolyLine(Geom):
280 | def __init__(self, v, close):
281 | Geom.__init__(self)
282 | self.v = v
283 | self.close = close
284 | self.linewidth = LineWidth(1)
285 | self.add_attr(self.linewidth)
286 | def render1(self):
287 | glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
288 | for p in self.v:
289 | glVertex3f(p[0], p[1],0) # draw each vertex
290 | glEnd()
291 | def set_linewidth(self, x):
292 | self.linewidth.stroke = x
293 |
294 | class Line(Geom):
295 | def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
296 | Geom.__init__(self)
297 | self.start = start
298 | self.end = end
299 | self.linewidth = LineWidth(1)
300 | self.add_attr(self.linewidth)
301 |
302 | def render1(self):
303 | glBegin(GL_LINES)
304 | glVertex2f(*self.start)
305 | glVertex2f(*self.end)
306 | glEnd()
307 |
308 | class Image(Geom):
309 | def __init__(self, fname, width, height):
310 | Geom.__init__(self)
311 | self.width = width
312 | self.height = height
313 | img = pyglet.image.load(fname)
314 | self.img = img
315 | self.flip = False
316 | def render1(self):
317 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
318 |
319 | # ================================================================
320 |
321 | class SimpleImageViewer(object):
322 | def __init__(self, display=None):
323 | self.window = None
324 | self.isopen = False
325 | self.display = display
326 | def imshow(self, arr):
327 | if self.window is None:
328 | height, width, channels = arr.shape
329 | self.window = pyglet.window.Window(width=width, height=height, display=self.display)
330 | self.width = width
331 | self.height = height
332 | self.isopen = True
333 |         assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong shape"
334 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
335 | self.window.clear()
336 | self.window.switch_to()
337 | self.window.dispatch_events()
338 | image.blit(0,0)
339 | self.window.flip()
340 | def close(self):
341 | if self.isopen:
342 | self.window.close()
343 | self.isopen = False
344 | def __del__(self):
345 | self.close()
--------------------------------------------------------------------------------
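Below is a standalone sketch of the Viewer above drawing a single frame. It needs pyglet and a display, and the geometry, colors, and window size are arbitrary choices for illustration, not values taken from the repository.

# Draws one frame with the Viewer above; requires pyglet and a display.
from multiagent import rendering

viewer = rendering.Viewer(700, 700)
viewer.set_bounds(-1, 1, -1, 1)          # world coordinates in [-1, 1]^2

agent_geom = rendering.make_circle(radius=0.05)
agent_geom.set_color(0.35, 0.35, 0.85)
xform = rendering.Transform(translation=(0.2, -0.3))
agent_geom.add_attr(xform)
viewer.add_geom(agent_geom)

viewer.render()                           # draws the circle in a pyglet window
viewer.close()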
/MADQN/multiagent/scenario.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # defines scenario upon which the world is built
4 | class BaseScenario(object):
5 | # create elements of the world
6 | def make_world(self):
7 | raise NotImplementedError()
8 | # create initial conditions of the world
9 | def reset_world(self, world):
10 | raise NotImplementedError()
11 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | import imp
2 | import os.path as osp
3 |
4 |
5 | def load(name):
6 | pathname = osp.join(osp.dirname(__file__), name)
7 | return imp.load_source('', pathname)
8 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # add agents
9 | world.agents = [Agent() for i in range(1)]
10 | for i, agent in enumerate(world.agents):
11 | agent.name = 'agent %d' % i
12 | agent.collide = False
13 | agent.silent = True
14 | # add landmarks
15 | world.landmarks = [Landmark() for i in range(1)]
16 | for i, landmark in enumerate(world.landmarks):
17 | landmark.name = 'landmark %d' % i
18 | landmark.collide = False
19 | landmark.movable = False
20 | # make initial conditions
21 | self.reset_world(world)
22 | return world
23 |
24 | def reset_world(self, world):
25 | # random properties for agents
26 | for i, agent in enumerate(world.agents):
27 | agent.color = np.array([0.25,0.25,0.25])
28 | # random properties for landmarks
29 | for i, landmark in enumerate(world.landmarks):
30 | landmark.color = np.array([0.75,0.75,0.75])
31 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
32 | # set random initial states
33 | for agent in world.agents:
34 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
35 | agent.state.p_vel = np.zeros(world.dim_p)
36 | agent.state.c = np.zeros(world.dim_c)
37 | for i, landmark in enumerate(world.landmarks):
38 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
39 | landmark.state.p_vel = np.zeros(world.dim_p)
40 |
41 | def reward(self, agent, world):
42 | dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
43 | return -dist2
44 |
45 | def observation(self, agent, world):
46 | # get positions of all entities in this agent's reference frame
47 | entity_pos = []
48 | for entity in world.landmarks:
49 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
50 | return np.concatenate([agent.state.p_vel] + entity_pos)
51 |
--------------------------------------------------------------------------------
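A worked example of the reward and observation defined above, with made-up agent and landmark positions: the reward is the negative squared distance to the single landmark, and the observation concatenates the agent's velocity with the landmark's relative position.

# Worked example of Scenario.reward / Scenario.observation above
# (positions are made up for illustration).
import numpy as np

agent_pos, agent_vel = np.array([0.5, -0.5]), np.array([0.0, 0.0])
landmark_pos = np.array([0.2, 0.1])

reward = -np.sum(np.square(agent_pos - landmark_pos))        # -(0.3**2 + 0.6**2) = -0.45
obs = np.concatenate([agent_vel, landmark_pos - agent_pos])  # [0, 0, -0.3, 0.6]
print(reward, obs)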
/MADQN/multiagent/scenarios/simple_adversary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 |
8 | def make_world(self):
9 | world = World()
10 | # set any world properties first
11 | world.dim_c = 2
12 | num_agents = 3
13 | world.num_agents = num_agents
14 | num_adversaries = 1
15 | num_landmarks = num_agents - 1
16 | # add agents
17 | world.agents = [Agent() for i in range(num_agents)]
18 | for i, agent in enumerate(world.agents):
19 | agent.name = 'agent %d' % i
20 | agent.collide = False
21 | agent.silent = True
22 | agent.adversary = True if i < num_adversaries else False
23 | agent.size = 0.15
24 | # add landmarks
25 | world.landmarks = [Landmark() for i in range(num_landmarks)]
26 | for i, landmark in enumerate(world.landmarks):
27 | landmark.name = 'landmark %d' % i
28 | landmark.collide = False
29 | landmark.movable = False
30 | landmark.size = 0.08
31 | # make initial conditions
32 | self.reset_world(world)
33 | return world
34 |
35 | def reset_world(self, world):
36 | # random properties for agents
37 | world.agents[0].color = np.array([0.85, 0.35, 0.35])
38 | for i in range(1, world.num_agents):
39 | world.agents[i].color = np.array([0.35, 0.35, 0.85])
40 | # random properties for landmarks
41 | for i, landmark in enumerate(world.landmarks):
42 | landmark.color = np.array([0.15, 0.15, 0.15])
43 | # set goal landmark
44 | goal = np.random.choice(world.landmarks)
45 | goal.color = np.array([0.15, 0.65, 0.15])
46 | for agent in world.agents:
47 | agent.goal_a = goal
48 | # set random initial states
49 | for agent in world.agents:
50 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
51 | agent.state.p_vel = np.zeros(world.dim_p)
52 | agent.state.c = np.zeros(world.dim_c)
53 | for i, landmark in enumerate(world.landmarks):
54 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
55 | landmark.state.p_vel = np.zeros(world.dim_p)
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
61 | else:
62 | dists = []
63 | for l in world.landmarks:
64 | dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
65 | dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
66 | return tuple(dists)
67 |
68 | # return all agents that are not adversaries
69 | def good_agents(self, world):
70 | return [agent for agent in world.agents if not agent.adversary]
71 |
72 | # return all adversarial agents
73 | def adversaries(self, world):
74 | return [agent for agent in world.agents if agent.adversary]
75 |
76 | def reward(self, agent, world):
77 | # Agents are rewarded based on minimum agent distance to each landmark
78 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
79 |
80 | def agent_reward(self, agent, world):
81 | # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
82 | shaped_reward = True
83 | shaped_adv_reward = True
84 |
85 | # Calculate negative reward for adversary
86 | adversary_agents = self.adversaries(world)
87 | if shaped_adv_reward: # distance-based adversary reward
88 | adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
89 | else: # proximity-based adversary reward (binary)
90 | adv_rew = 0
91 | for a in adversary_agents:
92 | if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
93 | adv_rew -= 5
94 |
95 | # Calculate positive reward for agents
96 | good_agents = self.good_agents(world)
97 | if shaped_reward: # distance-based agent reward
98 | pos_rew = -min(
99 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
100 | else: # proximity-based agent reward (binary)
101 | pos_rew = 0
102 | if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
103 | < 2 * agent.goal_a.size:
104 | pos_rew += 5
105 | pos_rew -= min(
106 | [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
107 | return pos_rew + adv_rew
108 |
109 | def adversary_reward(self, agent, world):
110 | # Rewarded based on proximity to the goal landmark
111 | shaped_reward = True
112 | if shaped_reward: # distance-based reward
113 | return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
114 | else: # proximity-based reward (binary)
115 | adv_rew = 0
116 | if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
117 | adv_rew += 5
118 | return adv_rew
119 |
120 |
121 | def observation(self, agent, world):
122 | # get positions of all entities in this agent's reference frame
123 | entity_pos = []
124 | for entity in world.landmarks:
125 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
126 | # entity colors
127 | entity_color = []
128 | for entity in world.landmarks:
129 | entity_color.append(entity.color)
130 | # communication of all other agents
131 | other_pos = []
132 | for other in world.agents:
133 | if other is agent: continue
134 | other_pos.append(other.state.p_pos - agent.state.p_pos)
135 |
136 | if not agent.adversary:
137 | return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
138 | else:
139 | return np.concatenate(entity_pos + other_pos)
140 |
--------------------------------------------------------------------------------
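In the shaped setting used above, each good agent's reward is the adversary's distance to the goal minus the closest good agent's distance to the goal, while the adversary's shaped reward is the negative squared distance to the goal. A small worked example with made-up positions (not from the repository):

# Worked example of the shaped rewards in simple_adversary above.
import numpy as np

goal = np.array([0.0, 0.0])
adversary = np.array([0.8, 0.6])                      # distance 1.0 from the goal
good = [np.array([0.3, 0.4]), np.array([1.0, 0.0])]   # distances 0.5 and 1.0

adv_dist = np.sqrt(np.sum(np.square(adversary - goal)))                  # 1.0
closest_good = min(np.sqrt(np.sum(np.square(g - goal))) for g in good)   # 0.5

good_agent_reward = adv_dist - closest_good           # 1.0 - 0.5 = 0.5
adversary_reward = -np.sum(np.square(adversary - goal))  # -1.0 (squared distance)
print(good_agent_reward, adversary_reward)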
/MADQN/multiagent/scenarios/simple_crypto.py:
--------------------------------------------------------------------------------
1 | """
2 | Scenario:
3 | 1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from
4 | adversary to goal. Adversary is rewarded for its distance to the goal.
5 | """
6 |
7 |
8 | import numpy as np
9 | from multiagent.core import World, Agent, Landmark
10 | from multiagent.scenario import BaseScenario
11 | import random
12 |
13 |
14 | class CryptoAgent(Agent):
15 | def __init__(self):
16 | super(CryptoAgent, self).__init__()
17 | self.key = None
18 |
19 | class Scenario(BaseScenario):
20 |
21 | def make_world(self):
22 | world = World()
23 | # set any world properties first
24 | num_agents = 3
25 | num_adversaries = 1
26 | num_landmarks = 2
27 | world.dim_c = 4
28 | # add agents
29 | world.agents = [CryptoAgent() for i in range(num_agents)]
30 | for i, agent in enumerate(world.agents):
31 | agent.name = 'agent %d' % i
32 | agent.collide = False
33 | agent.adversary = True if i < num_adversaries else False
34 | agent.speaker = True if i == 2 else False
35 | agent.movable = False
36 | # add landmarks
37 | world.landmarks = [Landmark() for i in range(num_landmarks)]
38 | for i, landmark in enumerate(world.landmarks):
39 | landmark.name = 'landmark %d' % i
40 | landmark.collide = False
41 | landmark.movable = False
42 | # make initial conditions
43 | self.reset_world(world)
44 | return world
45 |
46 |
47 | def reset_world(self, world):
48 | # random properties for agents
49 | for i, agent in enumerate(world.agents):
50 | agent.color = np.array([0.25, 0.25, 0.25])
51 | if agent.adversary:
52 | agent.color = np.array([0.75, 0.25, 0.25])
53 | agent.key = None
54 | # random properties for landmarks
55 | color_list = [np.zeros(world.dim_c) for i in world.landmarks]
56 | for i, color in enumerate(color_list):
57 | color[i] += 1
58 | for color, landmark in zip(color_list, world.landmarks):
59 | landmark.color = color
60 | # set goal landmark
61 | goal = np.random.choice(world.landmarks)
62 | world.agents[1].color = goal.color
63 | world.agents[2].key = np.random.choice(world.landmarks).color
64 |
65 | for agent in world.agents:
66 | agent.goal_a = goal
67 |
68 | # set random initial states
69 | for agent in world.agents:
70 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
71 | agent.state.p_vel = np.zeros(world.dim_p)
72 | agent.state.c = np.zeros(world.dim_c)
73 | for i, landmark in enumerate(world.landmarks):
74 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
75 | landmark.state.p_vel = np.zeros(world.dim_p)
76 |
77 |
78 | def benchmark_data(self, agent, world):
79 | # returns data for benchmarking purposes
80 | return (agent.state.c, agent.goal_a.color)
81 |
82 | # return all agents that are not adversaries
83 | def good_listeners(self, world):
84 | return [agent for agent in world.agents if not agent.adversary and not agent.speaker]
85 |
86 | # return all agents that are not adversaries
87 | def good_agents(self, world):
88 | return [agent for agent in world.agents if not agent.adversary]
89 |
90 | # return all adversarial agents
91 | def adversaries(self, world):
92 | return [agent for agent in world.agents if agent.adversary]
93 |
94 | def reward(self, agent, world):
95 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
96 |
97 | def agent_reward(self, agent, world):
98 | # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot
99 | good_listeners = self.good_listeners(world)
100 | adversaries = self.adversaries(world)
101 | good_rew = 0
102 | adv_rew = 0
103 | for a in good_listeners:
104 | if (a.state.c == np.zeros(world.dim_c)).all():
105 | continue
106 | else:
107 | good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color))
108 | for a in adversaries:
109 | if (a.state.c == np.zeros(world.dim_c)).all():
110 | continue
111 | else:
112 | adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color))
113 | adv_rew += adv_l1
114 | return adv_rew + good_rew
115 |
116 | def adversary_reward(self, agent, world):
117 | # Adversary (Eve) is rewarded if it can reconstruct original goal
118 | rew = 0
119 | if not (agent.state.c == np.zeros(world.dim_c)).all():
120 | rew -= np.sum(np.square(agent.state.c - agent.goal_a.color))
121 | return rew
122 |
123 |
124 | def observation(self, agent, world):
125 | # goal color
126 | goal_color = np.zeros(world.dim_color)
127 | if agent.goal_a is not None:
128 | goal_color = agent.goal_a.color
129 |
130 | # get positions of all entities in this agent's reference frame
131 | entity_pos = []
132 | for entity in world.landmarks:
133 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
134 | # communication of all other agents
135 | comm = []
136 | for other in world.agents:
137 | if other is agent or (other.state.c is None) or not other.speaker: continue
138 | comm.append(other.state.c)
139 |
140 | confer = np.array([0])
141 |
142 | if world.agents[2].key is None:
143 | confer = np.array([1])
144 | key = np.zeros(world.dim_c)
145 | goal_color = np.zeros(world.dim_c)
146 | else:
147 | key = world.agents[2].key
148 |
149 | prnt = False
150 | # speaker
151 | if agent.speaker:
152 | if prnt:
153 | print('speaker')
154 | print(agent.state.c)
155 | print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)]))
156 | return np.concatenate([goal_color] + [key])
157 | # listener
158 | if not agent.speaker and not agent.adversary:
159 | if prnt:
160 | print('listener')
161 | print(agent.state.c)
162 | print(np.concatenate([key] + comm + [confer]))
163 | return np.concatenate([key] + comm)
164 | if not agent.speaker and agent.adversary:
165 | if prnt:
166 | print('adversary')
167 | print(agent.state.c)
168 | print(np.concatenate(comm + [confer]))
169 | return np.concatenate(comm)
170 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_push.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 2
10 | num_agents = 2
11 | num_adversaries = 1
12 | num_landmarks = 2
13 | # add agents
14 | world.agents = [Agent() for i in range(num_agents)]
15 | for i, agent in enumerate(world.agents):
16 | agent.name = 'agent %d' % i
17 | agent.collide = True
18 | agent.silent = True
19 | if i < num_adversaries:
20 | agent.adversary = True
21 | else:
22 | agent.adversary = False
23 | # add landmarks
24 | world.landmarks = [Landmark() for i in range(num_landmarks)]
25 | for i, landmark in enumerate(world.landmarks):
26 | landmark.name = 'landmark %d' % i
27 | landmark.collide = False
28 | landmark.movable = False
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # random properties for landmarks
35 | for i, landmark in enumerate(world.landmarks):
36 | landmark.color = np.array([0.1, 0.1, 0.1])
37 | landmark.color[i + 1] += 0.8
38 | landmark.index = i
39 | # set goal landmark
40 | goal = np.random.choice(world.landmarks)
41 | for i, agent in enumerate(world.agents):
42 | agent.goal_a = goal
43 | agent.color = np.array([0.25, 0.25, 0.25])
44 | if agent.adversary:
45 | agent.color = np.array([0.75, 0.25, 0.25])
46 | else:
47 | j = goal.index
48 | agent.color[j + 1] += 0.5
49 | # set random initial states
50 | for agent in world.agents:
51 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
52 | agent.state.p_vel = np.zeros(world.dim_p)
53 | agent.state.c = np.zeros(world.dim_c)
54 | for i, landmark in enumerate(world.landmarks):
55 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 | landmark.state.p_vel = np.zeros(world.dim_p)
57 |
58 | def reward(self, agent, world):
59 | # Agents are rewarded based on minimum agent distance to each landmark
60 | return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
61 |
62 | def agent_reward(self, agent, world):
63 | # the distance to the goal
64 | return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
65 |
66 | def adversary_reward(self, agent, world):
67 | # keep the nearest good agents away from the goal
68 | agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary]
69 | pos_rew = min(agent_dist)
70 | #nearest_agent = world.good_agents[np.argmin(agent_dist)]
71 | #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos)))
72 | neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)))
73 | #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents])
74 | return pos_rew - neg_rew
75 |
76 | def observation(self, agent, world):
77 | # get positions of all entities in this agent's reference frame
78 | entity_pos = []
79 | for entity in world.landmarks: # world.entities:
80 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
81 | # entity colors
82 | entity_color = []
83 | for entity in world.landmarks: # world.entities:
84 | entity_color.append(entity.color)
85 | # communication of all other agents
86 | comm = []
87 | other_pos = []
88 | for other in world.agents:
89 | if other is agent: continue
90 | comm.append(other.state.c)
91 | other_pos.append(other.state.p_pos - agent.state.p_pos)
92 | if not agent.adversary:
93 | return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos)
94 | else:
95 | #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network
96 | return np.concatenate([agent.state.p_vel] + entity_pos + other_pos)
97 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_reference.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 10
10 | world.collaborative = True # whether agents share rewards
11 | # add agents
12 | world.agents = [Agent() for i in range(2)]
13 | for i, agent in enumerate(world.agents):
14 | agent.name = 'agent %d' % i
15 | agent.collide = False
16 | # add landmarks
17 | world.landmarks = [Landmark() for i in range(3)]
18 | for i, landmark in enumerate(world.landmarks):
19 | landmark.name = 'landmark %d' % i
20 | landmark.collide = False
21 | landmark.movable = False
22 | # make initial conditions
23 | self.reset_world(world)
24 | return world
25 |
26 | def reset_world(self, world):
27 | # assign goals to agents
28 | for agent in world.agents:
29 | agent.goal_a = None
30 | agent.goal_b = None
31 | # want other agent to go to the goal landmark
32 | world.agents[0].goal_a = world.agents[1]
33 | world.agents[0].goal_b = np.random.choice(world.landmarks)
34 | world.agents[1].goal_a = world.agents[0]
35 | world.agents[1].goal_b = np.random.choice(world.landmarks)
36 | # random properties for agents
37 | for i, agent in enumerate(world.agents):
38 | agent.color = np.array([0.25,0.25,0.25])
39 | # random properties for landmarks
40 | world.landmarks[0].color = np.array([0.75,0.25,0.25])
41 | world.landmarks[1].color = np.array([0.25,0.75,0.25])
42 | world.landmarks[2].color = np.array([0.25,0.25,0.75])
43 | # special colors for goals
44 | world.agents[0].goal_a.color = world.agents[0].goal_b.color
45 | world.agents[1].goal_a.color = world.agents[1].goal_b.color
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | landmark.state.p_vel = np.zeros(world.dim_p)
54 |
55 | def reward(self, agent, world):
56 | if agent.goal_a is None or agent.goal_b is None:
57 | return 0.0
58 | dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
59 | return -dist2
60 |
61 | def observation(self, agent, world):
62 | # goal color
63 | goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
64 | if agent.goal_b is not None:
65 | goal_color[1] = agent.goal_b.color
66 |
67 | # get positions of all entities in this agent's reference frame
68 | entity_pos = []
69 | for entity in world.landmarks:
70 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
71 | # entity colors
72 | entity_color = []
73 | for entity in world.landmarks:
74 | entity_color.append(entity.color)
75 | # communication of all other agents
76 | comm = []
77 | for other in world.agents:
78 | if other is agent: continue
79 | comm.append(other.state.c)
80 | return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
81 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_speaker_listener.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 | class Scenario(BaseScenario):
6 | def make_world(self):
7 | world = World()
8 | # set any world properties first
9 | world.dim_c = 3
10 | num_landmarks = 3
11 | world.collaborative = True
12 | # add agents
13 | world.agents = [Agent() for i in range(2)]
14 | for i, agent in enumerate(world.agents):
15 | agent.name = 'agent %d' % i
16 | agent.collide = False
17 | agent.size = 0.075
18 | # speaker
19 | world.agents[0].movable = False
20 | # listener
21 | world.agents[1].silent = True
22 | # add landmarks
23 | world.landmarks = [Landmark() for i in range(num_landmarks)]
24 | for i, landmark in enumerate(world.landmarks):
25 | landmark.name = 'landmark %d' % i
26 | landmark.collide = False
27 | landmark.movable = False
28 | landmark.size = 0.04
29 | # make initial conditions
30 | self.reset_world(world)
31 | return world
32 |
33 | def reset_world(self, world):
34 | # assign goals to agents
35 | for agent in world.agents:
36 | agent.goal_a = None
37 | agent.goal_b = None
38 | # want listener to go to the goal landmark
39 | world.agents[0].goal_a = world.agents[1]
40 | world.agents[0].goal_b = np.random.choice(world.landmarks)
41 | # random properties for agents
42 | for i, agent in enumerate(world.agents):
43 | agent.color = np.array([0.25,0.25,0.25])
44 | # random properties for landmarks
45 | world.landmarks[0].color = np.array([0.65,0.15,0.15])
46 | world.landmarks[1].color = np.array([0.15,0.65,0.15])
47 | world.landmarks[2].color = np.array([0.15,0.15,0.65])
48 | # special colors for goals
49 | world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45])
50 | # set random initial states
51 | for agent in world.agents:
52 | agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 | agent.state.p_vel = np.zeros(world.dim_p)
54 | agent.state.c = np.zeros(world.dim_c)
55 | for i, landmark in enumerate(world.landmarks):
56 | landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
57 | landmark.state.p_vel = np.zeros(world.dim_p)
58 |
59 | def benchmark_data(self, agent, world):
60 | # returns data for benchmarking purposes
61 |         return self.reward(agent, world)
62 |
63 | def reward(self, agent, world):
64 | # squared distance from listener to landmark
65 | a = world.agents[0]
66 | dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos))
67 | return -dist2
68 |
69 | def observation(self, agent, world):
70 | # goal color
71 | goal_color = np.zeros(world.dim_color)
72 | if agent.goal_b is not None:
73 | goal_color = agent.goal_b.color
74 |
75 | # get positions of all entities in this agent's reference frame
76 | entity_pos = []
77 | for entity in world.landmarks:
78 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
79 |
80 | # communication of all other agents
81 | comm = []
82 | for other in world.agents:
83 | if other is agent or (other.state.c is None): continue
84 | comm.append(other.state.c)
85 |
86 | # speaker
87 | if not agent.movable:
88 | return np.concatenate([goal_color])
89 | # listener
90 | if agent.silent:
91 | return np.concatenate([agent.state.p_vel] + entity_pos + comm)
92 |
93 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_spread.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_agents = 3
12 | num_landmarks = 3
13 | world.collaborative = True
14 | # add agents
15 | world.agents = [Agent() for i in range(num_agents)]
16 | for i, agent in enumerate(world.agents):
17 | agent.name = 'agent %d' % i
18 | agent.collide = True
19 | agent.silent = True
20 | agent.size = 0.15
21 | # add landmarks
22 | world.landmarks = [Landmark() for i in range(num_landmarks)]
23 | for i, landmark in enumerate(world.landmarks):
24 | landmark.name = 'landmark %d' % i
25 | landmark.collide = False
26 | landmark.movable = False
27 | # make initial conditions
28 | self.reset_world(world)
29 | return world
30 |
31 | def reset_world(self, world):
32 | # random properties for agents
33 | for i, agent in enumerate(world.agents):
34 | agent.color = np.array([0.35, 0.35, 0.85])
35 | # random properties for landmarks
36 | for i, landmark in enumerate(world.landmarks):
37 | landmark.color = np.array([0.25, 0.25, 0.25])
38 | # set random initial states
39 | for agent in world.agents:
40 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
41 | agent.state.p_vel = np.zeros(world.dim_p)
42 | agent.state.c = np.zeros(world.dim_c)
43 | for i, landmark in enumerate(world.landmarks):
44 | landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
45 | landmark.state.p_vel = np.zeros(world.dim_p)
46 |
47 | def benchmark_data(self, agent, world):
48 | rew = 0
49 | collisions = 0
50 | occupied_landmarks = 0
51 | min_dists = 0
52 | for l in world.landmarks:
53 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
54 | min_dists += min(dists)
55 | rew -= min(dists)
56 | if min(dists) < 0.1:
57 | occupied_landmarks += 1
58 | if agent.collide:
59 | for a in world.agents:
60 | if self.is_collision(a, agent):
61 | rew -= 1
62 | collisions += 1
63 | return (rew, collisions, min_dists, occupied_landmarks)
64 |
65 |
66 | def is_collision(self, agent1, agent2):
67 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
68 | dist = np.sqrt(np.sum(np.square(delta_pos)))
69 | dist_min = agent1.size + agent2.size
70 |         return dist < dist_min
71 |
72 | def reward(self, agent, world):
73 | # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
74 | rew = 0
75 | for l in world.landmarks:
76 | dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
77 | rew -= min(dists)
78 | if agent.collide:
79 | for a in world.agents:
80 | if self.is_collision(a, agent):
81 | rew -= 1
82 | return rew
83 |
84 | def observation(self, agent, world):
85 | # get positions of all entities in this agent's reference frame
86 | entity_pos = []
87 | for entity in world.landmarks: # world.entities:
88 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
89 | # entity colors
90 | entity_color = []
91 | for entity in world.landmarks: # world.entities:
92 | entity_color.append(entity.color)
93 | # communication of all other agents
94 | comm = []
95 | other_pos = []
96 | for other in world.agents:
97 | if other is agent: continue
98 | comm.append(other.state.c)
99 | other_pos.append(other.state.p_pos - agent.state.p_pos)
100 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
101 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_tag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_good_agents = 1
12 | num_adversaries = 3
13 | num_agents = num_adversaries + num_good_agents
14 | num_landmarks = 2
15 | # add agents
16 | world.agents = [Agent() for i in range(num_agents)]
17 | for i, agent in enumerate(world.agents):
18 | agent.name = 'agent %d' % i
19 | agent.collide = True
20 | agent.silent = True
21 | agent.adversary = True if i < num_adversaries else False
22 | agent.size = 0.075 if agent.adversary else 0.05
23 | agent.accel = 3.0 if agent.adversary else 4.0
24 | #agent.accel = 20.0 if agent.adversary else 25.0
25 | agent.max_speed = 1.0 if agent.adversary else 1.3
26 | # add landmarks
27 | world.landmarks = [Landmark() for i in range(num_landmarks)]
28 | for i, landmark in enumerate(world.landmarks):
29 | landmark.name = 'landmark %d' % i
30 | landmark.collide = True
31 | landmark.movable = False
32 | landmark.size = 0.2
33 | landmark.boundary = False
34 | # make initial conditions
35 | self.reset_world(world)
36 | return world
37 |
38 |
39 | def reset_world(self, world):
40 | # random properties for agents
41 | for i, agent in enumerate(world.agents):
42 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
43 | # random properties for landmarks
44 | for i, landmark in enumerate(world.landmarks):
45 | landmark.color = np.array([0.25, 0.25, 0.25])
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | if not landmark.boundary:
53 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
54 | landmark.state.p_vel = np.zeros(world.dim_p)
55 |
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | collisions = 0
61 | for a in self.good_agents(world):
62 | if self.is_collision(a, agent):
63 | collisions += 1
64 | return collisions
65 | else:
66 | return 0
67 |
68 |
69 | def is_collision(self, agent1, agent2):
70 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
71 | dist = np.sqrt(np.sum(np.square(delta_pos)))
72 | dist_min = agent1.size + agent2.size
73 |         return dist < dist_min
74 |
75 | # return all agents that are not adversaries
76 | def good_agents(self, world):
77 | return [agent for agent in world.agents if not agent.adversary]
78 |
79 | # return all adversarial agents
80 | def adversaries(self, world):
81 | return [agent for agent in world.agents if agent.adversary]
82 |
83 |
84 | def reward(self, agent, world):
85 |         # Dispatch to the adversary or good-agent reward depending on the agent's role
86 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
87 | return main_reward
88 |
89 | def agent_reward(self, agent, world):
90 | # Agents are negatively rewarded if caught by adversaries
91 | rew = 0
92 | shape = False
93 | adversaries = self.adversaries(world)
94 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary)
95 | for adv in adversaries:
96 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
97 | if agent.collide:
98 | for a in adversaries:
99 | if self.is_collision(a, agent):
100 | rew -= 10
101 |
102 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries
103 | def bound(x):
104 | if x < 0.9:
105 | return 0
106 | if x < 1.0:
107 | return (x - 0.9) * 10
108 | return min(np.exp(2 * x - 2), 10)
109 | for p in range(world.dim_p):
110 | x = abs(agent.state.p_pos[p])
111 | rew -= bound(x)
112 |
113 | return rew
114 |
115 | def adversary_reward(self, agent, world):
116 | # Adversaries are rewarded for collisions with agents
117 | rew = 0
118 | shape = False
119 | agents = self.good_agents(world)
120 | adversaries = self.adversaries(world)
121 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents)
122 | for adv in adversaries:
123 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
124 | if agent.collide:
125 | for ag in agents:
126 | for adv in adversaries:
127 | if self.is_collision(ag, adv):
128 | rew += 10
129 | return rew
130 |
131 | def observation(self, agent, world):
132 | # get positions of all entities in this agent's reference frame
133 | entity_pos = []
134 | for entity in world.landmarks:
135 | if not entity.boundary:
136 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
137 | # communication of all other agents
138 | comm = []
139 | other_pos = []
140 | other_vel = []
141 | for other in world.agents:
142 | if other is agent: continue
143 | comm.append(other.state.c)
144 | other_pos.append(other.state.p_pos - agent.state.p_pos)
145 | if not other.adversary:
146 | other_vel.append(other.state.p_vel)
147 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
148 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_tag_v1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 2
11 | num_good_agents = 1
12 | num_adversaries = 2
13 | num_agents = num_adversaries + num_good_agents
14 | num_landmarks = 0
15 | # add agents
16 | world.agents = [Agent() for i in range(num_agents)]
17 | for i, agent in enumerate(world.agents):
18 | agent.name = 'agent %d' % i
19 | agent.collide = True
20 | agent.silent = True
21 | agent.adversary = True if i < num_adversaries else False
22 | agent.size = 0.075 if agent.adversary else 0.05
23 | agent.accel = 3.0 if agent.adversary else 4.0
24 | #agent.accel = 20.0 if agent.adversary else 25.0
25 | agent.max_speed = 1.0 if agent.adversary else 1.3
26 | # add landmarks
27 | world.landmarks = [Landmark() for i in range(num_landmarks)]
28 | for i, landmark in enumerate(world.landmarks):
29 | landmark.name = 'landmark %d' % i
30 | landmark.collide = True
31 | landmark.movable = False
32 | landmark.size = 0.2
33 | landmark.boundary = False
34 | # make initial conditions
35 | self.reset_world(world)
36 | return world
37 |
38 |
39 | def reset_world(self, world):
40 | # random properties for agents
41 | for i, agent in enumerate(world.agents):
42 | agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
43 | # random properties for landmarks
44 | for i, landmark in enumerate(world.landmarks):
45 | landmark.color = np.array([0.25, 0.25, 0.25])
46 | # set random initial states
47 | for agent in world.agents:
48 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
49 | agent.state.p_vel = np.zeros(world.dim_p)
50 | agent.state.c = np.zeros(world.dim_c)
51 | for i, landmark in enumerate(world.landmarks):
52 | if not landmark.boundary:
53 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
54 | landmark.state.p_vel = np.zeros(world.dim_p)
55 |
56 |
57 | def benchmark_data(self, agent, world):
58 | # returns data for benchmarking purposes
59 | if agent.adversary:
60 | collisions = 0
61 | for a in self.good_agents(world):
62 | if self.is_collision(a, agent):
63 | collisions += 1
64 | return collisions
65 | else:
66 | return 0
67 |
68 |
69 | def is_collision(self, agent1, agent2):
70 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
71 | dist = np.sqrt(np.sum(np.square(delta_pos)))
72 | dist_min = agent1.size + agent2.size
73 |         return dist < dist_min
74 |
75 | # return all agents that are not adversaries
76 | def good_agents(self, world):
77 | return [agent for agent in world.agents if not agent.adversary]
78 |
79 | # return all adversarial agents
80 | def adversaries(self, world):
81 | return [agent for agent in world.agents if agent.adversary]
82 |
83 |
84 | def reward(self, agent, world):
85 |         # Dispatch to the adversary or good-agent reward depending on the agent's role
86 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
87 | return main_reward
88 |
89 | def agent_reward(self, agent, world):
90 | # Agents are negatively rewarded if caught by adversaries
91 | rew = 0
92 | #shape = False
93 | shape = True
94 | adversaries = self.adversaries(world)
95 | if shape: # reward can optionally be shaped (increased reward for increased distance from adversary)
96 | for adv in adversaries:
97 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
98 | if agent.collide:
99 | for a in adversaries:
100 | if self.is_collision(a, agent):
101 | rew -= 10
102 |
103 | # agents are penalized for exiting the screen, so that they can be caught by the adversaries
104 | def bound(x):
105 | if x < 0.9:
106 | return 0
107 | if x < 1.0:
108 | return (x - 0.9) * 10
109 | return min(np.exp(2 * x - 2), 10)
110 | for p in range(world.dim_p):
111 | x = abs(agent.state.p_pos[p])
112 | rew -= bound(x)
113 |
114 | return rew
115 |
116 | def adversary_reward(self, agent, world):
117 | # Adversaries are rewarded for collisions with agents
118 | rew = 0
119 | #shape = False
120 | shape = True
121 | agents = self.good_agents(world)
122 | adversaries = self.adversaries(world)
123 | if shape: # reward can optionally be shaped (decreased reward for increased distance from agents)
124 | for adv in adversaries:
125 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
126 | if agent.collide:
127 | for ag in agents:
128 | for adv in adversaries:
129 | if self.is_collision(ag, adv):
130 | rew += 10
131 | return rew
132 |
133 | def observation(self, agent, world):
134 | # get positions of all entities in this agent's reference frame
135 | entity_pos = []
136 | for entity in world.landmarks:
137 | if not entity.boundary:
138 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
139 | # communication of all other agents
140 | comm = []
141 | other_pos = []
142 | other_vel = []
143 | for other in world.agents:
144 | if other is agent: continue
145 | comm.append(other.state.c)
146 | other_pos.append(other.state.p_pos - agent.state.p_pos)
147 | if not other.adversary:
148 | other_vel.append(other.state.p_vel)
149 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
150 |
151 | def done(self, agent, world):
152 | for p in range(world.dim_p):
153 | x = abs(agent.state.p_pos[p])
154 | if (x > 1.0):
155 | return True
156 |
157 | return False
158 |
159 |
--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_world_comm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from multiagent.core import World, Agent, Landmark
3 | from multiagent.scenario import BaseScenario
4 |
5 |
6 | class Scenario(BaseScenario):
7 | def make_world(self):
8 | world = World()
9 | # set any world properties first
10 | world.dim_c = 4
11 | #world.damping = 1
12 | num_good_agents = 2
13 | num_adversaries = 4
14 | num_agents = num_adversaries + num_good_agents
15 | num_landmarks = 1
16 | num_food = 2
17 | num_forests = 2
18 | # add agents
19 | world.agents = [Agent() for i in range(num_agents)]
20 | for i, agent in enumerate(world.agents):
21 | agent.name = 'agent %d' % i
22 | agent.collide = True
23 | agent.leader = True if i == 0 else False
24 | agent.silent = True if i > 0 else False
25 | agent.adversary = True if i < num_adversaries else False
26 | agent.size = 0.075 if agent.adversary else 0.045
27 | agent.accel = 3.0 if agent.adversary else 4.0
28 | #agent.accel = 20.0 if agent.adversary else 25.0
29 | agent.max_speed = 1.0 if agent.adversary else 1.3
30 | # add landmarks
31 | world.landmarks = [Landmark() for i in range(num_landmarks)]
32 | for i, landmark in enumerate(world.landmarks):
33 | landmark.name = 'landmark %d' % i
34 | landmark.collide = True
35 | landmark.movable = False
36 | landmark.size = 0.2
37 | landmark.boundary = False
38 | world.food = [Landmark() for i in range(num_food)]
39 | for i, landmark in enumerate(world.food):
40 | landmark.name = 'food %d' % i
41 | landmark.collide = False
42 | landmark.movable = False
43 | landmark.size = 0.03
44 | landmark.boundary = False
45 | world.forests = [Landmark() for i in range(num_forests)]
46 | for i, landmark in enumerate(world.forests):
47 | landmark.name = 'forest %d' % i
48 | landmark.collide = False
49 | landmark.movable = False
50 | landmark.size = 0.3
51 | landmark.boundary = False
52 | world.landmarks += world.food
53 | world.landmarks += world.forests
54 | #world.landmarks += self.set_boundaries(world) # world boundaries now penalized with negative reward
55 | # make initial conditions
56 | self.reset_world(world)
57 | return world
58 |
59 | def set_boundaries(self, world):
60 | boundary_list = []
61 | landmark_size = 1
62 | edge = 1 + landmark_size
63 | num_landmarks = int(edge * 2 / landmark_size)
64 | for x_pos in [-edge, edge]:
65 | for i in range(num_landmarks):
66 | l = Landmark()
67 | l.state.p_pos = np.array([x_pos, -1 + i * landmark_size])
68 | boundary_list.append(l)
69 |
70 | for y_pos in [-edge, edge]:
71 | for i in range(num_landmarks):
72 | l = Landmark()
73 | l.state.p_pos = np.array([-1 + i * landmark_size, y_pos])
74 | boundary_list.append(l)
75 |
76 | for i, l in enumerate(boundary_list):
77 | l.name = 'boundary %d' % i
78 | l.collide = True
79 | l.movable = False
80 | l.boundary = True
81 | l.color = np.array([0.75, 0.75, 0.75])
82 | l.size = landmark_size
83 | l.state.p_vel = np.zeros(world.dim_p)
84 |
85 | return boundary_list
86 |
87 |
88 | def reset_world(self, world):
89 | # random properties for agents
90 | for i, agent in enumerate(world.agents):
91 | agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45])
92 | agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0])
93 | # random properties for landmarks
94 | for i, landmark in enumerate(world.landmarks):
95 | landmark.color = np.array([0.25, 0.25, 0.25])
96 | for i, landmark in enumerate(world.food):
97 | landmark.color = np.array([0.15, 0.15, 0.65])
98 | for i, landmark in enumerate(world.forests):
99 | landmark.color = np.array([0.6, 0.9, 0.6])
100 | # set random initial states
101 | for agent in world.agents:
102 | agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
103 | agent.state.p_vel = np.zeros(world.dim_p)
104 | agent.state.c = np.zeros(world.dim_c)
105 | for i, landmark in enumerate(world.landmarks):
106 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
107 | landmark.state.p_vel = np.zeros(world.dim_p)
108 | for i, landmark in enumerate(world.food):
109 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
110 | landmark.state.p_vel = np.zeros(world.dim_p)
111 | for i, landmark in enumerate(world.forests):
112 | landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
113 | landmark.state.p_vel = np.zeros(world.dim_p)
114 |
115 | def benchmark_data(self, agent, world):
116 | if agent.adversary:
117 | collisions = 0
118 | for a in self.good_agents(world):
119 | if self.is_collision(a, agent):
120 | collisions += 1
121 | return collisions
122 | else:
123 | return 0
124 |
125 |
126 | def is_collision(self, agent1, agent2):
127 | delta_pos = agent1.state.p_pos - agent2.state.p_pos
128 | dist = np.sqrt(np.sum(np.square(delta_pos)))
129 | dist_min = agent1.size + agent2.size
130 |         return dist < dist_min
131 |
132 |
133 | # return all agents that are not adversaries
134 | def good_agents(self, world):
135 | return [agent for agent in world.agents if not agent.adversary]
136 |
137 | # return all adversarial agents
138 | def adversaries(self, world):
139 | return [agent for agent in world.agents if agent.adversary]
140 |
141 |
142 | def reward(self, agent, world):
143 |         # Dispatch to the adversary or good-agent reward depending on the agent's role
144 | #boundary_reward = -10 if self.outside_boundary(agent) else 0
145 | main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
146 | return main_reward
147 |
148 | def outside_boundary(self, agent):
149 | if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1:
150 | return True
151 | else:
152 | return False
153 |
154 |
155 | def agent_reward(self, agent, world):
156 |         # Good agents are penalized for collisions with adversaries and for leaving the arena, and rewarded for reaching food
157 | rew = 0
158 | shape = False
159 | adversaries = self.adversaries(world)
160 | if shape:
161 | for adv in adversaries:
162 | rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
163 | if agent.collide:
164 | for a in adversaries:
165 | if self.is_collision(a, agent):
166 | rew -= 5
167 | def bound(x):
168 | if x < 0.9:
169 | return 0
170 | if x < 1.0:
171 | return (x - 0.9) * 10
172 | return min(np.exp(2 * x - 2), 10) # 1 + (x - 1) * (x - 1)
173 |
174 | for p in range(world.dim_p):
175 | x = abs(agent.state.p_pos[p])
176 | rew -= 2 * bound(x)
177 |
178 | for food in world.food:
179 | if self.is_collision(agent, food):
180 | rew += 2
181 | rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food])
182 |
183 | return rew
184 |
185 | def adversary_reward(self, agent, world):
186 |         # Adversaries are rewarded for collisions with good agents, with optional distance-based shaping
187 | rew = 0
188 | shape = True
189 | agents = self.good_agents(world)
190 | adversaries = self.adversaries(world)
191 | if shape:
192 | rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents])
193 | if agent.collide:
194 | for ag in agents:
195 | for adv in adversaries:
196 | if self.is_collision(ag, adv):
197 | rew += 5
198 | return rew
199 |
200 |
201 | def observation2(self, agent, world):
202 | # get positions of all entities in this agent's reference frame
203 | entity_pos = []
204 | for entity in world.landmarks:
205 | if not entity.boundary:
206 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
207 |
208 | food_pos = []
209 | for entity in world.food:
210 | if not entity.boundary:
211 | food_pos.append(entity.state.p_pos - agent.state.p_pos)
212 | # communication of all other agents
213 | comm = []
214 | other_pos = []
215 | other_vel = []
216 | for other in world.agents:
217 | if other is agent: continue
218 | comm.append(other.state.c)
219 | other_pos.append(other.state.p_pos - agent.state.p_pos)
220 | if not other.adversary:
221 | other_vel.append(other.state.p_vel)
222 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
223 |
224 | def observation(self, agent, world):
225 | # get positions of all entities in this agent's reference frame
226 | entity_pos = []
227 | for entity in world.landmarks:
228 | if not entity.boundary:
229 | entity_pos.append(entity.state.p_pos - agent.state.p_pos)
230 |
231 | in_forest = [np.array([-1]), np.array([-1])]
232 | inf1 = False
233 | inf2 = False
234 | if self.is_collision(agent, world.forests[0]):
235 | in_forest[0] = np.array([1])
236 | inf1= True
237 | if self.is_collision(agent, world.forests[1]):
238 | in_forest[1] = np.array([1])
239 | inf2 = True
240 |
241 | food_pos = []
242 | for entity in world.food:
243 | if not entity.boundary:
244 | food_pos.append(entity.state.p_pos - agent.state.p_pos)
245 | # communication of all other agents
246 | comm = []
247 | other_pos = []
248 | other_vel = []
249 | for other in world.agents:
250 | if other is agent: continue
251 | comm.append(other.state.c)
252 | oth_f1 = self.is_collision(other, world.forests[0])
253 | oth_f2 = self.is_collision(other, world.forests[1])
254 |             if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader:  # visible if both agents share a forest, both are outside forests, or this agent is the leader
255 | other_pos.append(other.state.p_pos - agent.state.p_pos)
256 | if not other.adversary:
257 | other_vel.append(other.state.p_vel)
258 | else:
259 | other_pos.append([0, 0])
260 | if not other.adversary:
261 | other_vel.append([0, 0])
262 |
263 | # to tell the pred when the prey are in the forest
264 | prey_forest = []
265 | ga = self.good_agents(world)
266 | for a in ga:
267 | if any([self.is_collision(a, f) for f in world.forests]):
268 | prey_forest.append(np.array([1]))
269 | else:
270 | prey_forest.append(np.array([-1]))
271 | # to tell leader when pred are in forest
272 | prey_forest_lead = []
273 | for f in world.forests:
274 | if any([self.is_collision(a, f) for a in ga]):
275 | prey_forest_lead.append(np.array([1]))
276 | else:
277 | prey_forest_lead.append(np.array([-1]))
278 |
279 | comm = [world.agents[0].state.c]
280 |
281 | if agent.adversary and not agent.leader:
282 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
283 | if agent.leader:
284 | return np.concatenate(
285 | [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
286 | else:
287 | return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel)
288 |
289 |
290 |
--------------------------------------------------------------------------------
/MADQN/readme.txt:
--------------------------------------------------------------------------------
1 | In this project we test the MADQN algorithm.
--------------------------------------------------------------------------------
/MADQN/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='multiagent',
4 | version='0.0.1',
5 | description='Multi-Agent Goal-Driven Communication Environment',
6 | url='https://github.com/openai/multiagent-public',
7 | author='Igor Mordatch',
8 | author_email='mordatch@openai.com',
9 | packages=find_packages(),
10 | include_package_data=True,
11 | zip_safe=False,
12 | install_requires=['gym', 'numpy-stl']
13 | )
14 |
--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/run_parameters.json:
--------------------------------------------------------------------------------
1 | {"random_seed": 2, "render": false, "learning_rate": 0.001, "testing": false, "benchmark": false, "batch_size": 128, "epsilon_greedy": null, "csv_filename_prefix": "/save/statistics-dqn", "episodes": 60000, "weights_filename_prefix": "/save/tag-dqn", "memory_size": 10000, "env": "simple_tag_v1", "experiment_prefix": "./results/dqn_1v2/", "checkpoint_frequency": 500}
--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_0.h5
--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_1.h5
--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_2.h5
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Multiagent reinforcement learning algorithms for multiple-UAV confrontation
2 | This is the source code for "Efficient training techniques for multi-agent reinforcement learning in combatant tasks".
3 | We construct a multi-agent confrontation environment derived from a combat scenario involving multiple unmanned aerial vehicles (UAVs).
4 | To begin with, we consider solving this confrontation problem with two types of MARL algorithms.
5 | One extends the classical deep Q-network to multi-agent settings (MADQN).
6 | The other builds on the state-of-the-art multi-agent reinforcement learning method, multi-agent deep deterministic policy gradient (MADDPG).
7 | We compare the two methods on the initial confrontation scenario and find that MADDPG outperforms MADQN.
8 | Then, with MADDPG as the baseline, we propose three efficient training techniques: scenario-transfer training, self-play training, and rule-coupled training. (A minimal environment-loading sketch appears at the end of this README.)
9 |
10 | 
11 |
12 | Rule-coupled red agents vs. randomly moving blue agents
13 |
14 | 
15 |
16 | Rule-coupled red agents vs. blue agents trained by self-play
17 |
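18 | ## Quick start (sketch)
19 |
20 | As a quick orientation, the sketch below shows one way to load the 1-vs-2 pursuit scenario (`simple_tag_v1`, the environment named in `MADQN/test/results/dqn_1v2/save/run_parameters.json`) through `MADQN/make_env.py` and step it with random actions. It assumes the standard multiagent-particle-envs interface (`reset()` returning a list of per-agent observations, `step()` taking a list of per-agent action vectors, one discrete action space per agent); the action encoding used by the training scripts in this repository may differ, so treat this as an unverified sketch rather than the project's entry point.
21 |
22 | ```python
23 | # Run from the MADQN/ directory so make_env.py and the multiagent package are importable.
24 | import numpy as np
25 | from make_env import make_env  # MADQN/make_env.py
26 |
27 | # Build the 1-vs-2 pursuit scenario (2 adversaries chase 1 good agent; see simple_tag_v1.py).
28 | env = make_env('simple_tag_v1')
29 | obs_n = env.reset()  # list with one observation vector per agent
30 |
31 | for _ in range(25):
32 |     # Random one-hot physical actions, assuming a Discrete(n) action space per agent.
33 |     act_n = []
34 |     for space in env.action_space:
35 |         a = np.zeros(space.n)
36 |         a[np.random.randint(space.n)] = 1.0
37 |         act_n.append(a)
38 |     obs_n, rew_n, done_n, info_n = env.step(act_n)
39 | ```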
--------------------------------------------------------------------------------
/Rule-coupled vs Random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/Rule-coupled vs Random.gif
--------------------------------------------------------------------------------
/Rule-coupled vs Selfplay.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/Rule-coupled vs Selfplay.gif
--------------------------------------------------------------------------------