├── MADDPG
    ├── .idea
    │   ├── encodings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   ├── openai-maddpg.iml
    │   ├── vcs.xml
    │   └── workspace.xml
    ├── maddpg
    │   ├── .DS_Store
    │   ├── .vscode
    │   │   └── settings.json
    │   ├── LICENSE.txt
    │   ├── README.md
    │   ├── checkpoint
    │   ├── maddpg.egg-info
    │   │   ├── PKG-INFO
    │   │   ├── SOURCES.txt
    │   │   ├── dependency_links.txt
    │   │   ├── not-zip-safe
    │   │   ├── requires.txt
    │   │   └── top_level.txt
    │   ├── maddpg
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-35.pyc
    │   │   ├── common
    │   │   │   ├── __pycache__
    │   │   │   │   ├── distributions.cpython-35.pyc
    │   │   │   │   └── tf_util.cpython-35.pyc
    │   │   │   ├── distributions.py
    │   │   │   └── tf_util.py
    │   │   └── trainer
    │   │   │   ├── __pycache__
    │   │   │       ├── maddpg.cpython-35.pyc
    │   │   │       └── replay_buffer.cpython-35.pyc
    │   │   │   ├── maddpg.py
    │   │   │   └── replay_buffer.py
    │   └── setup.py
    ├── multiagent-particle-envs
    │   ├── .DS_Store
    │   ├── .vscode
    │   │   └── settings.json
    │   ├── LICENSE.txt
    │   ├── README.md
    │   ├── bin
    │   │   ├── __init__.py
    │   │   └── interactive.py
    │   ├── multiagent.egg-info
    │   │   ├── PKG-INFO
    │   │   ├── SOURCES.txt
    │   │   ├── dependency_links.txt
    │   │   ├── not-zip-safe
    │   │   ├── requires.txt
    │   │   └── top_level.txt
    │   ├── multiagent
    │   │   ├── .DS_Store
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-35.pyc
    │   │   │   ├── core.cpython-35.pyc
    │   │   │   ├── environment.cpython-35.pyc
    │   │   │   ├── multi_discrete.cpython-35.pyc
    │   │   │   ├── rendering.cpython-35.pyc
    │   │   │   └── scenario.cpython-35.pyc
    │   │   ├── core.py
    │   │   ├── environment-tmp.py
    │   │   ├── environment.py
    │   │   ├── multi_discrete.py
    │   │   ├── policy.py
    │   │   ├── rendering.py
    │   │   ├── scenario.py
    │   │   └── scenarios
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │       ├── __init__.cpython-35.pyc
    │   │   │       ├── competition_3v3.cpython-35.pyc
    │   │   │       ├── simple.cpython-35.pyc
    │   │   │       ├── simple_tag_v1.cpython-35.pyc
    │   │   │       └── simple_tag_yuan_v2.cpython-35.pyc
    │   │   │   ├── angle_3v3.py
    │   │   │   ├── competition_3v3-tmp.py
    │   │   │   ├── competition_3v3.py
    │   │   │   ├── simple.py
    │   │   │   ├── simple_adversary.py
    │   │   │   ├── simple_crypto.py
    │   │   │   ├── simple_push.py
    │   │   │   ├── simple_reference.py
    │   │   │   ├── simple_speaker_listener.py
    │   │   │   ├── simple_spread.py
    │   │   │   ├── simple_tag_v1.py
    │   │   │   └── simple_world_comm.py
    │   └── setup.py
    └── reward setting
├── MADQN
    ├── .gitignore
    ├── LICENSE.txt
    ├── README.md
    ├── bin
    │   ├── __init__.py
    │   └── interactive.py
    ├── dqn.py
    ├── dqn_tag.py
    ├── make_env.py
    ├── multiagent
    │   ├── __init__.py
    │   ├── core.py
    │   ├── environment.py
    │   ├── multi_discrete.py
    │   ├── policy.py
    │   ├── rendering.py
    │   ├── scenario.py
    │   └── scenarios
    │   │   ├── __init__.py
    │   │   ├── simple.py
    │   │   ├── simple_adversary.py
    │   │   ├── simple_crypto.py
    │   │   ├── simple_push.py
    │   │   ├── simple_reference.py
    │   │   ├── simple_speaker_listener.py
    │   │   ├── simple_spread.py
    │   │   ├── simple_tag.py
    │   │   ├── simple_tag_v1.py
    │   │   └── simple_world_comm.py
    ├── readme.txt
    ├── setup.py
    └── test
    │   └── results
    │       └── dqn_1v2
    │           └── save
    │               ├── run_parameters.json
    │               ├── tag-dqn_21500_0.h5
    │               ├── tag-dqn_21500_1.h5
    │               └── tag-dqn_21500_2.h5
├── README.md
├── Rule-coupled vs Random.gif
└── Rule-coupled vs Selfplay.gif


/MADDPG/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding" addBOMForNewFiles="with NO BOM" />
4 | </project>


--------------------------------------------------------------------------------
/MADDPG/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/MADDPG/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/openai-maddpg.iml" filepath="$PROJECT_DIR$/.idea/openai-maddpg.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/MADDPG/.idea/openai-maddpg.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/MADDPG/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$/maddpg" vcs="Git" />
5 |     <mapping directory="$PROJECT_DIR$/multiagent-particle-envs" vcs="Git" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/MADDPG/.idea/workspace.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project version="4">
  3 |   <component name="ChangeListManager">
  4 |     <list default="true" id="f45b6dd9-a2ba-41f9-8ae9-47850b9007b4" name="Default Changelist" comment="" />
  5 |     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
  6 |     <option name="SHOW_DIALOG" value="false" />
  7 |     <option name="HIGHLIGHT_CONFLICTS" value="true" />
  8 |     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
  9 |     <option name="LAST_RESOLUTION" value="IGNORE" />
 10 |   </component>
 11 |   <component name="FileEditorManager">
 12 |     <leaf>
 13 |       <file pinned="false" current-in-tab="false">
 14 |         <entry file="file://$PROJECT_DIR$/maddpg/experiments/train.py">
 15 |           <provider selected="true" editor-type-id="text-editor">
 16 |             <state relative-caret-position="-1516">
 17 |               <caret line="317" column="44" selection-start-line="317" selection-start-column="44" selection-end-line="317" selection-end-column="44" />
 18 |               <folding>
 19 |                 <element signature="e#0#15#0" expanded="true" />
 20 |               </folding>
 21 |             </state>
 22 |           </provider>
 23 |         </entry>
 24 |       </file>
 25 |       <file pinned="false" current-in-tab="true">
 26 |         <entry file="file://$PROJECT_DIR$/multiagent-particle-envs/multiagent/scenarios/competition_3v3.py">
 27 |           <provider selected="true" editor-type-id="text-editor">
 28 |             <state relative-caret-position="-7414">
 29 |               <caret line="29" column="32" lean-forward="true" selection-start-line="29" selection-start-column="32" selection-end-line="29" selection-end-column="32" />
 30 |               <folding>
 31 |                 <element signature="e#0#18#0" expanded="true" />
 32 |               </folding>
 33 |             </state>
 34 |           </provider>
 35 |         </entry>
 36 |       </file>
 37 |     </leaf>
 38 |   </component>
 39 |   <component name="FindInProjectRecents">
 40 |     <findStrings>
 41 |       <find>1000</find>
 42 |     </findStrings>
 43 |   </component>
 44 |   <component name="Git.Settings">
 45 |     <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/multiagent-particle-envs" />
 46 |   </component>
 47 |   <component name="IdeDocumentHistory">
 48 |     <option name="CHANGED_PATHS">
 49 |       <list>
 50 |         <option value="$PROJECT_DIR$/maddpg/experiments/train.py" />
 51 |         <option value="$PROJECT_DIR$/multiagent-particle-envs/multiagent/scenarios/competition_3v3.py" />
 52 |       </list>
 53 |     </option>
 54 |   </component>
 55 |   <component name="ProjectFrameBounds" extendedState="6">
 56 |     <option name="x" value="163" />
 57 |     <option name="y" value="7" />
 58 |     <option name="width" value="1388" />
 59 |     <option name="height" value="878" />
 60 |   </component>
 61 |   <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
 62 |   <component name="ProjectView">
 63 |     <navigator proportions="" version="1">
 64 |       <foldersAlwaysOnTop value="true" />
 65 |     </navigator>
 66 |     <panes>
 67 |       <pane id="ProjectPane">
 68 |         <subPane>
 69 |           <expand>
 70 |             <path>
 71 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 72 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 73 |             </path>
 74 |             <path>
 75 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 76 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 77 |               <item name="maddpg" type="462c0819:PsiDirectoryNode" />
 78 |             </path>
 79 |             <path>
 80 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 81 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 82 |               <item name="maddpg" type="462c0819:PsiDirectoryNode" />
 83 |               <item name="experiments" type="462c0819:PsiDirectoryNode" />
 84 |             </path>
 85 |             <path>
 86 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 87 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 88 |               <item name="multiagent-particle-envs" type="462c0819:PsiDirectoryNode" />
 89 |             </path>
 90 |             <path>
 91 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 92 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 93 |               <item name="multiagent-particle-envs" type="462c0819:PsiDirectoryNode" />
 94 |               <item name="multiagent" type="462c0819:PsiDirectoryNode" />
 95 |             </path>
 96 |             <path>
 97 |               <item name="openai-maddpg" type="b2602c69:ProjectViewProjectNode" />
 98 |               <item name="openai-maddpg" type="462c0819:PsiDirectoryNode" />
 99 |               <item name="multiagent-particle-envs" type="462c0819:PsiDirectoryNode" />
100 |               <item name="multiagent" type="462c0819:PsiDirectoryNode" />
101 |               <item name="scenarios" type="462c0819:PsiDirectoryNode" />
102 |             </path>
103 |           </expand>
104 |           <select />
105 |         </subPane>
106 |       </pane>
107 |       <pane id="Scope" />
108 |     </panes>
109 |   </component>
110 |   <component name="RunDashboard">
111 |     <option name="ruleStates">
112 |       <list>
113 |         <RuleState>
114 |           <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
115 |         </RuleState>
116 |         <RuleState>
117 |           <option name="name" value="StatusDashboardGroupingRule" />
118 |         </RuleState>
119 |       </list>
120 |     </option>
121 |   </component>
122 |   <component name="SvnConfiguration">
123 |     <configuration />
124 |   </component>
125 |   <component name="TaskManager">
126 |     <task active="true" id="Default" summary="Default task">
127 |       <changelist id="f45b6dd9-a2ba-41f9-8ae9-47850b9007b4" name="Default Changelist" comment="" />
128 |       <created>1547086714070</created>
129 |       <option name="number" value="Default" />
130 |       <option name="presentableId" value="Default" />
131 |       <updated>1547086714070</updated>
132 |     </task>
133 |     <servers />
134 |   </component>
135 |   <component name="ToolWindowManager">
136 |     <frame x="65" y="-4" width="1855" height="954" extended-state="6" />
137 |     <editor active="true" />
138 |     <layout>
139 |       <window_info id="Favorites" side_tool="true" />
140 |       <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24959038" />
141 |       <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
142 |       <window_info anchor="bottom" id="Version Control" />
143 |       <window_info anchor="bottom" id="Python Console" />
144 |       <window_info anchor="bottom" id="Terminal" />
145 |       <window_info anchor="bottom" id="Event Log" side_tool="true" />
146 |       <window_info anchor="bottom" id="Message" order="0" />
147 |       <window_info anchor="bottom" id="Find" order="1" />
148 |       <window_info anchor="bottom" id="Run" order="2" />
149 |       <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
150 |       <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
151 |       <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
152 |       <window_info anchor="bottom" id="TODO" order="6" />
153 |       <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
154 |       <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
155 |       <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
156 |     </layout>
157 |   </component>
158 |   <component name="editorHistoryManager">
159 |     <entry file="file://$PROJECT_DIR$/maddpg/experiments/train.py">
160 |       <provider selected="true" editor-type-id="text-editor">
161 |         <state relative-caret-position="-1516">
162 |           <caret line="317" column="44" selection-start-line="317" selection-start-column="44" selection-end-line="317" selection-end-column="44" />
163 |           <folding>
164 |             <element signature="e#0#15#0" expanded="true" />
165 |           </folding>
166 |         </state>
167 |       </provider>
168 |     </entry>
169 |     <entry file="file://$PROJECT_DIR$/multiagent-particle-envs/multiagent/scenarios/competition_3v3.py">
170 |       <provider selected="true" editor-type-id="text-editor">
171 |         <state relative-caret-position="-7414">
172 |           <caret line="29" column="32" lean-forward="true" selection-start-line="29" selection-start-column="32" selection-end-line="29" selection-end-column="32" />
173 |           <folding>
174 |             <element signature="e#0#18#0" expanded="true" />
175 |           </folding>
176 |         </state>
177 |       </provider>
178 |     </entry>
179 |   </component>
180 | </project>


--------------------------------------------------------------------------------
/MADDPG/maddpg/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/.DS_Store


--------------------------------------------------------------------------------
/MADDPG/maddpg/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.pythonPath": "/Users/xunyunliu/anaconda3/envs/tf/bin/python"
3 | }


--------------------------------------------------------------------------------
/MADDPG/maddpg/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 OpenAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "/home/airc/python_code/openai-maddpg/maddpg/experiments"
2 | all_model_checkpoint_paths: "/home/airc/python_code/openai-maddpg/maddpg/experiments"
3 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: maddpg
 3 | Version: 0.0.1
 4 | Summary: Multi-Agent Deep Deterministic Policy Gradient
 5 | Home-page: https://github.com/openai/maddpg
 6 | Author: Igor Mordatch
 7 | Author-email: mordatch@openai.com
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | maddpg/__init__.py
4 | maddpg.egg-info/PKG-INFO
5 | maddpg.egg-info/SOURCES.txt
6 | maddpg.egg-info/dependency_links.txt
7 | maddpg.egg-info/not-zip-safe
8 | maddpg.egg-info/requires.txt
9 | maddpg.egg-info/top_level.txt


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/not-zip-safe:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | gym
2 | numpy-stl
3 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | maddpg
2 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/__init__.py:
--------------------------------------------------------------------------------
 1 | class AgentTrainer(object):
 2 |     def __init__(self, name, model, obs_shape, act_space, args):
 3 |         raise NotImplemented()
 4 | 
 5 |     def action(self, obs):
 6 |         raise NotImplemented()
 7 | 
 8 |     def process_experience(self, obs, act, rew, new_obs, done, terminal):
 9 |         raise NotImplemented()
10 | 
11 |     def preupdate(self):
12 |         raise NotImplemented()
13 | 
14 |     def update(self, agents):
15 |         raise NotImplemented()


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/common/__pycache__/distributions.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/common/__pycache__/distributions.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/common/__pycache__/tf_util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/common/__pycache__/tf_util.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/__pycache__/maddpg.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/trainer/__pycache__/maddpg.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/__pycache__/replay_buffer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/maddpg/maddpg/trainer/__pycache__/replay_buffer.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/maddpg.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import random
  3 | import tensorflow as tf
  4 | import maddpg.common.tf_util as U
  5 | 
  6 | from maddpg.common.distributions import make_pdtype
  7 | from maddpg import AgentTrainer
  8 | from maddpg.trainer.replay_buffer import ReplayBuffer
  9 | 
 10 | 
 11 | def discount_with_dones(rewards, dones, gamma):
 12 |     discounted = []
 13 |     r = 0
 14 |     for reward, done in zip(rewards[::-1], dones[::-1]):
 15 |         r = reward + gamma*r
 16 |         r = r*(1.-done)
 17 |         discounted.append(r)
 18 |     return discounted[::-1]
 19 | 
 20 | def make_update_exp(vals, target_vals):
 21 |     polyak = 1.0 - 1e-2
 22 |     expression = []
 23 |     for var, var_target in zip(sorted(vals, key=lambda v: v.name), sorted(target_vals, key=lambda v: v.name)):
 24 |         expression.append(var_target.assign(polyak * var_target + (1.0-polyak) * var))
 25 |     expression = tf.group(*expression)
 26 |     return U.function([], [], updates=[expression])
 27 | 
 28 | def p_train(make_obs_ph_n, act_space_n, p_index, p_func, q_func, optimizer, grad_norm_clipping=None, local_q_func=False, num_units=64, scope="trainer", reuse=None):
 29 |     with tf.variable_scope(scope, reuse=reuse):
 30 |         # create distribtuions
 31 |         act_pdtype_n = [make_pdtype(act_space) for act_space in act_space_n]
 32 | 
 33 |         # set up placeholders
 34 |         obs_ph_n = make_obs_ph_n
 35 |         act_ph_n = [act_pdtype_n[i].sample_placeholder([None], name="action"+str(i)) for i in range(len(act_space_n))]
 36 | 
 37 |         p_input = obs_ph_n[p_index]
 38 | 
 39 |         p = p_func(p_input, int(act_pdtype_n[p_index].param_shape()[0]), scope="p_func", num_units=num_units)
 40 |         p_func_vars = U.scope_vars(U.absolute_scope_name("p_func"))
 41 | 
 42 |         # wrap parameters in distribution
 43 |         act_pd = act_pdtype_n[p_index].pdfromflat(p)
 44 | 
 45 |         act_sample = act_pd.sample()
 46 |         p_reg = tf.reduce_mean(tf.square(act_pd.flatparam()))
 47 | 
 48 |         act_input_n = act_ph_n + []
 49 |         act_input_n[p_index] = act_pd.sample()
 50 |         q_input = tf.concat(obs_ph_n + act_input_n, 1)
 51 |         if local_q_func:
 52 |             q_input = tf.concat([obs_ph_n[p_index], act_input_n[p_index]], 1)
 53 |         q = q_func(q_input, 1, scope="q_func", reuse=True, num_units=num_units)[:,0]
 54 |         pg_loss = -tf.reduce_mean(q)
 55 | 
 56 |         loss = pg_loss + p_reg * 1e-3
 57 | 
 58 |         optimize_expr = U.minimize_and_clip(optimizer, loss, p_func_vars, grad_norm_clipping)
 59 | 
 60 |         # Create callable functions
 61 |         train = U.function(inputs=obs_ph_n + act_ph_n, outputs=loss, updates=[optimize_expr])
 62 |         act = U.function(inputs=[obs_ph_n[p_index]], outputs=act_sample)
 63 | 
 64 |         act_test = U.function(inputs=[obs_ph_n[p_index]], outputs=p)
 65 | 
 66 |         p_values = U.function([obs_ph_n[p_index]], p)
 67 | 
 68 |         # target network
 69 |         target_p = p_func(p_input, int(act_pdtype_n[p_index].param_shape()[0]), scope="target_p_func", num_units=num_units)
 70 |         target_p_func_vars = U.scope_vars(U.absolute_scope_name("target_p_func"))
 71 |         update_target_p = make_update_exp(p_func_vars, target_p_func_vars)
 72 | 
 73 |         target_act_sample = act_pdtype_n[p_index].pdfromflat(target_p).sample()
 74 |         target_act = U.function(inputs=[obs_ph_n[p_index]], outputs=target_act_sample)
 75 | 
 76 |         return act_test, act, train, update_target_p, {'p_values': p_values, 'target_act': target_act, 'p_vars': p_func_vars, 'target_p_vars': target_p_func_vars}
 77 | 
 78 | def q_train(make_obs_ph_n, act_space_n, q_index, q_func, optimizer, grad_norm_clipping=None, local_q_func=False, scope="trainer", reuse=None, num_units=64):
 79 |     with tf.variable_scope(scope, reuse=reuse):
 80 |         # create distribtuions
 81 |         act_pdtype_n = [make_pdtype(act_space) for act_space in act_space_n]
 82 | 
 83 |         # set up placeholders
 84 |         obs_ph_n = make_obs_ph_n
 85 |         act_ph_n = [act_pdtype_n[i].sample_placeholder([None], name="action"+str(i)) for i in range(len(act_space_n))]
 86 |         target_ph = tf.placeholder(tf.float32, [None], name="target")
 87 | 
 88 |         q_input = tf.concat(obs_ph_n + act_ph_n, 1)
 89 |         if local_q_func:
 90 |             q_input = tf.concat([obs_ph_n[q_index], act_ph_n[q_index]], 1)
 91 |         q = q_func(q_input, 1, scope="q_func", num_units=num_units)[:,0]
 92 |         q_func_vars = U.scope_vars(U.absolute_scope_name("q_func"))
 93 | 
 94 |         q_loss = tf.reduce_mean(tf.square(q - target_ph))
 95 | 
 96 |         # viscosity solution to Bellman differential equation in place of an initial condition
 97 |         q_reg = tf.reduce_mean(tf.square(q))
 98 |         loss = q_loss #+ 1e-3 * q_reg
 99 | 
100 |         optimize_expr = U.minimize_and_clip(optimizer, loss, q_func_vars, grad_norm_clipping)
101 | 
102 |         # Create callable functions
103 |         train = U.function(inputs=obs_ph_n + act_ph_n + [target_ph], outputs=loss, updates=[optimize_expr])
104 |         q_values = U.function(obs_ph_n + act_ph_n, q)
105 | 
106 |         # target network
107 |         target_q = q_func(q_input, 1, scope="target_q_func", num_units=num_units)[:,0]
108 |         target_q_func_vars = U.scope_vars(U.absolute_scope_name("target_q_func"))
109 |         update_target_q = make_update_exp(q_func_vars, target_q_func_vars)
110 | 
111 |         target_q_values = U.function(obs_ph_n + act_ph_n, target_q)
112 | 
113 |         return train, update_target_q, {'q_values': q_values, 'target_q_values': target_q_values, 'q_vars': q_func_vars, 'target_q_vars': target_q_func_vars}
114 | 
115 | class MADDPGAgentTrainer(AgentTrainer):
116 |     def __init__(self, name, model, obs_shape_n, act_space_n, agent_index, args, local_q_func=False):
117 |         self.name = name
118 |         self.n = len(obs_shape_n) 
119 |         #self.n = args.num_adversaries########
120 | 
121 |         self.agent_index = agent_index
122 |         self.args = args
123 |         obs_ph_n = []
124 |         for i in range(self.n):
125 |             obs_ph_n.append(U.BatchInput(obs_shape_n[i], name="observation"+str(i)).get())
126 | 
127 |         # Create all the functions necessary to train the model
128 |         self.q_train, self.q_update, self.q_debug = q_train(
129 |             scope=self.name,
130 |             make_obs_ph_n=obs_ph_n,
131 |             act_space_n=act_space_n,
132 |             q_index=agent_index,
133 |             q_func=model,
134 |             optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
135 |             grad_norm_clipping=0.5,
136 |             local_q_func=local_q_func,
137 |             num_units=args.num_units
138 |         )
139 |         self.act_test, self.act, self.p_train, self.p_update, self.p_debug = p_train(
140 |             scope=self.name,
141 |             make_obs_ph_n=obs_ph_n,
142 |             act_space_n=act_space_n,
143 |             p_index=agent_index,
144 |             p_func=model,
145 |             q_func=model,
146 |             optimizer=tf.train.AdamOptimizer(learning_rate=args.lr),
147 |             grad_norm_clipping=0.5,
148 |             local_q_func=local_q_func,
149 |             num_units=args.num_units
150 |         )
151 |         # Create experience buffer
152 |         self.replay_buffer = ReplayBuffer(1e6)
153 |         self.max_replay_buffer_len = args.batch_size * args.max_episode_len
154 |         self.replay_sample_index = None
155 | 
156 |     def action(self, obs):
157 |         return self.act(obs[None])[0]
158 |     
159 |     def action_test(self, obs):
160 |         return self.act_test(obs[None])[0]
161 | 
162 |     def experience(self, obs, act, rew, new_obs, done, terminal):
163 |         # Store transition in the replay buffer.
164 |         self.replay_buffer.add(obs, act, rew, new_obs, float(done))
165 | 
166 |     def preupdate(self):
167 |         self.replay_sample_index = None
168 | 
169 |     def update(self, agents, t):
170 |         if len(self.replay_buffer) < self.max_replay_buffer_len: # replay buffer is not large enough
171 |             return
172 |         if not t % 100 == 0:  # only update every 100 steps
173 |             return
174 | 
175 |         self.replay_sample_index = self.replay_buffer.make_index(self.args.batch_size)
176 |         # collect replay sample from all agents
177 |         obs_n = []
178 |         obs_next_n = []
179 |         act_n = []
180 |         index = self.replay_sample_index
181 |         
182 |         for i in range(self.n):
183 |             ####changed by liyuan
184 |             #tmp_index = agents[i].replay_buffer.make_index(self.args.batch_size)
185 |             #obs, act, rew, obs_next, done = agents[i].replay_buffer.sample_index(tmp_index)
186 |             
187 |             obs, act, rew, obs_next, done = agents[i].replay_buffer.sample_index(index)
188 |             obs_n.append(obs)
189 |             obs_next_n.append(obs_next)
190 |             act_n.append(act)
191 |         obs, act, rew, obs_next, done = self.replay_buffer.sample_index(index)
192 | 
193 |         # train q network
194 |         num_sample = 1
195 |         target_q = 0.0
196 |         for j in range(num_sample):
197 |             '''
198 |             #####green nodes take dqn
199 |             target_act_next_n = []
200 |             for i in range(self.n):
201 |                 if i<self.args.num_adversaries:
202 |                     target_act_next_n.append(agents[i].p_debug['target_act'](obs_next_n[i]))
203 |                 else:
204 |                     target_act_next_n.append(agents[i].get_next_action(obs_next_n[i]))
205 |             '''
206 |             #####green nodes take ddpg
207 |             target_act_next_n = [agents[i].p_debug['target_act'](obs_next_n[i]) for i in range(self.n)]
208 | 
209 |             target_q_next = self.q_debug['target_q_values'](*(obs_next_n + target_act_next_n))
210 |             target_q += rew + self.args.gamma * (1.0 - done) * target_q_next
211 |         target_q /= num_sample
212 |         q_loss = self.q_train(*(obs_n + act_n + [target_q]))
213 | 
214 |         # train p network
215 |         p_loss = self.p_train(*(obs_n + act_n))
216 | 
217 |         self.p_update()
218 |         self.q_update()
219 | 
220 |         return [q_loss, p_loss, np.mean(target_q), np.mean(rew), np.mean(target_q_next), np.std(target_q)]
221 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/maddpg/trainer/replay_buffer.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import random
 3 | 
 4 | class ReplayBuffer(object):
 5 |     def __init__(self, size):
 6 |         """Create Prioritized Replay buffer.
 7 | 
 8 |         Parameters
 9 |         ----------
10 |         size: int
11 |             Max number of transitions to store in the buffer. When the buffer
12 |             overflows the old memories are dropped.
13 |         """
14 |         self._storage = []
15 |         self._maxsize = int(size)
16 |         self._next_idx = 0
17 | 
18 |     def __len__(self):
19 |         return len(self._storage)
20 | 
21 |     def clear(self):
22 |         self._storage = []
23 |         self._next_idx = 0
24 | 
25 |     def add(self, obs_t, action, reward, obs_tp1, done):
26 |         data = (obs_t, action, reward, obs_tp1, done)
27 | 
28 |         if self._next_idx >= len(self._storage):
29 |             self._storage.append(data)
30 |         else:
31 |             self._storage[self._next_idx] = data
32 |         self._next_idx = (self._next_idx + 1) % self._maxsize
33 | 
34 |     def _encode_sample(self, idxes):
35 |         obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
36 |         for i in idxes:
37 |             data = self._storage[i]
38 |             obs_t, action, reward, obs_tp1, done = data
39 |             obses_t.append(np.array(obs_t, copy=False))
40 |             actions.append(np.array(action, copy=False))
41 |             rewards.append(reward)
42 |             obses_tp1.append(np.array(obs_tp1, copy=False))
43 |             dones.append(done)
44 |         return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)
45 | 
46 |     def make_index(self, batch_size):
47 |         return [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
48 | 
49 |     def make_latest_index(self, batch_size):
50 |         idx = [(self._next_idx - 1 - i) % self._maxsize for i in range(batch_size)]
51 |         np.random.shuffle(idx)
52 |         return idx
53 | 
54 |     def sample_index(self, idxes):
55 |         return self._encode_sample(idxes)
56 | 
57 |     def sample(self, batch_size):
58 |         """Sample a batch of experiences.
59 | 
60 |         Parameters
61 |         ----------
62 |         batch_size: int
63 |             How many transitions to sample.
64 | 
65 |         Returns
66 |         -------
67 |         obs_batch: np.array
68 |             batch of observations
69 |         act_batch: np.array
70 |             batch of actions executed given obs_batch
71 |         rew_batch: np.array
72 |             rewards received as results of executing act_batch
73 |         next_obs_batch: np.array
74 |             next set of observations seen after executing act_batch
75 |         done_mask: np.array
76 |             done_mask[i] = 1 if executing act_batch[i] resulted in
77 |             the end of an episode and 0 otherwise.
78 |         """
79 |         if batch_size > 0:
80 |             idxes = self.make_index(batch_size)
81 |         else:
82 |             idxes = range(0, len(self._storage))
83 |         return self._encode_sample(idxes)
84 | 
85 |     def collect(self):
86 |         return self.sample(-1)
87 | 


--------------------------------------------------------------------------------
/MADDPG/maddpg/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(name='maddpg',
 4 |       version='0.0.1',
 5 |       description='Multi-Agent Deep Deterministic Policy Gradient',
 6 |       url='https://github.com/openai/maddpg',
 7 |       author='Igor Mordatch',
 8 |       author_email='mordatch@openai.com',
 9 |       packages=find_packages(),
10 |       include_package_data=True,
11 |       zip_safe=False,
12 |       install_requires=['gym', 'numpy-stl']
13 | )
14 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/.DS_Store


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "python.pythonPath": "/Users/xunyunliu/anaconda3/envs/tf/bin/python"
3 | }


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 OpenAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/README.md:
--------------------------------------------------------------------------------
 1 | **Status:** Archive (code is provided as-is, no updates expected)
 2 | 
 3 | # Multi-Agent Particle Environment
 4 | 
 5 | A simple multi-agent particle world with a continuous observation and discrete action space, along with some basic simulated physics.
 6 | Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments](https://arxiv.org/pdf/1706.02275.pdf).
 7 | 
 8 | ## Getting started:
 9 | 
10 | - To install, `cd` into the root directory and type `pip install -e .`
11 | 
12 | - To interactively view moving to landmark scenario (see others in ./scenarios/):
13 | `bin/interactive.py --scenario simple.py`
14 | 
15 | - Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
16 | 
17 | - To use the environments, look at the code for importing them in `make_env.py`.
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/bin/__init__.py


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/bin/interactive.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os,sys
 3 | sys.path.insert(1, os.path.join(sys.path[0], '..'))
 4 | import argparse
 5 | 
 6 | from multiagent.environment import MultiAgentEnv
 7 | from multiagent.policy import InteractivePolicy
 8 | import multiagent.scenarios as scenarios
 9 | 
10 | if __name__ == '__main__':
11 |     # parse arguments
12 |     parser = argparse.ArgumentParser(description=None)
13 |     parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.')
14 |     args = parser.parse_args()
15 | 
16 |     # load scenario from script
17 |     scenario = scenarios.load(args.scenario).Scenario()
18 |     # create world
19 |     world = scenario.make_world()
20 |     # create multiagent environment
21 |     env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False)
22 |     # render call to create viewer window (necessary only for interactive policies)
23 |     env.render()
24 |     # create interactive policies for each agent
25 |     policies = [InteractivePolicy(env,i) for i in range(env.n)]
26 |     # execution loop
27 |     obs_n = env.reset()
28 |     while True:
29 |         # query for action from each agent's policy
30 |         act_n = []
31 |         for i, policy in enumerate(policies):
32 |             act_n.append(policy.action(obs_n[i]))
33 |         # step environment
34 |         obs_n, reward_n, done_n, _ = env.step(act_n)
35 |         # render all agent views
36 |         env.render()
37 |         # display rewards
38 |         #for agent in env.world.agents:
39 |         #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))
40 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: multiagent
 3 | Version: 0.0.1
 4 | Summary: Multi-Agent Goal-Driven Communication Environment
 5 | Home-page: https://github.com/openai/multiagent-public
 6 | Author: Igor Mordatch
 7 | Author-email: mordatch@openai.com
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
 1 | README.md
 2 | setup.py
 3 | bin/__init__.py
 4 | bin/interactive.py
 5 | multiagent/__init__.py
 6 | multiagent/core.py
 7 | multiagent/environment.py
 8 | multiagent/multi_discrete.py
 9 | multiagent/policy.py
10 | multiagent/rendering.py
11 | multiagent/scenario.py
12 | multiagent.egg-info/PKG-INFO
13 | multiagent.egg-info/SOURCES.txt
14 | multiagent.egg-info/dependency_links.txt
15 | multiagent.egg-info/not-zip-safe
16 | multiagent.egg-info/requires.txt
17 | multiagent.egg-info/top_level.txt
18 | multiagent/scenarios/__init__.py
19 | multiagent/scenarios/angle_3v3.py
20 | multiagent/scenarios/competition_3v3.py
21 | multiagent/scenarios/simple.py
22 | multiagent/scenarios/simple_adversary.py
23 | multiagent/scenarios/simple_crypto.py
24 | multiagent/scenarios/simple_push.py
25 | multiagent/scenarios/simple_reference.py
26 | multiagent/scenarios/simple_speaker_listener.py
27 | multiagent/scenarios/simple_spread.py
28 | multiagent/scenarios/simple_tag_v1.py
29 | multiagent/scenarios/simple_world_comm.py


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/not-zip-safe:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | gym
2 | numpy-stl
3 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | bin
2 | multiagent
3 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/.DS_Store


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | # Multiagent envs
 4 | # ----------------------------------------
 5 | 
 6 | register(
 7 |     id='MultiagentSimple-v0',
 8 |     entry_point='multiagent.envs:SimpleEnv',
 9 |     # FIXME(cathywu) currently has to be exactly max_path_length parameters in
10 |     # rllab run script
11 |     max_episode_steps=100,
12 | )
13 | 
14 | register(
15 |     id='MultiagentSimpleSpeakerListener-v0',
16 |     entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
17 |     max_episode_steps=100,
18 | )
19 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/core.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/core.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/environment.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/environment.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/multi_discrete.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/multi_discrete.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/rendering.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/rendering.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/__pycache__/scenario.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/__pycache__/scenario.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/core.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | # physical/external base state of all entites
  4 | class EntityState(object):
  5 |     def __init__(self):
  6 |         # physical position
  7 |         self.p_pos = None
  8 |         # physical velocity
  9 |         self.p_vel = None
 10 | 
 11 | # state of agents (including communication and internal/mental state)
 12 | class AgentState(EntityState):
 13 |     def __init__(self):
 14 |         super(AgentState, self).__init__()
 15 |         # communication utterance
 16 |         self.c = None
 17 | 
 18 | # action of the agent
 19 | class Action(object):
 20 |     def __init__(self):
 21 |         # physical action
 22 |         self.u = None
 23 |         # communication action
 24 |         self.c = None
 25 | 
 26 | # properties and state of physical world entity
 27 | class Entity(object):
 28 |     def __init__(self):
 29 |         # name 
 30 |         self.name = ''
 31 |         # properties:
 32 |         self.size = 0.050
 33 |         # entity can move / be pushed
 34 |         self.movable = False
 35 |         # entity collides with others
 36 |         self.collide = True
 37 |         # material density (affects mass)
 38 |         self.density = 25.0
 39 |         # color
 40 |         self.color = None
 41 |         # max speed and accel
 42 |         self.max_speed = None
 43 |         self.accel = None
 44 |         # state
 45 |         self.state = EntityState()
 46 |         # mass
 47 |         self.initial_mass = 1.0
 48 | 
 49 |     @property
 50 |     def mass(self):
 51 |         return self.initial_mass
 52 | 
 53 | # properties of landmark entities
 54 | class Landmark(Entity):
 55 |      def __init__(self):
 56 |         super(Landmark, self).__init__()
 57 | 
 58 | # properties of agent entities
 59 | class Agent(Entity):
 60 |     def __init__(self):
 61 |         super(Agent, self).__init__()
 62 |         # agents are movable by default
 63 |         self.movable = True
 64 |         # cannot send communication signals
 65 |         self.silent = False
 66 |         # cannot observe the world
 67 |         self.blind = False
 68 |         # physical motor noise amount
 69 |         self.u_noise = None
 70 |         # communication noise amount
 71 |         self.c_noise = None
 72 |         # control range
 73 |         self.u_range = 1.0
 74 |         # state
 75 |         self.state = AgentState()
 76 |         # action
 77 |         self.action = Action()
 78 |         # script behavior to execute
 79 |         self.action_callback = None
 80 | 
 81 |         self.death = False
 82 | 
 83 | # multi-agent world
 84 | class World(object):
 85 |     def __init__(self):
 86 |         # list of agents and entities (can change at execution-time!)
 87 |         self.agents = []
 88 |         self.landmarks = []
 89 |         # communication channel dimensionality
 90 |         self.dim_c = 0
 91 |         # position dimensionality
 92 |         self.dim_p = 2
 93 |         # color dimensionality
 94 |         self.dim_color = 3
 95 |         # simulation timestep
 96 |         self.dt = 0.1
 97 |         # physical damping
 98 |         self.damping = 0.25
 99 |         # contact response parameters
100 |         self.contact_force = 1e+2
101 |         self.contact_margin = 1e-3
102 | 
103 |     # return all entities in the world
104 |     @property
105 |     def entities(self):
106 |         return self.agents + self.landmarks
107 | 
108 |     # return all agents controllable by external policies
109 |     @property
110 |     def policy_agents(self):
111 |         return [agent for agent in self.agents if agent.action_callback is None]
112 | 
113 |     # return all agents controlled by world scripts
114 |     @property
115 |     def scripted_agents(self):
116 |         return [agent for agent in self.agents if agent.action_callback is not None]
117 | 
118 |     # update state of the world
119 |     def step(self):
120 |         # set actions for scripted agents 
121 |         for agent in self.scripted_agents:
122 |             agent.action = agent.action_callback(agent, self)
123 |         # gather forces applied to entities
124 |         p_force = [None] * len(self.entities)
125 |         # apply agent physical controls
126 |         p_force = self.apply_action_force(p_force)
127 |         # apply environment forces
128 |         p_force = self.apply_environment_force(p_force)
129 |         # integrate physical state
130 |         self.integrate_state(p_force)
131 |         # update agent state
132 |         for agent in self.agents:
133 |             self.update_agent_state(agent)
134 | 
135 |     # gather agent action forces
136 |     def apply_action_force(self, p_force):
137 |         # set applied forces
138 |         for i,agent in enumerate(self.agents):
139 |             if agent.movable:
140 |                 noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
141 |                 p_force[i] = agent.action.u + noise                
142 |         return p_force
143 | 
144 |     # gather physical forces acting on entities
145 |     def apply_environment_force(self, p_force):
146 |         # simple (but inefficient) collision response
147 |         for a,entity_a in enumerate(self.entities):
148 |             for b,entity_b in enumerate(self.entities):
149 |                 if(b <= a): continue
150 |                 [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
151 |                 if(f_a is not None):
152 |                     if(p_force[a] is None): p_force[a] = 0.0
153 |                     p_force[a] = f_a + p_force[a] 
154 |                 if(f_b is not None):
155 |                     if(p_force[b] is None): p_force[b] = 0.0
156 |                     p_force[b] = f_b + p_force[b]        
157 |         return p_force
158 | 
159 |     # integrate physical state
160 |     def integrate_state(self, p_force):
161 |         for i,entity in enumerate(self.entities):
162 |             if not entity.movable: continue
163 |             entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
164 |             if (p_force[i] is not None):
165 |                 entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
166 |             if entity.max_speed is not None:
167 |                 speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
168 |                 if speed > entity.max_speed:
169 |                     entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
170 |                                                                   np.square(entity.state.p_vel[1])) * entity.max_speed
171 |             entity.state.p_pos += entity.state.p_vel * self.dt
172 | 
173 |     def update_agent_state(self, agent):
174 |         # set communication state (directly for now)
175 |         if agent.silent:
176 |             agent.state.c = np.zeros(self.dim_c)
177 |         else:
178 |             noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
179 |             agent.state.c = agent.action.c + noise      
180 | 
181 |     # get collision forces for any contact between two entities
182 |     def get_collision_force(self, entity_a, entity_b):
183 |         if (not entity_a.collide) or (not entity_b.collide):
184 |             return [None, None] # not a collider
185 |         if (entity_a is entity_b):
186 |             return [None, None] # don't collide against itself
187 |         # compute actual distance between entities
188 |         delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
189 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
190 |         # minimum allowable distance
191 |         dist_min = entity_a.size + entity_b.size
192 |         # softmax penetration
193 |         k = self.contact_margin
194 |         penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
195 |         force = self.contact_force * delta_pos / dist * penetration
196 |         force_a = +force if entity_a.movable else None
197 |         force_b = -force if entity_b.movable else None
198 |         return [force_a, force_b]


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/multi_discrete.py:
--------------------------------------------------------------------------------
 1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
 2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
 3 | 
 4 | import numpy as np
 5 | 
 6 | import gym
 7 | from gym.spaces import prng
 8 | 
 9 | class MultiDiscrete(gym.Space):
10 |     """
11 |     - The multi-discrete action space consists of a series of discrete action spaces with different parameters
12 |     - It can be adapted to both a Discrete action space or a continuous (Box) action space
13 |     - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
14 |     - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
15 |        where the discrete action space can take any integers from `min` to `max` (both inclusive)
16 |     Note: A value of 0 always need to represent the NOOP action.
17 |     e.g. Nintendo Game Controller
18 |     - Can be conceptualized as 3 discrete action spaces:
19 |         1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
20 |         2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
21 |         3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
22 |     - Can be initialized as
23 |         MultiDiscrete([ [0,4], [0,1], [0,1] ])
24 |     """
25 |     def __init__(self, array_of_param_array):
26 |         self.low = np.array([x[0] for x in array_of_param_array])
27 |         self.high = np.array([x[1] for x in array_of_param_array])
28 |         self.num_discrete_space = self.low.shape[0]
29 | 
30 |     def sample(self):
31 |         """ Returns a array with one sample from each discrete action space """
32 |         # For each row: round(random .* (max - min) + min, 0)
33 |         random_array = prng.np_random.rand(self.num_discrete_space)
34 |         return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
35 |     def contains(self, x):
36 |         return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
37 | 
38 |     @property
39 |     def shape(self):
40 |         return self.num_discrete_space
41 |     def __repr__(self):
42 |         return "MultiDiscrete" + str(self.num_discrete_space)
43 |     def __eq__(self, other):
44 |         return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/policy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from pyglet.window import key
 3 | 
 4 | # individual agent policy
 5 | class Policy(object):
 6 |     def __init__(self):
 7 |         pass
 8 |     def action(self, obs):
 9 |         raise NotImplementedError()
10 | 
11 | # interactive policy based on keyboard input
12 | # hard-coded to deal only with movement, not communication
13 | class InteractivePolicy(Policy):
14 |     def __init__(self, env, agent_index):
15 |         super(InteractivePolicy, self).__init__()
16 |         self.env = env
17 |         # hard-coded keyboard events
18 |         self.move = [False for i in range(4)]
19 |         self.comm = [False for i in range(env.world.dim_c)]
20 |         # register keyboard events with this environment's window
21 |         env.viewers[agent_index].window.on_key_press = self.key_press
22 |         env.viewers[agent_index].window.on_key_release = self.key_release
23 | 
24 |     def action(self, obs):
25 |         # ignore observation and just act based on keyboard events
26 |         if self.env.discrete_action_input:
27 |             u = 0
28 |             if self.move[0]: u = 1
29 |             if self.move[1]: u = 2
30 |             if self.move[2]: u = 4
31 |             if self.move[3]: u = 3
32 |         else:
33 |             u = np.zeros(5) # 5-d because of no-move action
34 |             if self.move[0]: u[1] += 1.0
35 |             if self.move[1]: u[2] += 1.0
36 |             if self.move[3]: u[3] += 1.0
37 |             if self.move[2]: u[4] += 1.0
38 |             if True not in self.move:
39 |                 u[0] += 1.0
40 |         return np.concatenate([u, np.zeros(self.env.world.dim_c)])
41 | 
42 |     # keyboard event callbacks
43 |     def key_press(self, k, mod):
44 |         if k==key.LEFT:  self.move[0] = True
45 |         if k==key.RIGHT: self.move[1] = True
46 |         if k==key.UP:    self.move[2] = True
47 |         if k==key.DOWN:  self.move[3] = True
48 |     def key_release(self, k, mod):
49 |         if k==key.LEFT:  self.move[0] = False
50 |         if k==key.RIGHT: self.move[1] = False
51 |         if k==key.UP:    self.move[2] = False
52 |         if k==key.DOWN:  self.move[3] = False
53 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/rendering.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 2D rendering framework
  3 | """
  4 | from __future__ import division
  5 | import os
  6 | import six
  7 | import sys
  8 | 
  9 | if "Apple" in sys.version:
 10 |     if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
 11 |         os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
 12 |         # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
 13 | 
 14 | from gym.utils import reraise
 15 | from gym import error
 16 | 
 17 | try:
 18 |     import pyglet
 19 | except ImportError as e:
 20 |     reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
 21 | 
 22 | try:
 23 |     from pyglet.gl import *
 24 | except ImportError as e:
 25 |     reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
 26 | 
 27 | import math
 28 | import numpy as np
 29 | 
 30 | RAD2DEG = 57.29577951308232
 31 | 
 32 | def get_display(spec):
 33 |     """Convert a display specification (such as :0) into an actual Display
 34 |     object.
 35 | 
 36 |     Pyglet only supports multiple Displays on Linux.
 37 |     """
 38 |     if spec is None:
 39 |         return None
 40 |     elif isinstance(spec, six.string_types):
 41 |         return pyglet.canvas.Display(spec)
 42 |     else:
 43 |         raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
 44 | 
 45 | class Viewer(object):
 46 |     def __init__(self, width, height, display=None):
 47 |         display = get_display(display)
 48 | 
 49 |         self.width = width
 50 |         self.height = height
 51 | 
 52 |         self.window = pyglet.window.Window(width=width, height=height, display=display)
 53 |         self.window.on_close = self.window_closed_by_user
 54 |         self.geoms = []
 55 |         self.onetime_geoms = []
 56 |         self.transform = Transform()
 57 | 
 58 |         glEnable(GL_BLEND)
 59 |         # glEnable(GL_MULTISAMPLE)
 60 |         glEnable(GL_LINE_SMOOTH)
 61 |         # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
 62 |         glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
 63 |         glLineWidth(2.0)
 64 |         glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
 65 | 
 66 |     def close(self):
 67 |         self.window.close()
 68 | 
 69 |     def window_closed_by_user(self):
 70 |         self.close()
 71 | 
 72 |     def set_bounds(self, left, right, bottom, top):
 73 |         assert right > left and top > bottom
 74 |         scalex = self.width/(right-left)
 75 |         scaley = self.height/(top-bottom)
 76 |         self.transform = Transform(
 77 |             translation=(-left*scalex, -bottom*scaley),
 78 |             scale=(scalex, scaley))
 79 | 
 80 |     def add_geom(self, geom):
 81 |         self.geoms.append(geom)
 82 | 
 83 |     def add_onetime(self, geom):
 84 |         self.onetime_geoms.append(geom)
 85 | 
 86 |     def render(self, return_rgb_array=False):
 87 |         glClearColor(1,1,1,1)
 88 |         self.window.clear()
 89 |         self.window.switch_to()
 90 |         self.window.dispatch_events()
 91 |         self.transform.enable()
 92 |         for geom in self.geoms:
 93 |             geom.render()
 94 |         for geom in self.onetime_geoms:
 95 |             geom.render()
 96 |         self.transform.disable()
 97 |         arr = None
 98 |         if return_rgb_array:
 99 |             buffer = pyglet.image.get_buffer_manager().get_color_buffer()
100 |             image_data = buffer.get_image_data()
101 |             arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
102 |             # In https://github.com/openai/gym-http-api/issues/2, we
103 |             # discovered that someone using Xmonad on Arch was having
104 |             # a window of size 598 x 398, though a 600 x 400 window
105 |             # was requested. (Guess Xmonad was preserving a pixel for
106 |             # the boundary.) So we use the buffer height/width rather
107 |             # than the requested one.
108 |             arr = arr.reshape(buffer.height, buffer.width, 4)
109 |             arr = arr[::-1,:,0:3]
110 |         self.window.flip()
111 |         self.onetime_geoms = []
112 |         return arr
113 | 
114 |     # Convenience
115 |     def draw_circle(self, radius=10, res=30, filled=True, **attrs):
116 |         geom = make_circle(radius=radius, res=res, filled=filled)
117 |         _add_attrs(geom, attrs)
118 |         self.add_onetime(geom)
119 |         return geom
120 | 
121 |     def draw_polygon(self, v, filled=True, **attrs):
122 |         geom = make_polygon(v=v, filled=filled)
123 |         _add_attrs(geom, attrs)
124 |         self.add_onetime(geom)
125 |         return geom
126 | 
127 |     def draw_polyline(self, v, **attrs):
128 |         geom = make_polyline(v=v)
129 |         _add_attrs(geom, attrs)
130 |         self.add_onetime(geom)
131 |         return geom
132 | 
133 |     def draw_line(self, start, end, **attrs):
134 |         geom = Line(start, end)
135 |         _add_attrs(geom, attrs)
136 |         self.add_onetime(geom)
137 |         return geom
138 | 
139 |     def get_array(self):
140 |         self.window.flip()
141 |         image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
142 |         self.window.flip()
143 |         arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
144 |         arr = arr.reshape(self.height, self.width, 4)
145 |         return arr[::-1,:,0:3]
146 | 
147 | def _add_attrs(geom, attrs):
148 |     if "color" in attrs:
149 |         geom.set_color(*attrs["color"])
150 |     if "linewidth" in attrs:
151 |         geom.set_linewidth(attrs["linewidth"])
152 | 
153 | class Geom(object):
154 |     def __init__(self):
155 |         self._color=Color((0, 0, 0, 1.0))
156 |         self.attrs = [self._color]
157 |     def render(self):
158 |         for attr in reversed(self.attrs):
159 |             attr.enable()
160 |         self.render1()
161 |         for attr in self.attrs:
162 |             attr.disable()
163 |     def render1(self):
164 |         raise NotImplementedError
165 |     def add_attr(self, attr):
166 |         self.attrs.append(attr)
167 |     def set_color(self, r, g, b, alpha=1):
168 |         self._color.vec4 = (r, g, b, alpha)
169 | 
170 | class Attr(object):
171 |     def enable(self):
172 |         raise NotImplementedError
173 |     def disable(self):
174 |         pass
175 | 
176 | class Transform(Attr):
177 |     def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
178 |         self.set_translation(*translation)
179 |         self.set_rotation(rotation)
180 |         self.set_scale(*scale)
181 |     def enable(self):
182 |         glPushMatrix()
183 |         glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
184 |         glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
185 |         glScalef(self.scale[0], self.scale[1], 1)
186 |     def disable(self):
187 |         glPopMatrix()
188 |     def set_translation(self, newx, newy):
189 |         self.translation = (float(newx), float(newy))
190 |     def set_rotation(self, new):
191 |         self.rotation = float(new)
192 |     def set_scale(self, newx, newy):
193 |         self.scale = (float(newx), float(newy))
194 | 
195 | class Color(Attr):
196 |     def __init__(self, vec4):
197 |         self.vec4 = vec4
198 |     def enable(self):
199 |         glColor4f(*self.vec4)
200 | 
201 | class LineStyle(Attr):
202 |     def __init__(self, style):
203 |         self.style = style
204 |     def enable(self):
205 |         glEnable(GL_LINE_STIPPLE)
206 |         glLineStipple(1, self.style)
207 |     def disable(self):
208 |         glDisable(GL_LINE_STIPPLE)
209 | 
210 | class LineWidth(Attr):
211 |     def __init__(self, stroke):
212 |         self.stroke = stroke
213 |     def enable(self):
214 |         glLineWidth(self.stroke)
215 | 
216 | class Point(Geom):
217 |     def __init__(self):
218 |         Geom.__init__(self)
219 |     def render1(self):
220 |         glBegin(GL_POINTS) # draw point
221 |         glVertex3f(0.0, 0.0, 0.0)
222 |         glEnd()
223 | 
224 | class FilledPolygon(Geom):
225 |     def __init__(self, v):
226 |         Geom.__init__(self)
227 |         self.v = v
228 |     def render1(self):
229 |         if   len(self.v) == 4 : glBegin(GL_QUADS)
230 |         elif len(self.v)  > 4 : glBegin(GL_POLYGON)
231 |         else: glBegin(GL_TRIANGLES)
232 |         for p in self.v:
233 |             glVertex3f(p[0], p[1],0)  # draw each vertex
234 |         glEnd()
235 | 
236 |         color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
237 |         glColor4f(*color)
238 |         glBegin(GL_LINE_LOOP)
239 |         for p in self.v:
240 |             glVertex3f(p[0], p[1],0)  # draw each vertex
241 |         glEnd()
242 | 
243 | def make_circle(radius=10, res=30, filled=True):
244 |     points = []
245 |     for i in range(res):
246 |         ang = 2*math.pi*i / res
247 |         points.append((math.cos(ang)*radius, math.sin(ang)*radius))
248 |     if filled:
249 |         return FilledPolygon(points)
250 |     else:
251 |         return PolyLine(points, True)
252 | 
253 | def make_polygon(v, filled=True):
254 |     if filled: return FilledPolygon(v)
255 |     else: return PolyLine(v, True)
256 | 
257 | def make_polyline(v):
258 |     return PolyLine(v, False)
259 | 
260 | def make_line(start,end):
261 |     return Line(start,end)
262 | 
263 | def make_capsule(length, width):
264 |     l, r, t, b = 0, length, width/2, -width/2
265 |     box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
266 |     circ0 = make_circle(width/2)
267 |     circ1 = make_circle(width/2)
268 |     circ1.add_attr(Transform(translation=(length, 0)))
269 |     geom = Compound([box, circ0, circ1])
270 |     return geom
271 | 
272 | class Compound(Geom):
273 |     def __init__(self, gs):
274 |         Geom.__init__(self)
275 |         self.gs = gs
276 |         for g in self.gs:
277 |             g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
278 |     def render1(self):
279 |         for g in self.gs:
280 |             g.render()
281 | 
282 | class PolyLine(Geom):
283 |     def __init__(self, v, close):
284 |         Geom.__init__(self)
285 |         self.v = v
286 |         self.close = close
287 |         self.linewidth = LineWidth(1)
288 |         self.add_attr(self.linewidth)
289 |     def render1(self):
290 |         glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
291 |         for p in self.v:
292 |             glVertex3f(p[0], p[1],0)  # draw each vertex
293 |         glEnd()
294 |     def set_linewidth(self, x):
295 |         self.linewidth.stroke = x
296 | 
297 | class Line(Geom):
298 |     def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
299 |         Geom.__init__(self)
300 |         self.start = start
301 |         self.end = end
302 |         self.linewidth = LineWidth(1)
303 |         self.add_attr(self.linewidth)
304 | 
305 |     def render1(self):
306 |         glBegin(GL_LINES)
307 |         glVertex2f(*self.start)
308 |         glVertex2f(*self.end)
309 |         glEnd()
310 | 
311 | class Image(Geom):
312 |     def __init__(self, fname, width, height):
313 |         Geom.__init__(self)
314 |         self.width = width
315 |         self.height = height
316 |         img = pyglet.image.load(fname)
317 |         self.img = img
318 |         self.flip = False
319 |     def render1(self):
320 |         self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
321 | 
322 | # ================================================================
323 | 
324 | class SimpleImageViewer(object):
325 |     def __init__(self, display=None):
326 |         self.window = None
327 |         self.isopen = False
328 |         self.display = display
329 |     def imshow(self, arr):
330 |         if self.window is None:
331 |             height, width, channels = arr.shape
332 |             self.window = pyglet.window.Window(width=width, height=height, display=self.display)
333 |             self.width = width
334 |             self.height = height
335 |             self.isopen = True
336 |         assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape"
337 |         image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
338 |         self.window.clear()
339 |         self.window.switch_to()
340 |         self.window.dispatch_events()
341 |         image.blit(0,0)
342 |         self.window.flip()
343 |     def close(self):
344 |         if self.isopen:
345 |             self.window.close()
346 |             self.isopen = False
347 |     def __del__(self):
348 |         self.close()


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenario.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # defines scenario upon which the world is built
 4 | class BaseScenario(object):
 5 |     # create elements of the world
 6 |     def make_world(self):
 7 |         raise NotImplementedError()
 8 |     # create initial conditions of the world
 9 |     def reset_world(self, world):
10 |         raise NotImplementedError()
11 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | import imp
2 | import os.path as osp
3 | 
4 | 
5 | def load(name):
6 |     pathname = osp.join(osp.dirname(__file__), name)
7 |     return imp.load_source('', pathname)
8 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/competition_3v3.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/competition_3v3.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_v1.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_v1.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_yuan_v2.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADDPG/multiagent-particle-envs/multiagent/scenarios/__pycache__/simple_tag_yuan_v2.cpython-35.pyc


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/angle_3v3.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | import math
  5 | 
  6 | attack_angle = 90
  7 | defense_angle = 90
  8 | fire_range = 0.1
  9 | 
 10 | 
 11 | class Scenario(BaseScenario):
 12 |     def make_world(self):
 13 |         world = World()
 14 |         # set any world properties first
 15 |         world.dim_c = 2
 16 |         num_good_agents = 2
 17 |         num_adversaries = 2
 18 |         num_agents = num_adversaries + num_good_agents
 19 |         num_landmarks = 0
 20 |         # add agents
 21 |         world.agents = [Agent() for i in range(num_agents)]
 22 |         for i, agent in enumerate(world.agents):
 23 |             agent.name = 'agent %d' % i
 24 |             agent.collide = True
 25 |             agent.silent = True
 26 |             agent.adversary = True if i < num_adversaries else False
 27 |             agent.size = 0.03 if agent.adversary else 0.03
 28 |             agent.accel = 3.0 if agent.adversary else 4.0
 29 |             #agent.accel = 20.0 if agent.adversary else 25.0
 30 |             agent.max_speed = 1.3 if agent.adversary else 1.0
 31 |             #agent.max_speed = 1.0 if agent.adversary else 0.0  ###changed by liyuan
 32 |             agent.death = False
 33 |         # add landmarks
 34 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 35 |         for i, landmark in enumerate(world.landmarks):
 36 |             landmark.name = 'landmark %d' % i
 37 |             landmark.collide = True
 38 |             landmark.movable = False
 39 |             landmark.size = 0.2
 40 |             landmark.boundary = False
 41 |         # make initial conditions
 42 |         self.reset_world(world)
 43 |         return world
 44 | 
 45 | 
 46 |     def reset_world(self, world):
 47 |         # random properties for agents
 48 |         for i, agent in enumerate(world.agents):
 49 |             agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
 50 |             # random properties for landmarks
 51 |         for i, landmark in enumerate(world.landmarks):
 52 |             landmark.color = np.array([0.25, 0.25, 0.25])
 53 |         # set random initial states
 54 |         for agent in world.agents:
 55 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 56 |             agent.state.p_vel = np.zeros(world.dim_p)
 57 |             agent.state.c = np.zeros(world.dim_c)
 58 |             agent.death = False
 59 |         for i, landmark in enumerate(world.landmarks):
 60 |             if not landmark.boundary:
 61 |                 landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
 62 |                 landmark.state.p_vel = np.zeros(world.dim_p)
 63 | 
 64 | 
 65 |     def benchmark_data(self, agent, world):
 66 |         # returns data for benchmarking purposes
 67 |         if agent.adversary:
 68 |             collisions = 0
 69 |             for a in self.good_agents(world):
 70 |                 if self.is_collision(a, agent) and a.death == False:
 71 |                     collisions += 1
 72 |             return collisions
 73 |         else:
 74 |             return 0
 75 | 
 76 |     '''
 77 |     def is_collision(self, agent1, agent2):
 78 |         if agent1.death or agent2.death:
 79 |             return False
 80 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 81 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 82 |         dist_min = agent1.size + agent2.size
 83 |         return True if dist < dist_min else False
 84 |     '''
 85 |     ##liyuan: compute the number of lokcing number of the agent
 86 |     def compute_lock_num(self, agent, world):
 87 |         opponent = []
 88 |         if agent.adversary:
 89 |             opponent = self.good_agents(world)
 90 |         else:
 91 |             opponent = self.adversaries(world)
 92 |    
 93 |         for i, opp in enumerate(opponent):
 94 |             capture,flag = self.is_collision(opp,agent)
 95 |             if flag == 1:
 96 |                 agent.lock_num[i] += 1
 97 |             else:
 98 |                 agent.lock_num[i] = 0
 99 | 
100 |     def is_collision(self, agent1, agent2):
101 |         if agent1.death or agent2.death:
102 |             return False,0
103 | 
104 |         ###liyuan:judged by angle
105 |         delta_pos = agent2.state.p_pos - agent1.state.p_pos
106 |         distance = np.sqrt(np.sum(np.square(delta_pos)))
107 |         if distance <= 0.0001:
108 |             return False,0
109 |         
110 |         agent1_cross = (delta_pos[0]*agent1.state.p_vel[0]+delta_pos[1]*agent1.state.p_vel[1])/(distance)
111 |         if agent1_cross < -1:
112 |            agent1_cross  = -1
113 |         if agent1_cross > 1:
114 |            agent1_cross = 1
115 |         agent1_angle = math.acos(agent1_cross)
116 | 
117 |         agent2_cross = (-delta_pos[0]*agent2.state.p_vel[0]-delta_pos[1]*agent2.state.p_vel[1])/(distance)
118 |         if agent2_cross < -1:
119 |            agent2_cross  = -1
120 |         if agent2_cross > 1:
121 |            agent2_cross = 1
122 |         agent2_angle = math.acos(agent2_cross)
123 | 
124 |         if distance < fire_range and agent2_angle*180/math.pi>defense_angle and agent1_angle*180/math.pi<attack_angle/2:
125 |             return True,1
126 |         elif distance < fire_range and agent2_angle*180/math.pi<attack_angle/2 and agent1_angle*180/math.pi>defense_angle:
127 |             return True,2
128 |         else:
129 |             return False,0
130 | 
131 |     # return all agents that are not adversaries
132 |     def good_agents(self, world):
133 |         return [agent for agent in world.agents if not agent.adversary]
134 | 
135 |     # return all adversarial agents
136 |     def adversaries(self, world):
137 |         return [agent for agent in world.agents if agent.adversary]
138 | 
139 | 
140 |     def reward(self, agent, world):
141 |         # Agents are rewarded based on minimum agent distance to each landmark
142 |         main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
143 |         return main_reward
144 | 
145 |     def agent_reward(self, agent, world):
146 |         ####added by liyuan
147 |         if agent.death == True:
148 |             return 0
149 |         # Agents are negatively rewarded if caught by adversaries
150 |         rew = 0
151 |         #shape = False
152 |         shape = True
153 |         adversaries = self.adversaries(world)
154 |         if shape:  # reward can optionally be shaped (increased reward for increased distance from adversary)
155 |             for adv in adversaries:
156 |                 ###changed by liyuan
157 |                 if adv.death == True:
158 |                     continue
159 |                 rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
160 |         if agent.collide:
161 |             for a in adversaries:
162 |                 ###changed by liyuan
163 |                 if self.is_collision(a, agent) and a.death == False:
164 |                     rew -= 10
165 |                     agent.death = True
166 | 
167 |         # agents are penalized for exiting the screen, so that they can be caught by the adversaries
168 |         def bound(x):
169 |             if x < 0.9:
170 |                 return 0
171 |             if x < 1.0:
172 |                 return (x - 0.9) * 10
173 |             return min(np.exp(2 * x - 2), 10)
174 |         for p in range(world.dim_p):
175 |             x = abs(agent.state.p_pos[p])
176 |             rew -= bound(x)
177 |         
178 |         for p in range(world.dim_p):
179 |             x = abs(agent.state.p_pos[p])
180 |             if (x > 1.0):
181 |                 rew -= 5
182 |                 break
183 | 
184 |         return rew
185 | 
186 |     def adversary_reward(self, agent, world):
187 |         ####added by liyuan
188 |         if agent.death == True:
189 |             return 0
190 |         # Adversaries are rewarded for collisions with agents
191 |         rew = 0
192 |         #shape = False
193 |         shape = True
194 |         agents = self.good_agents(world)
195 |         adversaries = self.adversaries(world)
196 |         
197 |         '''
198 |         if shape:  # reward can optionally be shaped (decreased reward for increased distance from agents)
199 |             for adv in adversaries:
200 |                 ###rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
201 |                 if adv.death == False:
202 |                     dis = []
203 |                     for a in agents:
204 |                         if a.death == False:
205 |                             dis.append(np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))))
206 |                     if len(dis) > 0:
207 |                         rew -= 0.1 * min(dis)
208 |         '''
209 |         if shape: 
210 |             dis = []
211 |             for a in agents:
212 |                 if a.death == False:
213 |                     dis.append(np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))))
214 |             if len(dis) > 0:
215 |                 rew -= 0.1 * min(dis)
216 |         
217 | 
218 |         if agent.collide:
219 |             for ag in agents:
220 |                 for adv in adversaries:
221 |                     ###changed by liyuan
222 |                     if self.is_collision(ag, adv) and ag.death == False and adv.death == False:
223 |                         if adv is agent:
224 |                             rew += 50
225 |                         else:
226 |                             rew += 30
227 |                         break
228 | 
229 |         
230 |         for adv in adversaries:
231 |             if adv.death == False:
232 |                 exceed = False
233 |                 for p in range(world.dim_p):
234 |                     x = abs(adv.state.p_pos[p])
235 |                     if (x > 1.0):
236 |                         exceed = True
237 |                         break
238 |                 if exceed == True:
239 |                     if adv is agent:
240 |                         rew -= 20
241 |                     else:
242 |                         rew -=10
243 |                     break
244 | 
245 |         return rew
246 | 
247 |     def observation(self, agent, world):
248 |         # get positions of all entities in this agent's reference frame
249 |         entity_pos = []
250 |         for entity in world.landmarks:
251 |             if not entity.boundary:
252 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
253 |         # communication of all other agents
254 |         comm = []
255 |         other_pos = []
256 |         other_vel = []
257 |         for other in world.agents:
258 |             if other is agent: continue            
259 |             ###changed by liyuan
260 |             if other.death:
261 |                 comm.append(np.zeros(world.dim_c))
262 |                 other_pos.append(np.zeros(world.dim_p))
263 |                 other_vel.append(np.zeros(world.dim_p))
264 |             else:
265 |                 comm.append(other.state.c)
266 |                 other_pos.append(other.state.p_pos - agent.state.p_pos)
267 |                 #if not other.adversary:
268 |                 other_vel.append(other.state.p_vel)
269 | 
270 |             #comm.append(other.state.c)
271 |             #other_pos.append(other.state.p_pos - agent.state.p_pos)
272 |             #if not other.adversary:
273 |                 #other_vel.append(other.state.p_vel)
274 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
275 | 
276 |     ##added by liyuan: if all green nodes die, this epsoid is over.
277 |     def done(self, agent, world):
278 |         allDie = True
279 |         agents = self.good_agents(world)
280 |         for agent in agents:
281 |             if agent.death == False:
282 |                 allDie = False
283 |                 break
284 |         return allDie
285 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # add agents
 9 |         world.agents = [Agent() for i in range(1)]
10 |         for i, agent in enumerate(world.agents):
11 |             agent.name = 'agent %d' % i
12 |             agent.collide = False
13 |             agent.silent = True
14 |         # add landmarks
15 |         world.landmarks = [Landmark() for i in range(1)]
16 |         for i, landmark in enumerate(world.landmarks):
17 |             landmark.name = 'landmark %d' % i
18 |             landmark.collide = False
19 |             landmark.movable = False
20 |         # make initial conditions
21 |         self.reset_world(world)
22 |         return world
23 | 
24 |     def reset_world(self, world):
25 |         # random properties for agents
26 |         for i, agent in enumerate(world.agents):
27 |             agent.color = np.array([0.25,0.25,0.25])
28 |         # random properties for landmarks
29 |         for i, landmark in enumerate(world.landmarks):
30 |             landmark.color = np.array([0.75,0.75,0.75])
31 |         world.landmarks[0].color = np.array([0.75,0.25,0.25])
32 |         # set random initial states
33 |         for agent in world.agents:
34 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
35 |             agent.state.p_vel = np.zeros(world.dim_p)
36 |             agent.state.c = np.zeros(world.dim_c)
37 |         for i, landmark in enumerate(world.landmarks):
38 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
39 |             landmark.state.p_vel = np.zeros(world.dim_p)
40 | 
41 |     def reward(self, agent, world):
42 |         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
43 |         return -dist2
44 | 
45 |     def observation(self, agent, world):
46 |         # get positions of all entities in this agent's reference frame
47 |         entity_pos = []
48 |         for entity in world.landmarks:
49 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
50 |         return np.concatenate([agent.state.p_vel] + entity_pos)
51 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_adversary.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 | 
  8 |     def make_world(self):
  9 |         world = World()
 10 |         # set any world properties first
 11 |         world.dim_c = 2
 12 |         num_agents = 3
 13 |         world.num_agents = num_agents
 14 |         num_adversaries = 1
 15 |         num_landmarks = num_agents - 1
 16 |         # add agents
 17 |         world.agents = [Agent() for i in range(num_agents)]
 18 |         for i, agent in enumerate(world.agents):
 19 |             agent.name = 'agent %d' % i
 20 |             agent.collide = False
 21 |             agent.silent = True
 22 |             agent.adversary = True if i < num_adversaries else False
 23 |             agent.size = 0.15
 24 |         # add landmarks
 25 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 26 |         for i, landmark in enumerate(world.landmarks):
 27 |             landmark.name = 'landmark %d' % i
 28 |             landmark.collide = False
 29 |             landmark.movable = False
 30 |             landmark.size = 0.08
 31 |         # make initial conditions
 32 |         self.reset_world(world)
 33 |         return world
 34 | 
 35 |     def reset_world(self, world):
 36 |         # random properties for agents
 37 |         world.agents[0].color = np.array([0.85, 0.35, 0.35])
 38 |         for i in range(1, world.num_agents):
 39 |             world.agents[i].color = np.array([0.35, 0.35, 0.85])
 40 |         # random properties for landmarks
 41 |         for i, landmark in enumerate(world.landmarks):
 42 |             landmark.color = np.array([0.15, 0.15, 0.15])
 43 |         # set goal landmark
 44 |         goal = np.random.choice(world.landmarks)
 45 |         goal.color = np.array([0.15, 0.65, 0.15])
 46 |         for agent in world.agents:
 47 |             agent.goal_a = goal
 48 |         # set random initial states
 49 |         for agent in world.agents:
 50 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 51 |             agent.state.p_vel = np.zeros(world.dim_p)
 52 |             agent.state.c = np.zeros(world.dim_c)
 53 |         for i, landmark in enumerate(world.landmarks):
 54 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 55 |             landmark.state.p_vel = np.zeros(world.dim_p)
 56 | 
 57 |     def benchmark_data(self, agent, world):
 58 |         # returns data for benchmarking purposes
 59 |         if agent.adversary:
 60 |             return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
 61 |         else:
 62 |             dists = []
 63 |             for l in world.landmarks:
 64 |                 dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
 65 |             dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
 66 |             return tuple(dists)
 67 | 
 68 |     # return all agents that are not adversaries
 69 |     def good_agents(self, world):
 70 |         return [agent for agent in world.agents if not agent.adversary]
 71 | 
 72 |     # return all adversarial agents
 73 |     def adversaries(self, world):
 74 |         return [agent for agent in world.agents if agent.adversary]
 75 | 
 76 |     def reward(self, agent, world):
 77 |         # Agents are rewarded based on minimum agent distance to each landmark
 78 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 79 | 
 80 |     def agent_reward(self, agent, world):
 81 |         # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
 82 |         shaped_reward = True
 83 |         shaped_adv_reward = True
 84 | 
 85 |         # Calculate negative reward for adversary
 86 |         adversary_agents = self.adversaries(world)
 87 |         if shaped_adv_reward:  # distance-based adversary reward
 88 |             adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
 89 |         else:  # proximity-based adversary reward (binary)
 90 |             adv_rew = 0
 91 |             for a in adversary_agents:
 92 |                 if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
 93 |                     adv_rew -= 5
 94 | 
 95 |         # Calculate positive reward for agents
 96 |         good_agents = self.good_agents(world)
 97 |         if shaped_reward:  # distance-based agent reward
 98 |             pos_rew = -min(
 99 |                 [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
100 |         else:  # proximity-based agent reward (binary)
101 |             pos_rew = 0
102 |             if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
103 |                     < 2 * agent.goal_a.size:
104 |                 pos_rew += 5
105 |             pos_rew -= min(
106 |                 [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
107 |         return pos_rew + adv_rew
108 | 
109 |     def adversary_reward(self, agent, world):
110 |         # Rewarded based on proximity to the goal landmark
111 |         shaped_reward = True
112 |         if shaped_reward:  # distance-based reward
113 |             return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
114 |         else:  # proximity-based reward (binary)
115 |             adv_rew = 0
116 |             if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
117 |                 adv_rew += 5
118 |             return adv_rew
119 | 
120 | 
121 |     def observation(self, agent, world):
122 |         # get positions of all entities in this agent's reference frame
123 |         entity_pos = []
124 |         for entity in world.landmarks:
125 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
126 |         # entity colors
127 |         entity_color = []
128 |         for entity in world.landmarks:
129 |             entity_color.append(entity.color)
130 |         # communication of all other agents
131 |         other_pos = []
132 |         for other in world.agents:
133 |             if other is agent: continue
134 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
135 | 
136 |         if not agent.adversary:
137 |             return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
138 |         else:
139 |             return np.concatenate(entity_pos + other_pos)
140 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_crypto.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Scenario:
  3 | 1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from
  4 | adversary to goal. Adversary is rewarded for its distance to the goal.
  5 | """
  6 | 
  7 | 
  8 | import numpy as np
  9 | from multiagent.core import World, Agent, Landmark
 10 | from multiagent.scenario import BaseScenario
 11 | import random
 12 | 
 13 | 
 14 | class CryptoAgent(Agent):
 15 |     def __init__(self):
 16 |         super(CryptoAgent, self).__init__()
 17 |         self.key = None
 18 | 
 19 | class Scenario(BaseScenario):
 20 | 
 21 |     def make_world(self):
 22 |         world = World()
 23 |         # set any world properties first
 24 |         num_agents = 3
 25 |         num_adversaries = 1
 26 |         num_landmarks = 2
 27 |         world.dim_c = 4
 28 |         # add agents
 29 |         world.agents = [CryptoAgent() for i in range(num_agents)]
 30 |         for i, agent in enumerate(world.agents):
 31 |             agent.name = 'agent %d' % i
 32 |             agent.collide = False
 33 |             agent.adversary = True if i < num_adversaries else False
 34 |             agent.speaker = True if i == 2 else False
 35 |             agent.movable = False
 36 |         # add landmarks
 37 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 38 |         for i, landmark in enumerate(world.landmarks):
 39 |             landmark.name = 'landmark %d' % i
 40 |             landmark.collide = False
 41 |             landmark.movable = False
 42 |         # make initial conditions
 43 |         self.reset_world(world)
 44 |         return world
 45 | 
 46 | 
 47 |     def reset_world(self, world):
 48 |         # random properties for agents
 49 |         for i, agent in enumerate(world.agents):
 50 |             agent.color = np.array([0.25, 0.25, 0.25])
 51 |             if agent.adversary:
 52 |                 agent.color = np.array([0.75, 0.25, 0.25])
 53 |             agent.key = None
 54 |         # random properties for landmarks
 55 |         color_list = [np.zeros(world.dim_c) for i in world.landmarks]
 56 |         for i, color in enumerate(color_list):
 57 |             color[i] += 1
 58 |         for color, landmark in zip(color_list, world.landmarks):
 59 |             landmark.color = color
 60 |         # set goal landmark
 61 |         goal = np.random.choice(world.landmarks)
 62 |         world.agents[1].color = goal.color
 63 |         world.agents[2].key = np.random.choice(world.landmarks).color
 64 | 
 65 |         for agent in world.agents:
 66 |             agent.goal_a = goal
 67 | 
 68 |         # set random initial states
 69 |         for agent in world.agents:
 70 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 71 |             agent.state.p_vel = np.zeros(world.dim_p)
 72 |             agent.state.c = np.zeros(world.dim_c)
 73 |         for i, landmark in enumerate(world.landmarks):
 74 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 75 |             landmark.state.p_vel = np.zeros(world.dim_p)
 76 | 
 77 | 
 78 |     def benchmark_data(self, agent, world):
 79 |         # returns data for benchmarking purposes
 80 |         return (agent.state.c, agent.goal_a.color)
 81 | 
 82 |     # return all agents that are not adversaries
 83 |     def good_listeners(self, world):
 84 |         return [agent for agent in world.agents if not agent.adversary and not agent.speaker]
 85 | 
 86 |     # return all agents that are not adversaries
 87 |     def good_agents(self, world):
 88 |         return [agent for agent in world.agents if not agent.adversary]
 89 | 
 90 |     # return all adversarial agents
 91 |     def adversaries(self, world):
 92 |         return [agent for agent in world.agents if agent.adversary]
 93 | 
 94 |     def reward(self, agent, world):
 95 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 96 | 
 97 |     def agent_reward(self, agent, world):
 98 |         # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot
 99 |         good_listeners = self.good_listeners(world)
100 |         adversaries = self.adversaries(world)
101 |         good_rew = 0
102 |         adv_rew = 0
103 |         for a in good_listeners:
104 |             if (a.state.c == np.zeros(world.dim_c)).all():
105 |                 continue
106 |             else:
107 |                 good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color))
108 |         for a in adversaries:
109 |             if (a.state.c == np.zeros(world.dim_c)).all():
110 |                 continue
111 |             else:
112 |                 adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color))
113 |                 adv_rew += adv_l1
114 |         return adv_rew + good_rew
115 | 
116 |     def adversary_reward(self, agent, world):
117 |         # Adversary (Eve) is rewarded if it can reconstruct original goal
118 |         rew = 0
119 |         if not (agent.state.c == np.zeros(world.dim_c)).all():
120 |             rew -= np.sum(np.square(agent.state.c - agent.goal_a.color))
121 |         return rew
122 | 
123 | 
124 |     def observation(self, agent, world):
125 |         # goal color
126 |         goal_color = np.zeros(world.dim_color)
127 |         if agent.goal_a is not None:
128 |             goal_color = agent.goal_a.color
129 | 
130 |         # get positions of all entities in this agent's reference frame
131 |         entity_pos = []
132 |         for entity in world.landmarks:
133 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
134 |         # communication of all other agents
135 |         comm = []
136 |         for other in world.agents:
137 |             if other is agent or (other.state.c is None) or not other.speaker: continue
138 |             comm.append(other.state.c)
139 | 
140 |         confer = np.array([0])
141 | 
142 |         if world.agents[2].key is None:
143 |             confer = np.array([1])
144 |             key = np.zeros(world.dim_c)
145 |             goal_color = np.zeros(world.dim_c)
146 |         else:
147 |             key = world.agents[2].key
148 | 
149 |         prnt = False
150 |         # speaker
151 |         if agent.speaker:
152 |             if prnt:
153 |                 print('speaker')
154 |                 print(agent.state.c)
155 |                 print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)]))
156 |             return np.concatenate([goal_color] + [key])
157 |         # listener
158 |         if not agent.speaker and not agent.adversary:
159 |             if prnt:
160 |                 print('listener')
161 |                 print(agent.state.c)
162 |                 print(np.concatenate([key] + comm + [confer]))
163 |             return np.concatenate([key] + comm)
164 |         if not agent.speaker and agent.adversary:
165 |             if prnt:
166 |                 print('adversary')
167 |                 print(agent.state.c)
168 |                 print(np.concatenate(comm + [confer]))
169 |             return np.concatenate(comm)
170 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_push.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 2
10 |         num_agents = 2
11 |         num_adversaries = 1
12 |         num_landmarks = 2
13 |         # add agents
14 |         world.agents = [Agent() for i in range(num_agents)]
15 |         for i, agent in enumerate(world.agents):
16 |             agent.name = 'agent %d' % i
17 |             agent.collide = True
18 |             agent.silent = True
19 |             if i < num_adversaries:
20 |                 agent.adversary = True
21 |             else:
22 |                 agent.adversary = False
23 |         # add landmarks
24 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
25 |         for i, landmark in enumerate(world.landmarks):
26 |             landmark.name = 'landmark %d' % i
27 |             landmark.collide = False
28 |             landmark.movable = False
29 |         # make initial conditions
30 |         self.reset_world(world)
31 |         return world
32 | 
33 |     def reset_world(self, world):
34 |         # random properties for landmarks
35 |         for i, landmark in enumerate(world.landmarks):
36 |             landmark.color = np.array([0.1, 0.1, 0.1])
37 |             landmark.color[i + 1] += 0.8
38 |             landmark.index = i
39 |         # set goal landmark
40 |         goal = np.random.choice(world.landmarks)
41 |         for i, agent in enumerate(world.agents):
42 |             agent.goal_a = goal
43 |             agent.color = np.array([0.25, 0.25, 0.25])
44 |             if agent.adversary:
45 |                 agent.color = np.array([0.75, 0.25, 0.25])
46 |             else:
47 |                 j = goal.index
48 |                 agent.color[j + 1] += 0.5
49 |         # set random initial states
50 |         for agent in world.agents:
51 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
52 |             agent.state.p_vel = np.zeros(world.dim_p)
53 |             agent.state.c = np.zeros(world.dim_c)
54 |         for i, landmark in enumerate(world.landmarks):
55 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 |             landmark.state.p_vel = np.zeros(world.dim_p)
57 | 
58 |     def reward(self, agent, world):
59 |         # Agents are rewarded based on minimum agent distance to each landmark
60 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
61 | 
62 |     def agent_reward(self, agent, world):
63 |         # the distance to the goal
64 |         return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
65 | 
66 |     def adversary_reward(self, agent, world):
67 |         # keep the nearest good agents away from the goal
68 |         agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary]
69 |         pos_rew = min(agent_dist)
70 |         #nearest_agent = world.good_agents[np.argmin(agent_dist)]
71 |         #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos)))
72 |         neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)))
73 |         #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents])
74 |         return pos_rew - neg_rew
75 |                
76 |     def observation(self, agent, world):
77 |         # get positions of all entities in this agent's reference frame
78 |         entity_pos = []
79 |         for entity in world.landmarks:  # world.entities:
80 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
81 |         # entity colors
82 |         entity_color = []
83 |         for entity in world.landmarks:  # world.entities:
84 |             entity_color.append(entity.color)
85 |         # communication of all other agents
86 |         comm = []
87 |         other_pos = []
88 |         for other in world.agents:
89 |             if other is agent: continue
90 |             comm.append(other.state.c)
91 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
92 |         if not agent.adversary:
93 |             return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos)
94 |         else:
95 |             #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos  # randomize position of other agents in adversary network
96 |             return np.concatenate([agent.state.p_vel] + entity_pos + other_pos)
97 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_reference.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 10
10 |         world.collaborative = True  # whether agents share rewards
11 |         # add agents
12 |         world.agents = [Agent() for i in range(2)]
13 |         for i, agent in enumerate(world.agents):
14 |             agent.name = 'agent %d' % i
15 |             agent.collide = False
16 |         # add landmarks
17 |         world.landmarks = [Landmark() for i in range(3)]
18 |         for i, landmark in enumerate(world.landmarks):
19 |             landmark.name = 'landmark %d' % i
20 |             landmark.collide = False
21 |             landmark.movable = False
22 |         # make initial conditions
23 |         self.reset_world(world)
24 |         return world
25 | 
26 |     def reset_world(self, world):
27 |         # assign goals to agents
28 |         for agent in world.agents:
29 |             agent.goal_a = None
30 |             agent.goal_b = None
31 |         # want other agent to go to the goal landmark
32 |         world.agents[0].goal_a = world.agents[1]
33 |         world.agents[0].goal_b = np.random.choice(world.landmarks)
34 |         world.agents[1].goal_a = world.agents[0]
35 |         world.agents[1].goal_b = np.random.choice(world.landmarks)
36 |         # random properties for agents
37 |         for i, agent in enumerate(world.agents):
38 |             agent.color = np.array([0.25,0.25,0.25])               
39 |         # random properties for landmarks
40 |         world.landmarks[0].color = np.array([0.75,0.25,0.25]) 
41 |         world.landmarks[1].color = np.array([0.25,0.75,0.25]) 
42 |         world.landmarks[2].color = np.array([0.25,0.25,0.75]) 
43 |         # special colors for goals
44 |         world.agents[0].goal_a.color = world.agents[0].goal_b.color                
45 |         world.agents[1].goal_a.color = world.agents[1].goal_b.color                               
46 |         # set random initial states
47 |         for agent in world.agents:
48 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
49 |             agent.state.p_vel = np.zeros(world.dim_p)
50 |             agent.state.c = np.zeros(world.dim_c)
51 |         for i, landmark in enumerate(world.landmarks):
52 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 |             landmark.state.p_vel = np.zeros(world.dim_p)
54 | 
55 |     def reward(self, agent, world):
56 |         if agent.goal_a is None or agent.goal_b is None:
57 |             return 0.0
58 |         dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
59 |         return -dist2
60 | 
61 |     def observation(self, agent, world):
62 |         # goal color
63 |         goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
64 |         if agent.goal_b is not None:
65 |             goal_color[1] = agent.goal_b.color 
66 | 
67 |         # get positions of all entities in this agent's reference frame
68 |         entity_pos = []
69 |         for entity in world.landmarks:
70 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
71 |         # entity colors
72 |         entity_color = []
73 |         for entity in world.landmarks:
74 |             entity_color.append(entity.color)
75 |         # communication of all other agents
76 |         comm = []
77 |         for other in world.agents:
78 |             if other is agent: continue
79 |             comm.append(other.state.c)
80 |         return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
81 |             


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_speaker_listener.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 3
10 |         num_landmarks = 3
11 |         world.collaborative = True
12 |         # add agents
13 |         world.agents = [Agent() for i in range(2)]
14 |         for i, agent in enumerate(world.agents):
15 |             agent.name = 'agent %d' % i
16 |             agent.collide = False
17 |             agent.size = 0.075
18 |         # speaker
19 |         world.agents[0].movable = False
20 |         # listener
21 |         world.agents[1].silent = True
22 |         # add landmarks
23 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
24 |         for i, landmark in enumerate(world.landmarks):
25 |             landmark.name = 'landmark %d' % i
26 |             landmark.collide = False
27 |             landmark.movable = False
28 |             landmark.size = 0.04
29 |         # make initial conditions
30 |         self.reset_world(world)
31 |         return world
32 | 
33 |     def reset_world(self, world):
34 |         # assign goals to agents
35 |         for agent in world.agents:
36 |             agent.goal_a = None
37 |             agent.goal_b = None
38 |         # want listener to go to the goal landmark
39 |         world.agents[0].goal_a = world.agents[1]
40 |         world.agents[0].goal_b = np.random.choice(world.landmarks)
41 |         # random properties for agents
42 |         for i, agent in enumerate(world.agents):
43 |             agent.color = np.array([0.25,0.25,0.25])               
44 |         # random properties for landmarks
45 |         world.landmarks[0].color = np.array([0.65,0.15,0.15])
46 |         world.landmarks[1].color = np.array([0.15,0.65,0.15])
47 |         world.landmarks[2].color = np.array([0.15,0.15,0.65])
48 |         # special colors for goals
49 |         world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45])
50 |         # set random initial states
51 |         for agent in world.agents:
52 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 |             agent.state.p_vel = np.zeros(world.dim_p)
54 |             agent.state.c = np.zeros(world.dim_c)
55 |         for i, landmark in enumerate(world.landmarks):
56 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
57 |             landmark.state.p_vel = np.zeros(world.dim_p)
58 | 
59 |     def benchmark_data(self, agent, world):
60 |         # returns data for benchmarking purposes
61 |         return self.reward(agent, reward)
62 | 
63 |     def reward(self, agent, world):
64 |         # squared distance from listener to landmark
65 |         a = world.agents[0]
66 |         dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos))
67 |         return -dist2
68 | 
69 |     def observation(self, agent, world):
70 |         # goal color
71 |         goal_color = np.zeros(world.dim_color)
72 |         if agent.goal_b is not None:
73 |             goal_color = agent.goal_b.color
74 | 
75 |         # get positions of all entities in this agent's reference frame
76 |         entity_pos = []
77 |         for entity in world.landmarks:
78 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
79 | 
80 |         # communication of all other agents
81 |         comm = []
82 |         for other in world.agents:
83 |             if other is agent or (other.state.c is None): continue
84 |             comm.append(other.state.c)
85 |         
86 |         # speaker
87 |         if not agent.movable:
88 |             return np.concatenate([goal_color])
89 |         # listener
90 |         if agent.silent:
91 |             return np.concatenate([agent.state.p_vel] + entity_pos + comm)
92 |             
93 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_spread.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 2
 11 |         num_agents = 3
 12 |         num_landmarks = 3
 13 |         world.collaborative = True
 14 |         # add agents
 15 |         world.agents = [Agent() for i in range(num_agents)]
 16 |         for i, agent in enumerate(world.agents):
 17 |             agent.name = 'agent %d' % i
 18 |             agent.collide = True
 19 |             agent.silent = True
 20 |             agent.size = 0.15
 21 |         # add landmarks
 22 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 23 |         for i, landmark in enumerate(world.landmarks):
 24 |             landmark.name = 'landmark %d' % i
 25 |             landmark.collide = False
 26 |             landmark.movable = False
 27 |         # make initial conditions
 28 |         self.reset_world(world)
 29 |         return world
 30 | 
 31 |     def reset_world(self, world):
 32 |         # random properties for agents
 33 |         for i, agent in enumerate(world.agents):
 34 |             agent.color = np.array([0.35, 0.35, 0.85])
 35 |         # random properties for landmarks
 36 |         for i, landmark in enumerate(world.landmarks):
 37 |             landmark.color = np.array([0.25, 0.25, 0.25])
 38 |         # set random initial states
 39 |         for agent in world.agents:
 40 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 41 |             agent.state.p_vel = np.zeros(world.dim_p)
 42 |             agent.state.c = np.zeros(world.dim_c)
 43 |         for i, landmark in enumerate(world.landmarks):
 44 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 45 |             landmark.state.p_vel = np.zeros(world.dim_p)
 46 | 
 47 |     def benchmark_data(self, agent, world):
 48 |         rew = 0
 49 |         collisions = 0
 50 |         occupied_landmarks = 0
 51 |         min_dists = 0
 52 |         for l in world.landmarks:
 53 |             dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
 54 |             min_dists += min(dists)
 55 |             rew -= min(dists)
 56 |             if min(dists) < 0.1:
 57 |                 occupied_landmarks += 1
 58 |         if agent.collide:
 59 |             for a in world.agents:
 60 |                 if self.is_collision(a, agent):
 61 |                     rew -= 1
 62 |                     collisions += 1
 63 |         return (rew, collisions, min_dists, occupied_landmarks)
 64 | 
 65 | 
 66 |     def is_collision(self, agent1, agent2):
 67 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 68 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 69 |         dist_min = agent1.size + agent2.size
 70 |         return True if dist < dist_min else False
 71 | 
 72 |     def reward(self, agent, world):
 73 |         # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
 74 |         rew = 0
 75 |         for l in world.landmarks:
 76 |             dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
 77 |             rew -= min(dists)
 78 |         if agent.collide:
 79 |             for a in world.agents:
 80 |                 if self.is_collision(a, agent):
 81 |                     rew -= 1
 82 |         return rew
 83 | 
 84 |     def observation(self, agent, world):
 85 |         # get positions of all entities in this agent's reference frame
 86 |         entity_pos = []
 87 |         for entity in world.landmarks:  # world.entities:
 88 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
 89 |         # entity colors
 90 |         entity_color = []
 91 |         for entity in world.landmarks:  # world.entities:
 92 |             entity_color.append(entity.color)
 93 |         # communication of all other agents
 94 |         comm = []
 95 |         other_pos = []
 96 |         for other in world.agents:
 97 |             if other is agent: continue
 98 |             comm.append(other.state.c)
 99 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
100 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
101 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/multiagent/scenarios/simple_tag_v1.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 2
 11 |         num_good_agents = 1
 12 |         num_adversaries = 2
 13 |         num_agents = num_adversaries + num_good_agents
 14 |         num_landmarks = 0
 15 |         # add agents
 16 |         world.agents = [Agent() for i in range(num_agents)]
 17 |         for i, agent in enumerate(world.agents):
 18 |             agent.name = 'agent %d' % i
 19 |             agent.collide = True
 20 |             agent.silent = True
 21 |             agent.adversary = True if i < num_adversaries else False
 22 |             agent.size = 0.075 if agent.adversary else 0.05
 23 |             agent.accel = 3.0 if agent.adversary else 4.0
 24 |             #agent.accel = 20.0 if agent.adversary else 25.0
 25 |             agent.max_speed = 1.0 if agent.adversary else 1.3
 26 |         # add landmarks
 27 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 28 |         for i, landmark in enumerate(world.landmarks):
 29 |             landmark.name = 'landmark %d' % i
 30 |             landmark.collide = True
 31 |             landmark.movable = False
 32 |             landmark.size = 0.2
 33 |             landmark.boundary = False
 34 |         # make initial conditions
 35 |         self.reset_world(world)
 36 |         return world
 37 | 
 38 | 
 39 |     def reset_world(self, world):
 40 |         # random properties for agents
 41 |         for i, agent in enumerate(world.agents):
 42 |             agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
 43 |             # random properties for landmarks
 44 |         for i, landmark in enumerate(world.landmarks):
 45 |             landmark.color = np.array([0.25, 0.25, 0.25])
 46 |         # set random initial states
 47 |         for agent in world.agents:
 48 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 49 |             agent.state.p_vel = np.zeros(world.dim_p)
 50 |             agent.state.c = np.zeros(world.dim_c)
 51 |         for i, landmark in enumerate(world.landmarks):
 52 |             if not landmark.boundary:
 53 |                 landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
 54 |                 landmark.state.p_vel = np.zeros(world.dim_p)
 55 | 
 56 | 
 57 |     def benchmark_data(self, agent, world):
 58 |         # returns data for benchmarking purposes
 59 |         if agent.adversary:
 60 |             collisions = 0
 61 |             for a in self.good_agents(world):
 62 |                 if self.is_collision(a, agent):
 63 |                     collisions += 1
 64 |             return collisions
 65 |         else:
 66 |             return 0
 67 | 
 68 | 
 69 |     def is_collision(self, agent1, agent2):
 70 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 71 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 72 |         dist_min = agent1.size + agent2.size
 73 |         return True if dist < dist_min else False
 74 | 
 75 |     # return all agents that are not adversaries
 76 |     def good_agents(self, world):
 77 |         return [agent for agent in world.agents if not agent.adversary]
 78 | 
 79 |     # return all adversarial agents
 80 |     def adversaries(self, world):
 81 |         return [agent for agent in world.agents if agent.adversary]
 82 | 
 83 | 
 84 |     def reward(self, agent, world):
 85 |         # Agents are rewarded based on minimum agent distance to each landmark
 86 |         main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 87 |         return main_reward
 88 | 
 89 |     def agent_reward(self, agent, world):
 90 |         # Agents are negatively rewarded if caught by adversaries
 91 |         rew = 0
 92 |         #shape = False
 93 |         shape = True
 94 |         adversaries = self.adversaries(world)
 95 |         if shape:  # reward can optionally be shaped (increased reward for increased distance from adversary)
 96 |             for adv in adversaries:
 97 |                 rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
 98 |         if agent.collide:
 99 |             for a in adversaries:
100 |                 if self.is_collision(a, agent):
101 |                     rew -= 10
102 | 
103 |         # agents are penalized for exiting the screen, so that they can be caught by the adversaries
104 |         def bound(x):
105 |             if x < 0.9:
106 |                 return 0
107 |             if x < 1.0:
108 |                 return (x - 0.9) * 10
109 |             return min(np.exp(2 * x - 2), 10)
110 |         for p in range(world.dim_p):
111 |             x = abs(agent.state.p_pos[p])
112 |             rew -= bound(x)
113 | 
114 |         return rew
115 | 
116 |     def adversary_reward(self, agent, world):
117 |         # Adversaries are rewarded for collisions with agents
118 |         rew = 0
119 |         #shape = False
120 |         shape = True
121 |         agents = self.good_agents(world)
122 |         adversaries = self.adversaries(world)
123 |         if shape:  # reward can optionally be shaped (decreased reward for increased distance from agents)
124 |             for adv in adversaries:
125 |                 rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
126 |         if agent.collide:
127 |             for ag in agents:
128 |                 for adv in adversaries:
129 |                     if self.is_collision(ag, adv):
130 |                         rew += 10
131 |         return rew
132 | 
133 |     def observation(self, agent, world):
134 |         # get positions of all entities in this agent's reference frame
135 |         entity_pos = []
136 |         for entity in world.landmarks:
137 |             if not entity.boundary:
138 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
139 |         # communication of all other agents
140 |         comm = []
141 |         other_pos = []
142 |         other_vel = []
143 |         for other in world.agents:
144 |             if other is agent: continue
145 |             comm.append(other.state.c)
146 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
147 |             if not other.adversary:
148 |                 other_vel.append(other.state.p_vel)
149 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
150 | 


--------------------------------------------------------------------------------
/MADDPG/multiagent-particle-envs/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(name='multiagent',
 4 |       version='0.0.1',
 5 |       description='Multi-Agent Goal-Driven Communication Environment',
 6 |       url='https://github.com/openai/multiagent-public',
 7 |       author='Igor Mordatch',
 8 |       author_email='mordatch@openai.com',
 9 |       packages=find_packages(),
10 |       include_package_data=True,
11 |       zip_safe=False,
12 |       install_requires=['gym', 'numpy-stl']
13 | )
14 | 


--------------------------------------------------------------------------------
/MADDPG/reward setting:
--------------------------------------------------------------------------------
 1 | red reward
 2 | 1 distance -0.1*min_dis
 3 | 2 红方任何人抓住一个蓝方 +10
 4 | 3 红方离开屏幕，-50
 5 | 4 抓住所有蓝方 +100
 6 | 
 7 | 蓝方
 8 | 1、距离 +0.1 * 距离所有红方的距离
 9 | 2、被抓住 -10
10 | 3、离开屏幕 -5


--------------------------------------------------------------------------------
/MADQN/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.egg-info/
3 | *.pyc
4 | .vscode/settings.json
5 | multiagent/.DS_Store
6 | .DS_Store
7 | 


--------------------------------------------------------------------------------
/MADQN/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 OpenAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MADQN/README.md:
--------------------------------------------------------------------------------
 1 | **Status:** Archive (code is provided as-is, no updates expected)
 2 | 
 3 | # Multi-Agent Particle Environment
 4 | 
 5 | A simple multi-agent particle world with a continuous observation and discrete action space, along with some basic simulated physics.
 6 | Used in the paper [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments](https://arxiv.org/pdf/1706.02275.pdf).
 7 | 
 8 | ## Getting started:
 9 | 
10 | - To install, `cd` into the root directory and type `pip install -e .`
11 | 
12 | - To interactively view moving to landmark scenario (see others in ./scenarios/):
13 | `bin/interactive.py --scenario simple.py`
14 | 
15 | - Known dependencies: Python (3.5.4), OpenAI gym (0.10.5), numpy (1.14.5)
16 | 
17 | - To use the environments, look at the code for importing them in `make_env.py`.
18 | 
19 | 


--------------------------------------------------------------------------------
/MADQN/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/bin/__init__.py


--------------------------------------------------------------------------------
/MADQN/bin/interactive.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os,sys
 3 | sys.path.insert(1, os.path.join(sys.path[0], '..'))
 4 | import argparse
 5 | 
 6 | from multiagent.environment import MultiAgentEnv
 7 | from multiagent.policy import InteractivePolicy
 8 | import multiagent.scenarios as scenarios
 9 | 
10 | if __name__ == '__main__':
11 |     # parse arguments
12 |     parser = argparse.ArgumentParser(description=None)
13 |     parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.')
14 |     args = parser.parse_args()
15 | 
16 |     # load scenario from script
17 |     scenario = scenarios.load(args.scenario).Scenario()
18 |     # create world
19 |     world = scenario.make_world()
20 |     # create multiagent environment
21 |     env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False)
22 |     # render call to create viewer window (necessary only for interactive policies)
23 |     env.render()
24 |     # create interactive policies for each agent
25 |     policies = [InteractivePolicy(env,i) for i in range(env.n)]
26 |     # execution loop
27 |     obs_n = env.reset()
28 |     while True:
29 |         # query for action from each agent's policy
30 |         act_n = []
31 |         for i, policy in enumerate(policies):
32 |             act_n.append(policy.action(obs_n[i]))
33 |         # step environment
34 |         obs_n, reward_n, done_n, _ = env.step(act_n)
35 |         # render all agent views
36 |         env.render()
37 |         # display rewards
38 |         #for agent in env.world.agents:
39 |         #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))
40 | 


--------------------------------------------------------------------------------
/MADQN/dqn.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | from keras.models import Sequential
 4 | from keras.layers import Dense
 5 | from keras.optimizers import Adam
 6 | 
 7 | 
 8 | class DQN:
 9 |     def __init__(self, n_actions, state_size, gamma=0.9, learning_rate=0.001,
10 |                  eps_greedy=0.5, eps_increment=1e-5, replace_target_freq=2000):
11 |         self.n_actions = n_actions
12 |         self.state_size = state_size
13 |         self.gamma = gamma
14 |         self.learning_rate = learning_rate
15 |         self.eps_greedy = eps_greedy
16 |         self.eps_increment = eps_increment
17 |         self.learning_step = 0
18 |         self.replace_target_freq = replace_target_freq
19 |         self.eval_network = self.build_network()
20 |         self.target_network = self.build_network()
21 |         self.update_target_weights()
22 | 
23 |     def build_network(self):
24 |         model = Sequential()
25 |         model.add(Dense(50, input_dim=self.state_size, activation='relu'))
26 |         model.add(Dense(50, activation='relu'))
27 |         model.add(Dense(self.n_actions, activation='linear'))
28 |         model.compile(loss='mse', optimizer=Adam(self.learning_rate))
29 | 
30 |         return model
31 | 
32 |     def update_target_weights(self):
33 |         self.target_network.set_weights(self.eval_network.get_weights())
34 | 
35 |     def choose_action(self, state):
36 |         p = np.random.random()
37 |         if p < self.eps_greedy:
38 |             action_probs = self.eval_network.predict(state[np.newaxis, :])
39 |             return np.argmax(action_probs[0])
40 |         else:
41 |             return random.randrange(self.n_actions)
42 | 
43 |     def learn(self, states, actions, rewards, states_next, done):
44 |         if self.learning_step % self.replace_target_freq == 0:
45 |             self.update_target_weights()
46 | 
47 |         rows = np.arange(done.shape[0])
48 |         not_done = np.logical_not(done)
49 | 
50 |         eval_next = self.eval_network.predict(states_next)
51 |         target_next = self.target_network.predict(states_next)
52 |         discounted_rewards = self.gamma * \
53 |             target_next[rows, np.argmax(eval_next, axis=1)]
54 | 
55 |         y = self.eval_network.predict(states)
56 |         y[rows, actions] = rewards
57 |         y[not_done, actions[not_done]] += discounted_rewards[not_done]
58 | 
59 |         history = self.eval_network.fit(states, y, epochs=1, verbose=0)
60 |         self.learning_step += 1
61 |         if self.eps_greedy < 0.9:
62 |             self.eps_greedy += self.eps_increment
63 | 
64 |         return history
65 | 
66 |     def load(self, name):
67 |         self.eval_network.load_weights(name)
68 |         self.update_target_weights()
69 | 
70 |     def save(self, name):
71 |         self.eval_network.save_weights(name)
72 | 


--------------------------------------------------------------------------------
/MADQN/dqn_tag.py:
--------------------------------------------------------------------------------
  1 | import gym
  2 | import numpy as np
  3 | import tensorflow as tf
  4 | import argparse
  5 | import itertools
  6 | import time
  7 | import os
  8 | import pickle
  9 | import code
 10 | import random
 11 | 
 12 | from dqn import DQN
 13 | from memory import Memory
 14 | from make_env import make_env
 15 | import general_utilities
 16 | import simple_tag_utilities
 17 | 
 18 | 
 19 | def play(episodes, is_render, is_testing, checkpoint_interval,
 20 |          weights_filename_prefix, csv_filename_prefix, batch_size,env):
 21 |     # init statistics. NOTE: simple tag specific!
 22 |     statistics_header = ["episode"]
 23 |     statistics_header.append("steps")
 24 |     statistics_header.extend(["reward_{}".format(i) for i in range(env.n)])
 25 |     statistics_header.extend(["loss_{}".format(i) for i in range(env.n)])
 26 |     statistics_header.extend(["eps_greedy_{}".format(i) for i in range(env.n)])
 27 |     statistics_header.extend(["collisions_{}".format(i) for i in range(env.n)])
 28 |     print("Collecting statistics {}:".format(" ".join(statistics_header)))
 29 |     statistics = general_utilities.Time_Series_Statistics_Store(
 30 |         statistics_header)
 31 | 
 32 |     for episode in range(args.episodes):
 33 |         states = env.reset()
 34 |         episode_losses = np.zeros(env.n)
 35 |         episode_rewards = np.zeros(env.n)
 36 |         collision_count = np.zeros(env.n)
 37 |         steps = 0
 38 | 
 39 |         while True:
 40 |             steps += 1
 41 |             mysteps=steps
 42 |             #steps
 43 |             terminal = (mysteps >= 50)
 44 | 
 45 |             # render
 46 |             if args.render:
 47 |                 env.render()
 48 | 
 49 |             # act
 50 |             actions = []
 51 |             actions_onehot = []
 52 |             for i in range(env.n):
 53 |                 action = dqns[i].choose_action(states[i])
 54 |                 speed = 0.9 if env.agents[i].adversary else 1
 55 | 
 56 |                 onehot_action = np.zeros(n_actions[i])
 57 |                 onehot_action[action] = speed
 58 |                 actions_onehot.append(onehot_action)
 59 |                 actions.append(action)
 60 | 
 61 |             # step
 62 |             states_next, rewards, done, info = env.step(actions_onehot)
 63 | 
 64 |             # learn
 65 |             if not args.testing:
 66 |                 size = memories[0].pointer
 67 |                 batch = random.sample(range(size), size) if size < batch_size else random.sample(
 68 |                     range(size), batch_size)
 69 | 
 70 |                 for i in range(env.n):
 71 |                     if done[i]:
 72 |                         rewards[i] -= 50
 73 | 
 74 |                     memories[i].remember(states[i], actions[i],
 75 |                                          rewards[i], states_next[i], done[i])
 76 | 
 77 |                     if memories[i].pointer > batch_size * 10:
 78 |                         history = dqns[i].learn(*memories[i].sample(batch))
 79 |                         episode_losses[i] += history.history["loss"][0]
 80 |                     else:
 81 |                         episode_losses[i] = -1
 82 | 
 83 |             states = states_next
 84 |             episode_rewards += rewards
 85 |             collision_count += np.array(
 86 |                 simple_tag_utilities.count_agent_collisions(env))
 87 | 
 88 |             # reset states if done
 89 |             if (any(done) or terminal):
 90 |                 mysteps=0
 91 |                 episode_rewards = episode_rewards / steps
 92 |                 episode_losses = episode_losses / steps
 93 | 
 94 |                 statistic = [episode]
 95 |                 statistic.append(steps)
 96 |                 statistic.extend([episode_rewards[i] for i in range(env.n)])
 97 |                 statistic.extend([episode_losses[i] for i in range(env.n)])
 98 |                 statistic.extend([dqns[i].eps_greedy for i in range(env.n)])
 99 |                 statistic.extend(collision_count.tolist())
100 |                 statistics.add_statistics(statistic)
101 |                 
102 |                 if episode % 25 == 0:
103 |                     print(statistics.summarize_last())
104 |                 break
105 | 
106 |         if episode % checkpoint_interval == 0:
107 |             statistics.dump("{}_{}.csv".format(csv_filename_prefix,
108 |                                                episode))
109 |             general_utilities.save_dqn_weights(dqns,
110 |                                                "{}_{}_".format(weights_filename_prefix, episode))
111 |             if episode >= checkpoint_interval:
112 |                 os.remove("{}_{}.csv".format(csv_filename_prefix,
113 |                                              episode - checkpoint_interval))
114 | 
115 |     return statistics
116 | 
117 | 
118 | if __name__ == '__main__':
119 |     print ("Have enter main")
120 |     parser = argparse.ArgumentParser()
121 |     parser.add_argument('--env', default='simple_tag_guided', type=str)
122 |     parser.add_argument('--learning_rate', default=0.001, type=float)
123 |     parser.add_argument('--episodes', default=60000, type=int)
124 |     parser.add_argument('--render', default=False, action="store_true")
125 |     parser.add_argument('--benchmark', default=False, action="store_true")
126 |     parser.add_argument('--experiment_prefix', default=".",
127 |                         help="directory to store all experiment data")
128 |     parser.add_argument('--weights_filename_prefix', default='/save/tag-dqn',
129 |                         help="where to store/load network weights")
130 |     parser.add_argument('--csv_filename_prefix', default='/save/statistics-dqn',
131 |                         help="where to store statistics")
132 |     parser.add_argument('--checkpoint_frequency', default=500,
133 |                         help="how often to checkpoint")
134 |     parser.add_argument('--testing', default=False, action="store_true",
135 |                         help="reduces exploration substantially")
136 |     parser.add_argument('--random_seed', default=2, type=int)
137 |     parser.add_argument('--memory_size', default=10000, type=int)
138 |     parser.add_argument('--batch_size', default=128, type=int)
139 |     parser.add_argument('--epsilon_greedy', nargs='+', type=float,
140 |                         help="Epsilon greedy parameter for each agent")
141 |     args = parser.parse_args()
142 | 
143 |     general_utilities.dump_dict_as_json(vars(args),
144 |                                         args.experiment_prefix + "/save/run_parameters.json")
145 |     # init env
146 |     print ("Have init env")
147 |     env = make_env(args.env, args.benchmark)
148 | 
149 |     if args.epsilon_greedy is not None:
150 |         if len(args.epsilon_greedy) == env.n:
151 |             epsilon_greedy = args.epsilon_greedy
152 |         else:
153 |             raise ValueError("Must have enough epsilon_greedy for all agents")
154 |     else:
155 |         epsilon_greedy = [0.5 for i in range(env.n)]
156 | 
157 |     # set random seed
158 |     print ("Have set seed")
159 |     env.seed(args.random_seed)
160 |     random.seed(args.random_seed)
161 |     np.random.seed(args.random_seed)
162 |     tf.set_random_seed(args.random_seed)
163 | 
164 |     # init DQNs
165 |     print ("Have init DQN")
166 |     n_actions = [env.action_space[i].n for i in range(env.n)]
167 |     state_sizes = [env.observation_space[i].shape[0] for i in range(env.n)]
168 |     memories = [Memory(args.memory_size) for i in range(env.n)]
169 |     dqns = [DQN(n_actions[i], state_sizes[i], eps_greedy=epsilon_greedy[i])
170 |             for i in range(env.n)]
171 | 
172 |     general_utilities.load_dqn_weights_if_exist(
173 |         dqns, args.experiment_prefix + args.weights_filename_prefix)
174 | 
175 |     start_time = time.time()
176 | 
177 |     # play
178 |     print ("Have play")
179 |     statistics = play(args.episodes, args.render, args.testing,
180 |                       args.checkpoint_frequency,
181 |                       args.experiment_prefix + args.weights_filename_prefix,
182 |                       args.experiment_prefix + args.csv_filename_prefix,
183 |                       args.batch_size,env)
184 | 
185 |     # bookkeeping
186 |     print("Finished {} episodes in {} seconds".format(args.episodes,
187 |                                                       time.time() - start_time))
188 |     general_utilities.save_dqn_weights(
189 |         dqns, args.experiment_prefix + args.weights_filename_prefix)
190 |     statistics.dump(args.experiment_prefix + args.csv_filename_prefix + ".csv")
191 | 


--------------------------------------------------------------------------------
/MADQN/make_env.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Code for creating a multiagent environment with one of the scenarios listed
 3 | in ./scenarios/.
 4 | Can be called by using, for example:
 5 |     env = make_env('simple_speaker_listener')
 6 | After producing the env object, can be used similarly to an OpenAI gym
 7 | environment.
 8 | 
 9 | A policy using this environment must output actions in the form of a list
10 | for all agents. Each element of the list should be a numpy array,
11 | of size (env.world.dim_p + env.world.dim_c, 1). Physical actions precede
12 | communication actions in this array. See environment.py for more details.
13 | """
14 | 
15 | def make_env(scenario_name, benchmark=False):
16 |     '''
17 |     Creates a MultiAgentEnv object as env. This can be used similar to a gym
18 |     environment by calling env.reset() and env.step().
19 |     Use env.render() to view the environment on the screen.
20 | 
21 |     Input:
22 |         scenario_name   :   name of the scenario from ./scenarios/ to be Returns
23 |                             (without the .py extension)
24 |         benchmark       :   whether you want to produce benchmarking data
25 |                             (usually only done during evaluation)
26 | 
27 |     Some useful env properties (see environment.py):
28 |         .observation_space  :   Returns the observation space for each agent
29 |         .action_space       :   Returns the action space for each agent
30 |         .n                  :   Returns the number of Agents
31 |     '''
32 |     from multiagent.environment import MultiAgentEnv
33 |     import multiagent.scenarios as scenarios
34 | 
35 |     # load scenario from script
36 |     scenario = scenarios.load(scenario_name + ".py").Scenario()
37 |     # create world
38 |     world = scenario.make_world()
39 |     # create multiagent environment
40 |     if benchmark:        
41 |         env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, scenario.benchmark_data)
42 |     else:
43 |         env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)# done_callback=scenario.done)
44 |     return env
45 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | # Multiagent envs
 4 | # ----------------------------------------
 5 | 
 6 | register(
 7 |     id='MultiagentSimple-v0',
 8 |     entry_point='multiagent.envs:SimpleEnv',
 9 |     # FIXME(cathywu) currently has to be exactly max_path_length parameters in
10 |     # rllab run script
11 |     max_episode_steps=100,
12 | )
13 | 
14 | register(
15 |     id='MultiagentSimpleSpeakerListener-v0',
16 |     entry_point='multiagent.envs:SimpleSpeakerListenerEnv',
17 |     max_episode_steps=100,
18 | )
19 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/core.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | # physical/external base state of all entites
  4 | class EntityState(object):
  5 |     def __init__(self):
  6 |         # physical position
  7 |         self.p_pos = None
  8 |         # physical velocity
  9 |         self.p_vel = None
 10 | 
 11 | # state of agents (including communication and internal/mental state)
 12 | class AgentState(EntityState):
 13 |     def __init__(self):
 14 |         super(AgentState, self).__init__()
 15 |         # communication utterance
 16 |         self.c = None
 17 | 
 18 | # action of the agent
 19 | class Action(object):
 20 |     def __init__(self):
 21 |         # physical action
 22 |         self.u = None
 23 |         # communication action
 24 |         self.c = None
 25 | 
 26 | # properties and state of physical world entity
 27 | class Entity(object):
 28 |     def __init__(self):
 29 |         # name 
 30 |         self.name = ''
 31 |         # properties:
 32 |         self.size = 0.050
 33 |         # entity can move / be pushed
 34 |         self.movable = False
 35 |         # entity collides with others
 36 |         self.collide = True
 37 |         # material density (affects mass)
 38 |         self.density = 25.0
 39 |         # color
 40 |         self.color = None
 41 |         # max speed and accel
 42 |         self.max_speed = None
 43 |         self.accel = None
 44 |         # state
 45 |         self.state = EntityState()
 46 |         # mass
 47 |         self.initial_mass = 1.0
 48 | 
 49 |     @property
 50 |     def mass(self):
 51 |         return self.initial_mass
 52 | 
 53 | # properties of landmark entities
 54 | class Landmark(Entity):
 55 |      def __init__(self):
 56 |         super(Landmark, self).__init__()
 57 | 
 58 | # properties of agent entities
 59 | class Agent(Entity):
 60 |     def __init__(self):
 61 |         super(Agent, self).__init__()
 62 |         # agents are movable by default
 63 |         self.movable = True
 64 |         # cannot send communication signals
 65 |         self.silent = False
 66 |         # cannot observe the world
 67 |         self.blind = False
 68 |         # physical motor noise amount
 69 |         self.u_noise = None
 70 |         # communication noise amount
 71 |         self.c_noise = None
 72 |         # control range
 73 |         self.u_range = 1.0
 74 |         # state
 75 |         self.state = AgentState()
 76 |         # action
 77 |         self.action = Action()
 78 |         # script behavior to execute
 79 |         self.action_callback = None
 80 | 
 81 | # multi-agent world
 82 | class World(object):
 83 |     def __init__(self):
 84 |         # list of agents and entities (can change at execution-time!)
 85 |         self.agents = []
 86 |         self.landmarks = []
 87 |         # communication channel dimensionality
 88 |         self.dim_c = 0
 89 |         # position dimensionality
 90 |         self.dim_p = 2
 91 |         # color dimensionality
 92 |         self.dim_color = 3
 93 |         # simulation timestep
 94 |         self.dt = 0.1
 95 |         # physical damping
 96 |         self.damping = 0.25
 97 |         # contact response parameters
 98 |         self.contact_force = 1e+2
 99 |         self.contact_margin = 1e-3
100 | 
101 |     # return all entities in the world
102 |     @property
103 |     def entities(self):
104 |         return self.agents + self.landmarks
105 | 
106 |     # return all agents controllable by external policies
107 |     @property
108 |     def policy_agents(self):
109 |         return [agent for agent in self.agents if agent.action_callback is None]
110 | 
111 |     # return all agents controlled by world scripts
112 |     @property
113 |     def scripted_agents(self):
114 |         return [agent for agent in self.agents if agent.action_callback is not None]
115 | 
116 |     # update state of the world
117 |     def step(self):
118 |         # set actions for scripted agents 
119 |         for agent in self.scripted_agents:
120 |             agent.action = agent.action_callback(agent, self)
121 |         # gather forces applied to entities
122 |         p_force = [None] * len(self.entities)
123 |         # apply agent physical controls
124 |         p_force = self.apply_action_force(p_force)
125 |         # apply environment forces
126 |         p_force = self.apply_environment_force(p_force)
127 |         # integrate physical state
128 |         self.integrate_state(p_force)
129 |         # update agent state
130 |         for agent in self.agents:
131 |             self.update_agent_state(agent)
132 | 
133 |     # gather agent action forces
134 |     def apply_action_force(self, p_force):
135 |         # set applied forces
136 |         for i,agent in enumerate(self.agents):
137 |             if agent.movable:
138 |                 noise = np.random.randn(*agent.action.u.shape) * agent.u_noise if agent.u_noise else 0.0
139 |                 p_force[i] = agent.action.u + noise                
140 |         return p_force
141 | 
142 |     # gather physical forces acting on entities
143 |     def apply_environment_force(self, p_force):
144 |         # simple (but inefficient) collision response
145 |         for a,entity_a in enumerate(self.entities):
146 |             for b,entity_b in enumerate(self.entities):
147 |                 if(b <= a): continue
148 |                 [f_a, f_b] = self.get_collision_force(entity_a, entity_b)
149 |                 if(f_a is not None):
150 |                     if(p_force[a] is None): p_force[a] = 0.0
151 |                     p_force[a] = f_a + p_force[a] 
152 |                 if(f_b is not None):
153 |                     if(p_force[b] is None): p_force[b] = 0.0
154 |                     p_force[b] = f_b + p_force[b]        
155 |         return p_force
156 | 
157 |     # integrate physical state
158 |     def integrate_state(self, p_force):
159 |         for i,entity in enumerate(self.entities):
160 |             if not entity.movable: continue
161 |             entity.state.p_vel = entity.state.p_vel * (1 - self.damping)
162 |             if (p_force[i] is not None):
163 |                 entity.state.p_vel += (p_force[i] / entity.mass) * self.dt
164 |             if entity.max_speed is not None:
165 |                 speed = np.sqrt(np.square(entity.state.p_vel[0]) + np.square(entity.state.p_vel[1]))
166 |                 if speed > entity.max_speed:
167 |                     entity.state.p_vel = entity.state.p_vel / np.sqrt(np.square(entity.state.p_vel[0]) +
168 |                                                                   np.square(entity.state.p_vel[1])) * entity.max_speed
169 |             entity.state.p_pos += entity.state.p_vel * self.dt
170 | 
171 |     def update_agent_state(self, agent):
172 |         # set communication state (directly for now)
173 |         if agent.silent:
174 |             agent.state.c = np.zeros(self.dim_c)
175 |         else:
176 |             noise = np.random.randn(*agent.action.c.shape) * agent.c_noise if agent.c_noise else 0.0
177 |             agent.state.c = agent.action.c + noise      
178 | 
179 |     # get collision forces for any contact between two entities
180 |     def get_collision_force(self, entity_a, entity_b):
181 |         if (not entity_a.collide) or (not entity_b.collide):
182 |             return [None, None] # not a collider
183 |         if (entity_a is entity_b):
184 |             return [None, None] # don't collide against itself
185 |         # compute actual distance between entities
186 |         delta_pos = entity_a.state.p_pos - entity_b.state.p_pos
187 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
188 |         # minimum allowable distance
189 |         dist_min = entity_a.size + entity_b.size
190 |         # softmax penetration
191 |         k = self.contact_margin
192 |         penetration = np.logaddexp(0, -(dist - dist_min)/k)*k
193 |         force = self.contact_force * delta_pos / dist * penetration
194 |         force_a = +force if entity_a.movable else None
195 |         force_b = -force if entity_b.movable else None
196 |         return [force_a, force_b]


--------------------------------------------------------------------------------
/MADQN/multiagent/multi_discrete.py:
--------------------------------------------------------------------------------
 1 | # An old version of OpenAI Gym's multi_discrete.py. (Was getting affected by Gym updates)
 2 | # (https://github.com/openai/gym/blob/1fb81d4e3fb780ccf77fec731287ba07da35eb84/gym/spaces/multi_discrete.py)
 3 | 
 4 | import numpy as np
 5 | 
 6 | import gym
 7 | from gym.spaces import prng
 8 | 
 9 | class MultiDiscrete(gym.Space):
10 |     """
11 |     - The multi-discrete action space consists of a series of discrete action spaces with different parameters
12 |     - It can be adapted to both a Discrete action space or a continuous (Box) action space
13 |     - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space
14 |     - It is parametrized by passing an array of arrays containing [min, max] for each discrete action space
15 |        where the discrete action space can take any integers from `min` to `max` (both inclusive)
16 |     Note: A value of 0 always need to represent the NOOP action.
17 |     e.g. Nintendo Game Controller
18 |     - Can be conceptualized as 3 discrete action spaces:
19 |         1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
20 |         2) Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
21 |         3) Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
22 |     - Can be initialized as
23 |         MultiDiscrete([ [0,4], [0,1], [0,1] ])
24 |     """
25 |     def __init__(self, array_of_param_array):
26 |         self.low = np.array([x[0] for x in array_of_param_array])
27 |         self.high = np.array([x[1] for x in array_of_param_array])
28 |         self.num_discrete_space = self.low.shape[0]
29 | 
30 |     def sample(self):
31 |         """ Returns a array with one sample from each discrete action space """
32 |         # For each row: round(random .* (max - min) + min, 0)
33 |         random_array = prng.np_random.rand(self.num_discrete_space)
34 |         return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
35 |     def contains(self, x):
36 |         return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all()
37 | 
38 |     @property
39 |     def shape(self):
40 |         return self.num_discrete_space
41 |     def __repr__(self):
42 |         return "MultiDiscrete" + str(self.num_discrete_space)
43 |     def __eq__(self, other):
44 |         return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)


--------------------------------------------------------------------------------
/MADQN/multiagent/policy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from pyglet.window import key
 3 | 
 4 | # individual agent policy
 5 | class Policy(object):
 6 |     def __init__(self):
 7 |         pass
 8 |     def action(self, obs):
 9 |         raise NotImplementedError()
10 | 
11 | # interactive policy based on keyboard input
12 | # hard-coded to deal only with movement, not communication
13 | class InteractivePolicy(Policy):
14 |     def __init__(self, env, agent_index):
15 |         super(InteractivePolicy, self).__init__()
16 |         self.env = env
17 |         # hard-coded keyboard events
18 |         self.move = [False for i in range(4)]
19 |         self.comm = [False for i in range(env.world.dim_c)]
20 |         # register keyboard events with this environment's window
21 |         env.viewers[agent_index].window.on_key_press = self.key_press
22 |         env.viewers[agent_index].window.on_key_release = self.key_release
23 | 
24 |     def action(self, obs):
25 |         # ignore observation and just act based on keyboard events
26 |         if self.env.discrete_action_input:
27 |             u = 0
28 |             if self.move[0]: u = 1
29 |             if self.move[1]: u = 2
30 |             if self.move[2]: u = 4
31 |             if self.move[3]: u = 3
32 |         else:
33 |             u = np.zeros(5) # 5-d because of no-move action
34 |             if self.move[0]: u[1] += 1.0
35 |             if self.move[1]: u[2] += 1.0
36 |             if self.move[3]: u[3] += 1.0
37 |             if self.move[2]: u[4] += 1.0
38 |             if True not in self.move:
39 |                 u[0] += 1.0
40 |         return np.concatenate([u, np.zeros(self.env.world.dim_c)])
41 | 
42 |     # keyboard event callbacks
43 |     def key_press(self, k, mod):
44 |         if k==key.LEFT:  self.move[0] = True
45 |         if k==key.RIGHT: self.move[1] = True
46 |         if k==key.UP:    self.move[2] = True
47 |         if k==key.DOWN:  self.move[3] = True
48 |     def key_release(self, k, mod):
49 |         if k==key.LEFT:  self.move[0] = False
50 |         if k==key.RIGHT: self.move[1] = False
51 |         if k==key.UP:    self.move[2] = False
52 |         if k==key.DOWN:  self.move[3] = False
53 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/rendering.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 2D rendering framework
  3 | """
  4 | from __future__ import division
  5 | import os
  6 | import six
  7 | import sys
  8 | 
  9 | if "Apple" in sys.version:
 10 |     if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
 11 |         os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
 12 |         # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
 13 | 
 14 | from gym.utils import reraise
 15 | from gym import error
 16 | 
 17 | try:
 18 |     import pyglet
 19 | except ImportError as e:
 20 |     reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
 21 | 
 22 | try:
 23 |     from pyglet.gl import *
 24 | except ImportError as e:
 25 |     reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
 26 | 
 27 | import math
 28 | import numpy as np
 29 | 
 30 | RAD2DEG = 57.29577951308232
 31 | 
 32 | def get_display(spec):
 33 |     """Convert a display specification (such as :0) into an actual Display
 34 |     object.
 35 | 
 36 |     Pyglet only supports multiple Displays on Linux.
 37 |     """
 38 |     if spec is None:
 39 |         return None
 40 |     elif isinstance(spec, six.string_types):
 41 |         return pyglet.canvas.Display(spec)
 42 |     else:
 43 |         raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
 44 | 
 45 | class Viewer(object):
 46 |     def __init__(self, width, height, display=None):
 47 |         display = get_display(display)
 48 | 
 49 |         self.width = width
 50 |         self.height = height
 51 | 
 52 |         self.window = pyglet.window.Window(width=width, height=height, display=display)
 53 |         self.window.on_close = self.window_closed_by_user
 54 |         self.geoms = []
 55 |         self.onetime_geoms = []
 56 |         self.transform = Transform()
 57 | 
 58 |         glEnable(GL_BLEND)
 59 |         # glEnable(GL_MULTISAMPLE)
 60 |         glEnable(GL_LINE_SMOOTH)
 61 |         # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
 62 |         glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
 63 |         glLineWidth(2.0)
 64 |         glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
 65 | 
 66 |     def close(self):
 67 |         self.window.close()
 68 | 
 69 |     def window_closed_by_user(self):
 70 |         self.close()
 71 | 
 72 |     def set_bounds(self, left, right, bottom, top):
 73 |         assert right > left and top > bottom
 74 |         scalex = self.width/(right-left)
 75 |         scaley = self.height/(top-bottom)
 76 |         self.transform = Transform(
 77 |             translation=(-left*scalex, -bottom*scaley),
 78 |             scale=(scalex, scaley))
 79 | 
 80 |     def add_geom(self, geom):
 81 |         self.geoms.append(geom)
 82 | 
 83 |     def add_onetime(self, geom):
 84 |         self.onetime_geoms.append(geom)
 85 | 
 86 |     def render(self, return_rgb_array=False):
 87 |         glClearColor(1,1,1,1)
 88 |         self.window.clear()
 89 |         self.window.switch_to()
 90 |         self.window.dispatch_events()
 91 |         self.transform.enable()
 92 |         for geom in self.geoms:
 93 |             geom.render()
 94 |         for geom in self.onetime_geoms:
 95 |             geom.render()
 96 |         self.transform.disable()
 97 |         arr = None
 98 |         if return_rgb_array:
 99 |             buffer = pyglet.image.get_buffer_manager().get_color_buffer()
100 |             image_data = buffer.get_image_data()
101 |             arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
102 |             # In https://github.com/openai/gym-http-api/issues/2, we
103 |             # discovered that someone using Xmonad on Arch was having
104 |             # a window of size 598 x 398, though a 600 x 400 window
105 |             # was requested. (Guess Xmonad was preserving a pixel for
106 |             # the boundary.) So we use the buffer height/width rather
107 |             # than the requested one.
108 |             arr = arr.reshape(buffer.height, buffer.width, 4)
109 |             arr = arr[::-1,:,0:3]
110 |         self.window.flip()
111 |         self.onetime_geoms = []
112 |         return arr
113 | 
114 |     # Convenience
115 |     def draw_circle(self, radius=10, res=30, filled=True, **attrs):
116 |         geom = make_circle(radius=radius, res=res, filled=filled)
117 |         _add_attrs(geom, attrs)
118 |         self.add_onetime(geom)
119 |         return geom
120 | 
121 |     def draw_polygon(self, v, filled=True, **attrs):
122 |         geom = make_polygon(v=v, filled=filled)
123 |         _add_attrs(geom, attrs)
124 |         self.add_onetime(geom)
125 |         return geom
126 | 
127 |     def draw_polyline(self, v, **attrs):
128 |         geom = make_polyline(v=v)
129 |         _add_attrs(geom, attrs)
130 |         self.add_onetime(geom)
131 |         return geom
132 | 
133 |     def draw_line(self, start, end, **attrs):
134 |         geom = Line(start, end)
135 |         _add_attrs(geom, attrs)
136 |         self.add_onetime(geom)
137 |         return geom
138 | 
139 |     def get_array(self):
140 |         self.window.flip()
141 |         image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
142 |         self.window.flip()
143 |         arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
144 |         arr = arr.reshape(self.height, self.width, 4)
145 |         return arr[::-1,:,0:3]
146 | 
147 | def _add_attrs(geom, attrs):
148 |     if "color" in attrs:
149 |         geom.set_color(*attrs["color"])
150 |     if "linewidth" in attrs:
151 |         geom.set_linewidth(attrs["linewidth"])
152 | 
153 | class Geom(object):
154 |     def __init__(self):
155 |         self._color=Color((0, 0, 0, 1.0))
156 |         self.attrs = [self._color]
157 |     def render(self):
158 |         for attr in reversed(self.attrs):
159 |             attr.enable()
160 |         self.render1()
161 |         for attr in self.attrs:
162 |             attr.disable()
163 |     def render1(self):
164 |         raise NotImplementedError
165 |     def add_attr(self, attr):
166 |         self.attrs.append(attr)
167 |     def set_color(self, r, g, b, alpha=1):
168 |         self._color.vec4 = (r, g, b, alpha)
169 | 
170 | class Attr(object):
171 |     def enable(self):
172 |         raise NotImplementedError
173 |     def disable(self):
174 |         pass
175 | 
176 | class Transform(Attr):
177 |     def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
178 |         self.set_translation(*translation)
179 |         self.set_rotation(rotation)
180 |         self.set_scale(*scale)
181 |     def enable(self):
182 |         glPushMatrix()
183 |         glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
184 |         glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
185 |         glScalef(self.scale[0], self.scale[1], 1)
186 |     def disable(self):
187 |         glPopMatrix()
188 |     def set_translation(self, newx, newy):
189 |         self.translation = (float(newx), float(newy))
190 |     def set_rotation(self, new):
191 |         self.rotation = float(new)
192 |     def set_scale(self, newx, newy):
193 |         self.scale = (float(newx), float(newy))
194 | 
195 | class Color(Attr):
196 |     def __init__(self, vec4):
197 |         self.vec4 = vec4
198 |     def enable(self):
199 |         glColor4f(*self.vec4)
200 | 
201 | class LineStyle(Attr):
202 |     def __init__(self, style):
203 |         self.style = style
204 |     def enable(self):
205 |         glEnable(GL_LINE_STIPPLE)
206 |         glLineStipple(1, self.style)
207 |     def disable(self):
208 |         glDisable(GL_LINE_STIPPLE)
209 | 
210 | class LineWidth(Attr):
211 |     def __init__(self, stroke):
212 |         self.stroke = stroke
213 |     def enable(self):
214 |         glLineWidth(self.stroke)
215 | 
216 | class Point(Geom):
217 |     def __init__(self):
218 |         Geom.__init__(self)
219 |     def render1(self):
220 |         glBegin(GL_POINTS) # draw point
221 |         glVertex3f(0.0, 0.0, 0.0)
222 |         glEnd()
223 | 
224 | class FilledPolygon(Geom):
225 |     def __init__(self, v):
226 |         Geom.__init__(self)
227 |         self.v = v
228 |     def render1(self):
229 |         if   len(self.v) == 4 : glBegin(GL_QUADS)
230 |         elif len(self.v)  > 4 : glBegin(GL_POLYGON)
231 |         else: glBegin(GL_TRIANGLES)
232 |         for p in self.v:
233 |             glVertex3f(p[0], p[1],0)  # draw each vertex
234 |         glEnd()
235 | 
236 |         color = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5, self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
237 |         glColor4f(*color)
238 |         glBegin(GL_LINE_LOOP)
239 |         for p in self.v:
240 |             glVertex3f(p[0], p[1],0)  # draw each vertex
241 |         glEnd()
242 | 
243 | def make_circle(radius=10, res=30, filled=True):
244 |     points = []
245 |     for i in range(res):
246 |         ang = 2*math.pi*i / res
247 |         points.append((math.cos(ang)*radius, math.sin(ang)*radius))
248 |     if filled:
249 |         return FilledPolygon(points)
250 |     else:
251 |         return PolyLine(points, True)
252 | 
253 | def make_polygon(v, filled=True):
254 |     if filled: return FilledPolygon(v)
255 |     else: return PolyLine(v, True)
256 | 
257 | def make_polyline(v):
258 |     return PolyLine(v, False)
259 | 
260 | def make_capsule(length, width):
261 |     l, r, t, b = 0, length, width/2, -width/2
262 |     box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
263 |     circ0 = make_circle(width/2)
264 |     circ1 = make_circle(width/2)
265 |     circ1.add_attr(Transform(translation=(length, 0)))
266 |     geom = Compound([box, circ0, circ1])
267 |     return geom
268 | 
269 | class Compound(Geom):
270 |     def __init__(self, gs):
271 |         Geom.__init__(self)
272 |         self.gs = gs
273 |         for g in self.gs:
274 |             g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
275 |     def render1(self):
276 |         for g in self.gs:
277 |             g.render()
278 | 
279 | class PolyLine(Geom):
280 |     def __init__(self, v, close):
281 |         Geom.__init__(self)
282 |         self.v = v
283 |         self.close = close
284 |         self.linewidth = LineWidth(1)
285 |         self.add_attr(self.linewidth)
286 |     def render1(self):
287 |         glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
288 |         for p in self.v:
289 |             glVertex3f(p[0], p[1],0)  # draw each vertex
290 |         glEnd()
291 |     def set_linewidth(self, x):
292 |         self.linewidth.stroke = x
293 | 
294 | class Line(Geom):
295 |     def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
296 |         Geom.__init__(self)
297 |         self.start = start
298 |         self.end = end
299 |         self.linewidth = LineWidth(1)
300 |         self.add_attr(self.linewidth)
301 | 
302 |     def render1(self):
303 |         glBegin(GL_LINES)
304 |         glVertex2f(*self.start)
305 |         glVertex2f(*self.end)
306 |         glEnd()
307 | 
308 | class Image(Geom):
309 |     def __init__(self, fname, width, height):
310 |         Geom.__init__(self)
311 |         self.width = width
312 |         self.height = height
313 |         img = pyglet.image.load(fname)
314 |         self.img = img
315 |         self.flip = False
316 |     def render1(self):
317 |         self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
318 | 
319 | # ================================================================
320 | 
321 | class SimpleImageViewer(object):
322 |     def __init__(self, display=None):
323 |         self.window = None
324 |         self.isopen = False
325 |         self.display = display
326 |     def imshow(self, arr):
327 |         if self.window is None:
328 |             height, width, channels = arr.shape
329 |             self.window = pyglet.window.Window(width=width, height=height, display=self.display)
330 |             self.width = width
331 |             self.height = height
332 |             self.isopen = True
333 |         assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape"
334 |         image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
335 |         self.window.clear()
336 |         self.window.switch_to()
337 |         self.window.dispatch_events()
338 |         image.blit(0,0)
339 |         self.window.flip()
340 |     def close(self):
341 |         if self.isopen:
342 |             self.window.close()
343 |             self.isopen = False
344 |     def __del__(self):
345 |         self.close()


--------------------------------------------------------------------------------
/MADQN/multiagent/scenario.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # defines scenario upon which the world is built
 4 | class BaseScenario(object):
 5 |     # create elements of the world
 6 |     def make_world(self):
 7 |         raise NotImplementedError()
 8 |     # create initial conditions of the world
 9 |     def reset_world(self, world):
10 |         raise NotImplementedError()
11 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | import imp
2 | import os.path as osp
3 | 
4 | 
5 | def load(name):
6 |     pathname = osp.join(osp.dirname(__file__), name)
7 |     return imp.load_source('', pathname)
8 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # add agents
 9 |         world.agents = [Agent() for i in range(1)]
10 |         for i, agent in enumerate(world.agents):
11 |             agent.name = 'agent %d' % i
12 |             agent.collide = False
13 |             agent.silent = True
14 |         # add landmarks
15 |         world.landmarks = [Landmark() for i in range(1)]
16 |         for i, landmark in enumerate(world.landmarks):
17 |             landmark.name = 'landmark %d' % i
18 |             landmark.collide = False
19 |             landmark.movable = False
20 |         # make initial conditions
21 |         self.reset_world(world)
22 |         return world
23 | 
24 |     def reset_world(self, world):
25 |         # random properties for agents
26 |         for i, agent in enumerate(world.agents):
27 |             agent.color = np.array([0.25,0.25,0.25])
28 |         # random properties for landmarks
29 |         for i, landmark in enumerate(world.landmarks):
30 |             landmark.color = np.array([0.75,0.75,0.75])
31 |         world.landmarks[0].color = np.array([0.75,0.25,0.25])
32 |         # set random initial states
33 |         for agent in world.agents:
34 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
35 |             agent.state.p_vel = np.zeros(world.dim_p)
36 |             agent.state.c = np.zeros(world.dim_c)
37 |         for i, landmark in enumerate(world.landmarks):
38 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
39 |             landmark.state.p_vel = np.zeros(world.dim_p)
40 | 
41 |     def reward(self, agent, world):
42 |         dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos))
43 |         return -dist2
44 | 
45 |     def observation(self, agent, world):
46 |         # get positions of all entities in this agent's reference frame
47 |         entity_pos = []
48 |         for entity in world.landmarks:
49 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
50 |         return np.concatenate([agent.state.p_vel] + entity_pos)
51 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_adversary.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 | 
  8 |     def make_world(self):
  9 |         world = World()
 10 |         # set any world properties first
 11 |         world.dim_c = 2
 12 |         num_agents = 3
 13 |         world.num_agents = num_agents
 14 |         num_adversaries = 1
 15 |         num_landmarks = num_agents - 1
 16 |         # add agents
 17 |         world.agents = [Agent() for i in range(num_agents)]
 18 |         for i, agent in enumerate(world.agents):
 19 |             agent.name = 'agent %d' % i
 20 |             agent.collide = False
 21 |             agent.silent = True
 22 |             agent.adversary = True if i < num_adversaries else False
 23 |             agent.size = 0.15
 24 |         # add landmarks
 25 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 26 |         for i, landmark in enumerate(world.landmarks):
 27 |             landmark.name = 'landmark %d' % i
 28 |             landmark.collide = False
 29 |             landmark.movable = False
 30 |             landmark.size = 0.08
 31 |         # make initial conditions
 32 |         self.reset_world(world)
 33 |         return world
 34 | 
 35 |     def reset_world(self, world):
 36 |         # random properties for agents
 37 |         world.agents[0].color = np.array([0.85, 0.35, 0.35])
 38 |         for i in range(1, world.num_agents):
 39 |             world.agents[i].color = np.array([0.35, 0.35, 0.85])
 40 |         # random properties for landmarks
 41 |         for i, landmark in enumerate(world.landmarks):
 42 |             landmark.color = np.array([0.15, 0.15, 0.15])
 43 |         # set goal landmark
 44 |         goal = np.random.choice(world.landmarks)
 45 |         goal.color = np.array([0.15, 0.65, 0.15])
 46 |         for agent in world.agents:
 47 |             agent.goal_a = goal
 48 |         # set random initial states
 49 |         for agent in world.agents:
 50 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 51 |             agent.state.p_vel = np.zeros(world.dim_p)
 52 |             agent.state.c = np.zeros(world.dim_c)
 53 |         for i, landmark in enumerate(world.landmarks):
 54 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 55 |             landmark.state.p_vel = np.zeros(world.dim_p)
 56 | 
 57 |     def benchmark_data(self, agent, world):
 58 |         # returns data for benchmarking purposes
 59 |         if agent.adversary:
 60 |             return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
 61 |         else:
 62 |             dists = []
 63 |             for l in world.landmarks:
 64 |                 dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos)))
 65 |             dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
 66 |             return tuple(dists)
 67 | 
 68 |     # return all agents that are not adversaries
 69 |     def good_agents(self, world):
 70 |         return [agent for agent in world.agents if not agent.adversary]
 71 | 
 72 |     # return all adversarial agents
 73 |     def adversaries(self, world):
 74 |         return [agent for agent in world.agents if agent.adversary]
 75 | 
 76 |     def reward(self, agent, world):
 77 |         # Agents are rewarded based on minimum agent distance to each landmark
 78 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 79 | 
 80 |     def agent_reward(self, agent, world):
 81 |         # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it
 82 |         shaped_reward = True
 83 |         shaped_adv_reward = True
 84 | 
 85 |         # Calculate negative reward for adversary
 86 |         adversary_agents = self.adversaries(world)
 87 |         if shaped_adv_reward:  # distance-based adversary reward
 88 |             adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents])
 89 |         else:  # proximity-based adversary reward (binary)
 90 |             adv_rew = 0
 91 |             for a in adversary_agents:
 92 |                 if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size:
 93 |                     adv_rew -= 5
 94 | 
 95 |         # Calculate positive reward for agents
 96 |         good_agents = self.good_agents(world)
 97 |         if shaped_reward:  # distance-based agent reward
 98 |             pos_rew = -min(
 99 |                 [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
100 |         else:  # proximity-based agent reward (binary)
101 |             pos_rew = 0
102 |             if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \
103 |                     < 2 * agent.goal_a.size:
104 |                 pos_rew += 5
105 |             pos_rew -= min(
106 |                 [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents])
107 |         return pos_rew + adv_rew
108 | 
109 |     def adversary_reward(self, agent, world):
110 |         # Rewarded based on proximity to the goal landmark
111 |         shaped_reward = True
112 |         if shaped_reward:  # distance-based reward
113 |             return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))
114 |         else:  # proximity-based reward (binary)
115 |             adv_rew = 0
116 |             if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size:
117 |                 adv_rew += 5
118 |             return adv_rew
119 | 
120 | 
121 |     def observation(self, agent, world):
122 |         # get positions of all entities in this agent's reference frame
123 |         entity_pos = []
124 |         for entity in world.landmarks:
125 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
126 |         # entity colors
127 |         entity_color = []
128 |         for entity in world.landmarks:
129 |             entity_color.append(entity.color)
130 |         # communication of all other agents
131 |         other_pos = []
132 |         for other in world.agents:
133 |             if other is agent: continue
134 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
135 | 
136 |         if not agent.adversary:
137 |             return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos)
138 |         else:
139 |             return np.concatenate(entity_pos + other_pos)
140 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_crypto.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Scenario:
  3 | 1 speaker, 2 listeners (one of which is an adversary). Good agents rewarded for proximity to goal, and distance from
  4 | adversary to goal. Adversary is rewarded for its distance to the goal.
  5 | """
  6 | 
  7 | 
  8 | import numpy as np
  9 | from multiagent.core import World, Agent, Landmark
 10 | from multiagent.scenario import BaseScenario
 11 | import random
 12 | 
 13 | 
 14 | class CryptoAgent(Agent):
 15 |     def __init__(self):
 16 |         super(CryptoAgent, self).__init__()
 17 |         self.key = None
 18 | 
 19 | class Scenario(BaseScenario):
 20 | 
 21 |     def make_world(self):
 22 |         world = World()
 23 |         # set any world properties first
 24 |         num_agents = 3
 25 |         num_adversaries = 1
 26 |         num_landmarks = 2
 27 |         world.dim_c = 4
 28 |         # add agents
 29 |         world.agents = [CryptoAgent() for i in range(num_agents)]
 30 |         for i, agent in enumerate(world.agents):
 31 |             agent.name = 'agent %d' % i
 32 |             agent.collide = False
 33 |             agent.adversary = True if i < num_adversaries else False
 34 |             agent.speaker = True if i == 2 else False
 35 |             agent.movable = False
 36 |         # add landmarks
 37 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 38 |         for i, landmark in enumerate(world.landmarks):
 39 |             landmark.name = 'landmark %d' % i
 40 |             landmark.collide = False
 41 |             landmark.movable = False
 42 |         # make initial conditions
 43 |         self.reset_world(world)
 44 |         return world
 45 | 
 46 | 
 47 |     def reset_world(self, world):
 48 |         # random properties for agents
 49 |         for i, agent in enumerate(world.agents):
 50 |             agent.color = np.array([0.25, 0.25, 0.25])
 51 |             if agent.adversary:
 52 |                 agent.color = np.array([0.75, 0.25, 0.25])
 53 |             agent.key = None
 54 |         # random properties for landmarks
 55 |         color_list = [np.zeros(world.dim_c) for i in world.landmarks]
 56 |         for i, color in enumerate(color_list):
 57 |             color[i] += 1
 58 |         for color, landmark in zip(color_list, world.landmarks):
 59 |             landmark.color = color
 60 |         # set goal landmark
 61 |         goal = np.random.choice(world.landmarks)
 62 |         world.agents[1].color = goal.color
 63 |         world.agents[2].key = np.random.choice(world.landmarks).color
 64 | 
 65 |         for agent in world.agents:
 66 |             agent.goal_a = goal
 67 | 
 68 |         # set random initial states
 69 |         for agent in world.agents:
 70 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 71 |             agent.state.p_vel = np.zeros(world.dim_p)
 72 |             agent.state.c = np.zeros(world.dim_c)
 73 |         for i, landmark in enumerate(world.landmarks):
 74 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 75 |             landmark.state.p_vel = np.zeros(world.dim_p)
 76 | 
 77 | 
 78 |     def benchmark_data(self, agent, world):
 79 |         # returns data for benchmarking purposes
 80 |         return (agent.state.c, agent.goal_a.color)
 81 | 
 82 |     # return all agents that are not adversaries
 83 |     def good_listeners(self, world):
 84 |         return [agent for agent in world.agents if not agent.adversary and not agent.speaker]
 85 | 
 86 |     # return all agents that are not adversaries
 87 |     def good_agents(self, world):
 88 |         return [agent for agent in world.agents if not agent.adversary]
 89 | 
 90 |     # return all adversarial agents
 91 |     def adversaries(self, world):
 92 |         return [agent for agent in world.agents if agent.adversary]
 93 | 
 94 |     def reward(self, agent, world):
 95 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 96 | 
 97 |     def agent_reward(self, agent, world):
 98 |         # Agents rewarded if Bob can reconstruct message, but adversary (Eve) cannot
 99 |         good_listeners = self.good_listeners(world)
100 |         adversaries = self.adversaries(world)
101 |         good_rew = 0
102 |         adv_rew = 0
103 |         for a in good_listeners:
104 |             if (a.state.c == np.zeros(world.dim_c)).all():
105 |                 continue
106 |             else:
107 |                 good_rew -= np.sum(np.square(a.state.c - agent.goal_a.color))
108 |         for a in adversaries:
109 |             if (a.state.c == np.zeros(world.dim_c)).all():
110 |                 continue
111 |             else:
112 |                 adv_l1 = np.sum(np.square(a.state.c - agent.goal_a.color))
113 |                 adv_rew += adv_l1
114 |         return adv_rew + good_rew
115 | 
116 |     def adversary_reward(self, agent, world):
117 |         # Adversary (Eve) is rewarded if it can reconstruct original goal
118 |         rew = 0
119 |         if not (agent.state.c == np.zeros(world.dim_c)).all():
120 |             rew -= np.sum(np.square(agent.state.c - agent.goal_a.color))
121 |         return rew
122 | 
123 | 
124 |     def observation(self, agent, world):
125 |         # goal color
126 |         goal_color = np.zeros(world.dim_color)
127 |         if agent.goal_a is not None:
128 |             goal_color = agent.goal_a.color
129 | 
130 |         # get positions of all entities in this agent's reference frame
131 |         entity_pos = []
132 |         for entity in world.landmarks:
133 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
134 |         # communication of all other agents
135 |         comm = []
136 |         for other in world.agents:
137 |             if other is agent or (other.state.c is None) or not other.speaker: continue
138 |             comm.append(other.state.c)
139 | 
140 |         confer = np.array([0])
141 | 
142 |         if world.agents[2].key is None:
143 |             confer = np.array([1])
144 |             key = np.zeros(world.dim_c)
145 |             goal_color = np.zeros(world.dim_c)
146 |         else:
147 |             key = world.agents[2].key
148 | 
149 |         prnt = False
150 |         # speaker
151 |         if agent.speaker:
152 |             if prnt:
153 |                 print('speaker')
154 |                 print(agent.state.c)
155 |                 print(np.concatenate([goal_color] + [key] + [confer] + [np.random.randn(1)]))
156 |             return np.concatenate([goal_color] + [key])
157 |         # listener
158 |         if not agent.speaker and not agent.adversary:
159 |             if prnt:
160 |                 print('listener')
161 |                 print(agent.state.c)
162 |                 print(np.concatenate([key] + comm + [confer]))
163 |             return np.concatenate([key] + comm)
164 |         if not agent.speaker and agent.adversary:
165 |             if prnt:
166 |                 print('adversary')
167 |                 print(agent.state.c)
168 |                 print(np.concatenate(comm + [confer]))
169 |             return np.concatenate(comm)
170 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_push.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 2
10 |         num_agents = 2
11 |         num_adversaries = 1
12 |         num_landmarks = 2
13 |         # add agents
14 |         world.agents = [Agent() for i in range(num_agents)]
15 |         for i, agent in enumerate(world.agents):
16 |             agent.name = 'agent %d' % i
17 |             agent.collide = True
18 |             agent.silent = True
19 |             if i < num_adversaries:
20 |                 agent.adversary = True
21 |             else:
22 |                 agent.adversary = False
23 |         # add landmarks
24 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
25 |         for i, landmark in enumerate(world.landmarks):
26 |             landmark.name = 'landmark %d' % i
27 |             landmark.collide = False
28 |             landmark.movable = False
29 |         # make initial conditions
30 |         self.reset_world(world)
31 |         return world
32 | 
33 |     def reset_world(self, world):
34 |         # random properties for landmarks
35 |         for i, landmark in enumerate(world.landmarks):
36 |             landmark.color = np.array([0.1, 0.1, 0.1])
37 |             landmark.color[i + 1] += 0.8
38 |             landmark.index = i
39 |         # set goal landmark
40 |         goal = np.random.choice(world.landmarks)
41 |         for i, agent in enumerate(world.agents):
42 |             agent.goal_a = goal
43 |             agent.color = np.array([0.25, 0.25, 0.25])
44 |             if agent.adversary:
45 |                 agent.color = np.array([0.75, 0.25, 0.25])
46 |             else:
47 |                 j = goal.index
48 |                 agent.color[j + 1] += 0.5
49 |         # set random initial states
50 |         for agent in world.agents:
51 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
52 |             agent.state.p_vel = np.zeros(world.dim_p)
53 |             agent.state.c = np.zeros(world.dim_c)
54 |         for i, landmark in enumerate(world.landmarks):
55 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
56 |             landmark.state.p_vel = np.zeros(world.dim_p)
57 | 
58 |     def reward(self, agent, world):
59 |         # Agents are rewarded based on minimum agent distance to each landmark
60 |         return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
61 | 
62 |     def agent_reward(self, agent, world):
63 |         # the distance to the goal
64 |         return -np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)))
65 | 
66 |     def adversary_reward(self, agent, world):
67 |         # keep the nearest good agents away from the goal
68 |         agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary]
69 |         pos_rew = min(agent_dist)
70 |         #nearest_agent = world.good_agents[np.argmin(agent_dist)]
71 |         #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos)))
72 |         neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)))
73 |         #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents])
74 |         return pos_rew - neg_rew
75 |                
76 |     def observation(self, agent, world):
77 |         # get positions of all entities in this agent's reference frame
78 |         entity_pos = []
79 |         for entity in world.landmarks:  # world.entities:
80 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
81 |         # entity colors
82 |         entity_color = []
83 |         for entity in world.landmarks:  # world.entities:
84 |             entity_color.append(entity.color)
85 |         # communication of all other agents
86 |         comm = []
87 |         other_pos = []
88 |         for other in world.agents:
89 |             if other is agent: continue
90 |             comm.append(other.state.c)
91 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
92 |         if not agent.adversary:
93 |             return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos)
94 |         else:
95 |             #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos  # randomize position of other agents in adversary network
96 |             return np.concatenate([agent.state.p_vel] + entity_pos + other_pos)
97 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_reference.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 10
10 |         world.collaborative = True  # whether agents share rewards
11 |         # add agents
12 |         world.agents = [Agent() for i in range(2)]
13 |         for i, agent in enumerate(world.agents):
14 |             agent.name = 'agent %d' % i
15 |             agent.collide = False
16 |         # add landmarks
17 |         world.landmarks = [Landmark() for i in range(3)]
18 |         for i, landmark in enumerate(world.landmarks):
19 |             landmark.name = 'landmark %d' % i
20 |             landmark.collide = False
21 |             landmark.movable = False
22 |         # make initial conditions
23 |         self.reset_world(world)
24 |         return world
25 | 
26 |     def reset_world(self, world):
27 |         # assign goals to agents
28 |         for agent in world.agents:
29 |             agent.goal_a = None
30 |             agent.goal_b = None
31 |         # want other agent to go to the goal landmark
32 |         world.agents[0].goal_a = world.agents[1]
33 |         world.agents[0].goal_b = np.random.choice(world.landmarks)
34 |         world.agents[1].goal_a = world.agents[0]
35 |         world.agents[1].goal_b = np.random.choice(world.landmarks)
36 |         # random properties for agents
37 |         for i, agent in enumerate(world.agents):
38 |             agent.color = np.array([0.25,0.25,0.25])               
39 |         # random properties for landmarks
40 |         world.landmarks[0].color = np.array([0.75,0.25,0.25]) 
41 |         world.landmarks[1].color = np.array([0.25,0.75,0.25]) 
42 |         world.landmarks[2].color = np.array([0.25,0.25,0.75]) 
43 |         # special colors for goals
44 |         world.agents[0].goal_a.color = world.agents[0].goal_b.color                
45 |         world.agents[1].goal_a.color = world.agents[1].goal_b.color                               
46 |         # set random initial states
47 |         for agent in world.agents:
48 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
49 |             agent.state.p_vel = np.zeros(world.dim_p)
50 |             agent.state.c = np.zeros(world.dim_c)
51 |         for i, landmark in enumerate(world.landmarks):
52 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 |             landmark.state.p_vel = np.zeros(world.dim_p)
54 | 
55 |     def reward(self, agent, world):
56 |         if agent.goal_a is None or agent.goal_b is None:
57 |             return 0.0
58 |         dist2 = np.sum(np.square(agent.goal_a.state.p_pos - agent.goal_b.state.p_pos))
59 |         return -dist2
60 | 
61 |     def observation(self, agent, world):
62 |         # goal color
63 |         goal_color = [np.zeros(world.dim_color), np.zeros(world.dim_color)]
64 |         if agent.goal_b is not None:
65 |             goal_color[1] = agent.goal_b.color 
66 | 
67 |         # get positions of all entities in this agent's reference frame
68 |         entity_pos = []
69 |         for entity in world.landmarks:
70 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
71 |         # entity colors
72 |         entity_color = []
73 |         for entity in world.landmarks:
74 |             entity_color.append(entity.color)
75 |         # communication of all other agents
76 |         comm = []
77 |         for other in world.agents:
78 |             if other is agent: continue
79 |             comm.append(other.state.c)
80 |         return np.concatenate([agent.state.p_vel] + entity_pos + [goal_color[1]] + comm)
81 |             


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_speaker_listener.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from multiagent.core import World, Agent, Landmark
 3 | from multiagent.scenario import BaseScenario
 4 | 
 5 | class Scenario(BaseScenario):
 6 |     def make_world(self):
 7 |         world = World()
 8 |         # set any world properties first
 9 |         world.dim_c = 3
10 |         num_landmarks = 3
11 |         world.collaborative = True
12 |         # add agents
13 |         world.agents = [Agent() for i in range(2)]
14 |         for i, agent in enumerate(world.agents):
15 |             agent.name = 'agent %d' % i
16 |             agent.collide = False
17 |             agent.size = 0.075
18 |         # speaker
19 |         world.agents[0].movable = False
20 |         # listener
21 |         world.agents[1].silent = True
22 |         # add landmarks
23 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
24 |         for i, landmark in enumerate(world.landmarks):
25 |             landmark.name = 'landmark %d' % i
26 |             landmark.collide = False
27 |             landmark.movable = False
28 |             landmark.size = 0.04
29 |         # make initial conditions
30 |         self.reset_world(world)
31 |         return world
32 | 
33 |     def reset_world(self, world):
34 |         # assign goals to agents
35 |         for agent in world.agents:
36 |             agent.goal_a = None
37 |             agent.goal_b = None
38 |         # want listener to go to the goal landmark
39 |         world.agents[0].goal_a = world.agents[1]
40 |         world.agents[0].goal_b = np.random.choice(world.landmarks)
41 |         # random properties for agents
42 |         for i, agent in enumerate(world.agents):
43 |             agent.color = np.array([0.25,0.25,0.25])               
44 |         # random properties for landmarks
45 |         world.landmarks[0].color = np.array([0.65,0.15,0.15])
46 |         world.landmarks[1].color = np.array([0.15,0.65,0.15])
47 |         world.landmarks[2].color = np.array([0.15,0.15,0.65])
48 |         # special colors for goals
49 |         world.agents[0].goal_a.color = world.agents[0].goal_b.color + np.array([0.45, 0.45, 0.45])
50 |         # set random initial states
51 |         for agent in world.agents:
52 |             agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
53 |             agent.state.p_vel = np.zeros(world.dim_p)
54 |             agent.state.c = np.zeros(world.dim_c)
55 |         for i, landmark in enumerate(world.landmarks):
56 |             landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p)
57 |             landmark.state.p_vel = np.zeros(world.dim_p)
58 | 
59 |     def benchmark_data(self, agent, world):
60 |         # returns data for benchmarking purposes
61 |         return self.reward(agent, reward)
62 | 
63 |     def reward(self, agent, world):
64 |         # squared distance from listener to landmark
65 |         a = world.agents[0]
66 |         dist2 = np.sum(np.square(a.goal_a.state.p_pos - a.goal_b.state.p_pos))
67 |         return -dist2
68 | 
69 |     def observation(self, agent, world):
70 |         # goal color
71 |         goal_color = np.zeros(world.dim_color)
72 |         if agent.goal_b is not None:
73 |             goal_color = agent.goal_b.color
74 | 
75 |         # get positions of all entities in this agent's reference frame
76 |         entity_pos = []
77 |         for entity in world.landmarks:
78 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
79 | 
80 |         # communication of all other agents
81 |         comm = []
82 |         for other in world.agents:
83 |             if other is agent or (other.state.c is None): continue
84 |             comm.append(other.state.c)
85 |         
86 |         # speaker
87 |         if not agent.movable:
88 |             return np.concatenate([goal_color])
89 |         # listener
90 |         if agent.silent:
91 |             return np.concatenate([agent.state.p_vel] + entity_pos + comm)
92 |             
93 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_spread.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 2
 11 |         num_agents = 3
 12 |         num_landmarks = 3
 13 |         world.collaborative = True
 14 |         # add agents
 15 |         world.agents = [Agent() for i in range(num_agents)]
 16 |         for i, agent in enumerate(world.agents):
 17 |             agent.name = 'agent %d' % i
 18 |             agent.collide = True
 19 |             agent.silent = True
 20 |             agent.size = 0.15
 21 |         # add landmarks
 22 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 23 |         for i, landmark in enumerate(world.landmarks):
 24 |             landmark.name = 'landmark %d' % i
 25 |             landmark.collide = False
 26 |             landmark.movable = False
 27 |         # make initial conditions
 28 |         self.reset_world(world)
 29 |         return world
 30 | 
 31 |     def reset_world(self, world):
 32 |         # random properties for agents
 33 |         for i, agent in enumerate(world.agents):
 34 |             agent.color = np.array([0.35, 0.35, 0.85])
 35 |         # random properties for landmarks
 36 |         for i, landmark in enumerate(world.landmarks):
 37 |             landmark.color = np.array([0.25, 0.25, 0.25])
 38 |         # set random initial states
 39 |         for agent in world.agents:
 40 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 41 |             agent.state.p_vel = np.zeros(world.dim_p)
 42 |             agent.state.c = np.zeros(world.dim_c)
 43 |         for i, landmark in enumerate(world.landmarks):
 44 |             landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 45 |             landmark.state.p_vel = np.zeros(world.dim_p)
 46 | 
 47 |     def benchmark_data(self, agent, world):
 48 |         rew = 0
 49 |         collisions = 0
 50 |         occupied_landmarks = 0
 51 |         min_dists = 0
 52 |         for l in world.landmarks:
 53 |             dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
 54 |             min_dists += min(dists)
 55 |             rew -= min(dists)
 56 |             if min(dists) < 0.1:
 57 |                 occupied_landmarks += 1
 58 |         if agent.collide:
 59 |             for a in world.agents:
 60 |                 if self.is_collision(a, agent):
 61 |                     rew -= 1
 62 |                     collisions += 1
 63 |         return (rew, collisions, min_dists, occupied_landmarks)
 64 | 
 65 | 
 66 |     def is_collision(self, agent1, agent2):
 67 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 68 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 69 |         dist_min = agent1.size + agent2.size
 70 |         return True if dist < dist_min else False
 71 | 
 72 |     def reward(self, agent, world):
 73 |         # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
 74 |         rew = 0
 75 |         for l in world.landmarks:
 76 |             dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
 77 |             rew -= min(dists)
 78 |         if agent.collide:
 79 |             for a in world.agents:
 80 |                 if self.is_collision(a, agent):
 81 |                     rew -= 1
 82 |         return rew
 83 | 
 84 |     def observation(self, agent, world):
 85 |         # get positions of all entities in this agent's reference frame
 86 |         entity_pos = []
 87 |         for entity in world.landmarks:  # world.entities:
 88 |             entity_pos.append(entity.state.p_pos - agent.state.p_pos)
 89 |         # entity colors
 90 |         entity_color = []
 91 |         for entity in world.landmarks:  # world.entities:
 92 |             entity_color.append(entity.color)
 93 |         # communication of all other agents
 94 |         comm = []
 95 |         other_pos = []
 96 |         for other in world.agents:
 97 |             if other is agent: continue
 98 |             comm.append(other.state.c)
 99 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
100 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
101 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_tag.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 2
 11 |         num_good_agents = 1
 12 |         num_adversaries = 3
 13 |         num_agents = num_adversaries + num_good_agents
 14 |         num_landmarks = 2
 15 |         # add agents
 16 |         world.agents = [Agent() for i in range(num_agents)]
 17 |         for i, agent in enumerate(world.agents):
 18 |             agent.name = 'agent %d' % i
 19 |             agent.collide = True
 20 |             agent.silent = True
 21 |             agent.adversary = True if i < num_adversaries else False
 22 |             agent.size = 0.075 if agent.adversary else 0.05
 23 |             agent.accel = 3.0 if agent.adversary else 4.0
 24 |             #agent.accel = 20.0 if agent.adversary else 25.0
 25 |             agent.max_speed = 1.0 if agent.adversary else 1.3
 26 |         # add landmarks
 27 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 28 |         for i, landmark in enumerate(world.landmarks):
 29 |             landmark.name = 'landmark %d' % i
 30 |             landmark.collide = True
 31 |             landmark.movable = False
 32 |             landmark.size = 0.2
 33 |             landmark.boundary = False
 34 |         # make initial conditions
 35 |         self.reset_world(world)
 36 |         return world
 37 | 
 38 | 
 39 |     def reset_world(self, world):
 40 |         # random properties for agents
 41 |         for i, agent in enumerate(world.agents):
 42 |             agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
 43 |             # random properties for landmarks
 44 |         for i, landmark in enumerate(world.landmarks):
 45 |             landmark.color = np.array([0.25, 0.25, 0.25])
 46 |         # set random initial states
 47 |         for agent in world.agents:
 48 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 49 |             agent.state.p_vel = np.zeros(world.dim_p)
 50 |             agent.state.c = np.zeros(world.dim_c)
 51 |         for i, landmark in enumerate(world.landmarks):
 52 |             if not landmark.boundary:
 53 |                 landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
 54 |                 landmark.state.p_vel = np.zeros(world.dim_p)
 55 | 
 56 | 
 57 |     def benchmark_data(self, agent, world):
 58 |         # returns data for benchmarking purposes
 59 |         if agent.adversary:
 60 |             collisions = 0
 61 |             for a in self.good_agents(world):
 62 |                 if self.is_collision(a, agent):
 63 |                     collisions += 1
 64 |             return collisions
 65 |         else:
 66 |             return 0
 67 | 
 68 | 
 69 |     def is_collision(self, agent1, agent2):
 70 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 71 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 72 |         dist_min = agent1.size + agent2.size
 73 |         return True if dist < dist_min else False
 74 | 
 75 |     # return all agents that are not adversaries
 76 |     def good_agents(self, world):
 77 |         return [agent for agent in world.agents if not agent.adversary]
 78 | 
 79 |     # return all adversarial agents
 80 |     def adversaries(self, world):
 81 |         return [agent for agent in world.agents if agent.adversary]
 82 | 
 83 | 
 84 |     def reward(self, agent, world):
 85 |         # Agents are rewarded based on minimum agent distance to each landmark
 86 |         main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 87 |         return main_reward
 88 | 
 89 |     def agent_reward(self, agent, world):
 90 |         # Agents are negatively rewarded if caught by adversaries
 91 |         rew = 0
 92 |         shape = False
 93 |         adversaries = self.adversaries(world)
 94 |         if shape:  # reward can optionally be shaped (increased reward for increased distance from adversary)
 95 |             for adv in adversaries:
 96 |                 rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
 97 |         if agent.collide:
 98 |             for a in adversaries:
 99 |                 if self.is_collision(a, agent):
100 |                     rew -= 10
101 | 
102 |         # agents are penalized for exiting the screen, so that they can be caught by the adversaries
103 |         def bound(x):
104 |             if x < 0.9:
105 |                 return 0
106 |             if x < 1.0:
107 |                 return (x - 0.9) * 10
108 |             return min(np.exp(2 * x - 2), 10)
109 |         for p in range(world.dim_p):
110 |             x = abs(agent.state.p_pos[p])
111 |             rew -= bound(x)
112 | 
113 |         return rew
114 | 
115 |     def adversary_reward(self, agent, world):
116 |         # Adversaries are rewarded for collisions with agents
117 |         rew = 0
118 |         shape = False
119 |         agents = self.good_agents(world)
120 |         adversaries = self.adversaries(world)
121 |         if shape:  # reward can optionally be shaped (decreased reward for increased distance from agents)
122 |             for adv in adversaries:
123 |                 rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
124 |         if agent.collide:
125 |             for ag in agents:
126 |                 for adv in adversaries:
127 |                     if self.is_collision(ag, adv):
128 |                         rew += 10
129 |         return rew
130 | 
131 |     def observation(self, agent, world):
132 |         # get positions of all entities in this agent's reference frame
133 |         entity_pos = []
134 |         for entity in world.landmarks:
135 |             if not entity.boundary:
136 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
137 |         # communication of all other agents
138 |         comm = []
139 |         other_pos = []
140 |         other_vel = []
141 |         for other in world.agents:
142 |             if other is agent: continue
143 |             comm.append(other.state.c)
144 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
145 |             if not other.adversary:
146 |                 other_vel.append(other.state.p_vel)
147 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
148 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_tag_v1.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 2
 11 |         num_good_agents = 1
 12 |         num_adversaries = 2
 13 |         num_agents = num_adversaries + num_good_agents
 14 |         num_landmarks = 0
 15 |         # add agents
 16 |         world.agents = [Agent() for i in range(num_agents)]
 17 |         for i, agent in enumerate(world.agents):
 18 |             agent.name = 'agent %d' % i
 19 |             agent.collide = True
 20 |             agent.silent = True
 21 |             agent.adversary = True if i < num_adversaries else False
 22 |             agent.size = 0.075 if agent.adversary else 0.05
 23 |             agent.accel = 3.0 if agent.adversary else 4.0
 24 |             #agent.accel = 20.0 if agent.adversary else 25.0
 25 |             agent.max_speed = 1.0 if agent.adversary else 1.3
 26 |         # add landmarks
 27 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 28 |         for i, landmark in enumerate(world.landmarks):
 29 |             landmark.name = 'landmark %d' % i
 30 |             landmark.collide = True
 31 |             landmark.movable = False
 32 |             landmark.size = 0.2
 33 |             landmark.boundary = False
 34 |         # make initial conditions
 35 |         self.reset_world(world)
 36 |         return world
 37 | 
 38 | 
 39 |     def reset_world(self, world):
 40 |         # random properties for agents
 41 |         for i, agent in enumerate(world.agents):
 42 |             agent.color = np.array([0.35, 0.85, 0.35]) if not agent.adversary else np.array([0.85, 0.35, 0.35])
 43 |             # random properties for landmarks
 44 |         for i, landmark in enumerate(world.landmarks):
 45 |             landmark.color = np.array([0.25, 0.25, 0.25])
 46 |         # set random initial states
 47 |         for agent in world.agents:
 48 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
 49 |             agent.state.p_vel = np.zeros(world.dim_p)
 50 |             agent.state.c = np.zeros(world.dim_c)
 51 |         for i, landmark in enumerate(world.landmarks):
 52 |             if not landmark.boundary:
 53 |                 landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
 54 |                 landmark.state.p_vel = np.zeros(world.dim_p)
 55 | 
 56 | 
 57 |     def benchmark_data(self, agent, world):
 58 |         # returns data for benchmarking purposes
 59 |         if agent.adversary:
 60 |             collisions = 0
 61 |             for a in self.good_agents(world):
 62 |                 if self.is_collision(a, agent):
 63 |                     collisions += 1
 64 |             return collisions
 65 |         else:
 66 |             return 0
 67 | 
 68 | 
 69 |     def is_collision(self, agent1, agent2):
 70 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
 71 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
 72 |         dist_min = agent1.size + agent2.size
 73 |         return True if dist < dist_min else False
 74 | 
 75 |     # return all agents that are not adversaries
 76 |     def good_agents(self, world):
 77 |         return [agent for agent in world.agents if not agent.adversary]
 78 | 
 79 |     # return all adversarial agents
 80 |     def adversaries(self, world):
 81 |         return [agent for agent in world.agents if agent.adversary]
 82 | 
 83 | 
 84 |     def reward(self, agent, world):
 85 |         # Agents are rewarded based on minimum agent distance to each landmark
 86 |         main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
 87 |         return main_reward
 88 | 
 89 |     def agent_reward(self, agent, world):
 90 |         # Agents are negatively rewarded if caught by adversaries
 91 |         rew = 0
 92 |         #shape = False
 93 |         shape = True
 94 |         adversaries = self.adversaries(world)
 95 |         if shape:  # reward can optionally be shaped (increased reward for increased distance from adversary)
 96 |             for adv in adversaries:
 97 |                 rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
 98 |         if agent.collide:
 99 |             for a in adversaries:
100 |                 if self.is_collision(a, agent):
101 |                     rew -= 10
102 | 
103 |         # agents are penalized for exiting the screen, so that they can be caught by the adversaries
104 |         def bound(x):
105 |             if x < 0.9:
106 |                 return 0
107 |             if x < 1.0:
108 |                 return (x - 0.9) * 10
109 |             return min(np.exp(2 * x - 2), 10)
110 |         for p in range(world.dim_p):
111 |             x = abs(agent.state.p_pos[p])
112 |             rew -= bound(x)
113 | 
114 |         return rew
115 | 
116 |     def adversary_reward(self, agent, world):
117 |         # Adversaries are rewarded for collisions with agents
118 |         rew = 0
119 |         #shape = False
120 |         shape = True
121 |         agents = self.good_agents(world)
122 |         adversaries = self.adversaries(world)
123 |         if shape:  # reward can optionally be shaped (decreased reward for increased distance from agents)
124 |             for adv in adversaries:
125 |                 rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - adv.state.p_pos))) for a in agents])
126 |         if agent.collide:
127 |             for ag in agents:
128 |                 for adv in adversaries:
129 |                     if self.is_collision(ag, adv):
130 |                         rew += 10
131 |         return rew
132 | 
133 |     def observation(self, agent, world):
134 |         # get positions of all entities in this agent's reference frame
135 |         entity_pos = []
136 |         for entity in world.landmarks:
137 |             if not entity.boundary:
138 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
139 |         # communication of all other agents
140 |         comm = []
141 |         other_pos = []
142 |         other_vel = []
143 |         for other in world.agents:
144 |             if other is agent: continue
145 |             comm.append(other.state.c)
146 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
147 |             if not other.adversary:
148 |                 other_vel.append(other.state.p_vel)
149 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
150 | 
151 |     def done(self, agent, world):
152 |         for p in range(world.dim_p):
153 |             x = abs(agent.state.p_pos[p])
154 |             if (x > 1.0):
155 |                 return True
156 | 
157 |         return False
158 | 
159 | 


--------------------------------------------------------------------------------
/MADQN/multiagent/scenarios/simple_world_comm.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from multiagent.core import World, Agent, Landmark
  3 | from multiagent.scenario import BaseScenario
  4 | 
  5 | 
  6 | class Scenario(BaseScenario):
  7 |     def make_world(self):
  8 |         world = World()
  9 |         # set any world properties first
 10 |         world.dim_c = 4
 11 |         #world.damping = 1
 12 |         num_good_agents = 2
 13 |         num_adversaries = 4
 14 |         num_agents = num_adversaries + num_good_agents
 15 |         num_landmarks = 1
 16 |         num_food = 2
 17 |         num_forests = 2
 18 |         # add agents
 19 |         world.agents = [Agent() for i in range(num_agents)]
 20 |         for i, agent in enumerate(world.agents):
 21 |             agent.name = 'agent %d' % i
 22 |             agent.collide = True
 23 |             agent.leader = True if i == 0 else False
 24 |             agent.silent = True if i > 0 else False
 25 |             agent.adversary = True if i < num_adversaries else False
 26 |             agent.size = 0.075 if agent.adversary else 0.045
 27 |             agent.accel = 3.0 if agent.adversary else 4.0
 28 |             #agent.accel = 20.0 if agent.adversary else 25.0
 29 |             agent.max_speed = 1.0 if agent.adversary else 1.3
 30 |         # add landmarks
 31 |         world.landmarks = [Landmark() for i in range(num_landmarks)]
 32 |         for i, landmark in enumerate(world.landmarks):
 33 |             landmark.name = 'landmark %d' % i
 34 |             landmark.collide = True
 35 |             landmark.movable = False
 36 |             landmark.size = 0.2
 37 |             landmark.boundary = False
 38 |         world.food = [Landmark() for i in range(num_food)]
 39 |         for i, landmark in enumerate(world.food):
 40 |             landmark.name = 'food %d' % i
 41 |             landmark.collide = False
 42 |             landmark.movable = False
 43 |             landmark.size = 0.03
 44 |             landmark.boundary = False
 45 |         world.forests = [Landmark() for i in range(num_forests)]
 46 |         for i, landmark in enumerate(world.forests):
 47 |             landmark.name = 'forest %d' % i
 48 |             landmark.collide = False
 49 |             landmark.movable = False
 50 |             landmark.size = 0.3
 51 |             landmark.boundary = False
 52 |         world.landmarks += world.food
 53 |         world.landmarks += world.forests
 54 |         #world.landmarks += self.set_boundaries(world)  # world boundaries now penalized with negative reward
 55 |         # make initial conditions
 56 |         self.reset_world(world)
 57 |         return world
 58 | 
 59 |     def set_boundaries(self, world):
 60 |         boundary_list = []
 61 |         landmark_size = 1
 62 |         edge = 1 + landmark_size
 63 |         num_landmarks = int(edge * 2 / landmark_size)
 64 |         for x_pos in [-edge, edge]:
 65 |             for i in range(num_landmarks):
 66 |                 l = Landmark()
 67 |                 l.state.p_pos = np.array([x_pos, -1 + i * landmark_size])
 68 |                 boundary_list.append(l)
 69 | 
 70 |         for y_pos in [-edge, edge]:
 71 |             for i in range(num_landmarks):
 72 |                 l = Landmark()
 73 |                 l.state.p_pos = np.array([-1 + i * landmark_size, y_pos])
 74 |                 boundary_list.append(l)
 75 | 
 76 |         for i, l in enumerate(boundary_list):
 77 |             l.name = 'boundary %d' % i
 78 |             l.collide = True
 79 |             l.movable = False
 80 |             l.boundary = True
 81 |             l.color = np.array([0.75, 0.75, 0.75])
 82 |             l.size = landmark_size
 83 |             l.state.p_vel = np.zeros(world.dim_p)
 84 | 
 85 |         return boundary_list
 86 | 
 87 | 
 88 |     def reset_world(self, world):
 89 |         # random properties for agents
 90 |         for i, agent in enumerate(world.agents):
 91 |             agent.color = np.array([0.45, 0.95, 0.45]) if not agent.adversary else np.array([0.95, 0.45, 0.45])
 92 |             agent.color -= np.array([0.3, 0.3, 0.3]) if agent.leader else np.array([0, 0, 0])
 93 |             # random properties for landmarks
 94 |         for i, landmark in enumerate(world.landmarks):
 95 |             landmark.color = np.array([0.25, 0.25, 0.25])
 96 |         for i, landmark in enumerate(world.food):
 97 |             landmark.color = np.array([0.15, 0.15, 0.65])
 98 |         for i, landmark in enumerate(world.forests):
 99 |             landmark.color = np.array([0.6, 0.9, 0.6])
100 |         # set random initial states
101 |         for agent in world.agents:
102 |             agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
103 |             agent.state.p_vel = np.zeros(world.dim_p)
104 |             agent.state.c = np.zeros(world.dim_c)
105 |         for i, landmark in enumerate(world.landmarks):
106 |             landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
107 |             landmark.state.p_vel = np.zeros(world.dim_p)
108 |         for i, landmark in enumerate(world.food):
109 |             landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
110 |             landmark.state.p_vel = np.zeros(world.dim_p)
111 |         for i, landmark in enumerate(world.forests):
112 |             landmark.state.p_pos = np.random.uniform(-0.9, +0.9, world.dim_p)
113 |             landmark.state.p_vel = np.zeros(world.dim_p)
114 | 
115 |     def benchmark_data(self, agent, world):
116 |         if agent.adversary:
117 |             collisions = 0
118 |             for a in self.good_agents(world):
119 |                 if self.is_collision(a, agent):
120 |                     collisions += 1
121 |             return collisions
122 |         else:
123 |             return 0
124 | 
125 | 
126 |     def is_collision(self, agent1, agent2):
127 |         delta_pos = agent1.state.p_pos - agent2.state.p_pos
128 |         dist = np.sqrt(np.sum(np.square(delta_pos)))
129 |         dist_min = agent1.size + agent2.size
130 |         return True if dist < dist_min else False
131 | 
132 | 
133 |     # return all agents that are not adversaries
134 |     def good_agents(self, world):
135 |         return [agent for agent in world.agents if not agent.adversary]
136 | 
137 |     # return all adversarial agents
138 |     def adversaries(self, world):
139 |         return [agent for agent in world.agents if agent.adversary]
140 | 
141 | 
142 |     def reward(self, agent, world):
143 |         # Agents are rewarded based on minimum agent distance to each landmark
144 |         #boundary_reward = -10 if self.outside_boundary(agent) else 0
145 |         main_reward = self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world)
146 |         return main_reward
147 | 
148 |     def outside_boundary(self, agent):
149 |         if agent.state.p_pos[0] > 1 or agent.state.p_pos[0] < -1 or agent.state.p_pos[1] > 1 or agent.state.p_pos[1] < -1:
150 |             return True
151 |         else:
152 |             return False
153 | 
154 | 
155 |     def agent_reward(self, agent, world):
156 |         # Agents are rewarded based on minimum agent distance to each landmark
157 |         rew = 0
158 |         shape = False
159 |         adversaries = self.adversaries(world)
160 |         if shape:
161 |             for adv in adversaries:
162 |                 rew += 0.1 * np.sqrt(np.sum(np.square(agent.state.p_pos - adv.state.p_pos)))
163 |         if agent.collide:
164 |             for a in adversaries:
165 |                 if self.is_collision(a, agent):
166 |                     rew -= 5
167 |         def bound(x):
168 |             if x < 0.9:
169 |                 return 0
170 |             if x < 1.0:
171 |                 return (x - 0.9) * 10
172 |             return min(np.exp(2 * x - 2), 10)  # 1 + (x - 1) * (x - 1)
173 | 
174 |         for p in range(world.dim_p):
175 |             x = abs(agent.state.p_pos[p])
176 |             rew -= 2 * bound(x)
177 | 
178 |         for food in world.food:
179 |             if self.is_collision(agent, food):
180 |                 rew += 2
181 |         rew += 0.05 * min([np.sqrt(np.sum(np.square(food.state.p_pos - agent.state.p_pos))) for food in world.food])
182 | 
183 |         return rew
184 | 
185 |     def adversary_reward(self, agent, world):
186 |         # Agents are rewarded based on minimum agent distance to each landmark
187 |         rew = 0
188 |         shape = True
189 |         agents = self.good_agents(world)
190 |         adversaries = self.adversaries(world)
191 |         if shape:
192 |             rew -= 0.1 * min([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in agents])
193 |         if agent.collide:
194 |             for ag in agents:
195 |                 for adv in adversaries:
196 |                     if self.is_collision(ag, adv):
197 |                         rew += 5
198 |         return rew
199 | 
200 | 
201 |     def observation2(self, agent, world):
202 |         # get positions of all entities in this agent's reference frame
203 |         entity_pos = []
204 |         for entity in world.landmarks:
205 |             if not entity.boundary:
206 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
207 | 
208 |         food_pos = []
209 |         for entity in world.food:
210 |             if not entity.boundary:
211 |                 food_pos.append(entity.state.p_pos - agent.state.p_pos)
212 |         # communication of all other agents
213 |         comm = []
214 |         other_pos = []
215 |         other_vel = []
216 |         for other in world.agents:
217 |             if other is agent: continue
218 |             comm.append(other.state.c)
219 |             other_pos.append(other.state.p_pos - agent.state.p_pos)
220 |             if not other.adversary:
221 |                 other_vel.append(other.state.p_vel)
222 |         return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel)
223 | 
224 |     def observation(self, agent, world):
225 |         # get positions of all entities in this agent's reference frame
226 |         entity_pos = []
227 |         for entity in world.landmarks:
228 |             if not entity.boundary:
229 |                 entity_pos.append(entity.state.p_pos - agent.state.p_pos)
230 | 
231 |         in_forest = [np.array([-1]), np.array([-1])]
232 |         inf1 = False
233 |         inf2 = False
234 |         if self.is_collision(agent, world.forests[0]):
235 |             in_forest[0] = np.array([1])
236 |             inf1= True
237 |         if self.is_collision(agent, world.forests[1]):
238 |             in_forest[1] = np.array([1])
239 |             inf2 = True
240 | 
241 |         food_pos = []
242 |         for entity in world.food:
243 |             if not entity.boundary:
244 |                 food_pos.append(entity.state.p_pos - agent.state.p_pos)
245 |         # communication of all other agents
246 |         comm = []
247 |         other_pos = []
248 |         other_vel = []
249 |         for other in world.agents:
250 |             if other is agent: continue
251 |             comm.append(other.state.c)
252 |             oth_f1 = self.is_collision(other, world.forests[0])
253 |             oth_f2 = self.is_collision(other, world.forests[1])
254 |             if (inf1 and oth_f1) or (inf2 and oth_f2) or (not inf1 and not oth_f1 and not inf2 and not oth_f2) or agent.leader:  #without forest vis
255 |                 other_pos.append(other.state.p_pos - agent.state.p_pos)
256 |                 if not other.adversary:
257 |                     other_vel.append(other.state.p_vel)
258 |             else:
259 |                 other_pos.append([0, 0])
260 |                 if not other.adversary:
261 |                     other_vel.append([0, 0])
262 | 
263 |         # to tell the pred when the prey are in the forest
264 |         prey_forest = []
265 |         ga = self.good_agents(world)
266 |         for a in ga:
267 |             if any([self.is_collision(a, f) for f in world.forests]):
268 |                 prey_forest.append(np.array([1]))
269 |             else:
270 |                 prey_forest.append(np.array([-1]))
271 |         # to tell leader when pred are in forest
272 |         prey_forest_lead = []
273 |         for f in world.forests:
274 |             if any([self.is_collision(a, f) for a in ga]):
275 |                 prey_forest_lead.append(np.array([1]))
276 |             else:
277 |                 prey_forest_lead.append(np.array([-1]))
278 | 
279 |         comm = [world.agents[0].state.c]
280 | 
281 |         if agent.adversary and not agent.leader:
282 |             return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
283 |         if agent.leader:
284 |             return np.concatenate(
285 |                 [agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + other_vel + in_forest + comm)
286 |         else:
287 |             return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + in_forest + other_vel)
288 | 
289 | 
290 | 


--------------------------------------------------------------------------------
/MADQN/readme.txt:
--------------------------------------------------------------------------------
1 | In this project we test the MADQN.


--------------------------------------------------------------------------------
/MADQN/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(name='multiagent',
 4 |       version='0.0.1',
 5 |       description='Multi-Agent Goal-Driven Communication Environment',
 6 |       url='https://github.com/openai/multiagent-public',
 7 |       author='Igor Mordatch',
 8 |       author_email='mordatch@openai.com',
 9 |       packages=find_packages(),
10 |       include_package_data=True,
11 |       zip_safe=False,
12 |       install_requires=['gym', 'numpy-stl']
13 | )
14 | 


--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/run_parameters.json:
--------------------------------------------------------------------------------
1 | {"random_seed": 2, "render": false, "learning_rate": 0.001, "testing": false, "benchmark": false, "batch_size": 128, "epsilon_greedy": null, "csv_filename_prefix": "/save/statistics-dqn", "episodes": 60000, "weights_filename_prefix": "/save/tag-dqn", "memory_size": 10000, "env": "simple_tag_v1", "experiment_prefix": "./results/dqn_1v2/", "checkpoint_frequency": 500}


--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_0.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_0.h5


--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_1.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_1.h5


--------------------------------------------------------------------------------
/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_2.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/MADQN/test/results/dqn_1v2/save/tag-dqn_21500_2.h5


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Multiagent reinforcement learning algorithms for multiple-UAV confrontation
 2 | This is the source code of "Efficient training techniques for multi-agent reinforcement learning in combatant tasks", 
 3 | we construct a multi-agent confrontation environment originated from a combatant scenario of multiple unman aerial vehicles. 
 4 | To begin with, we consider to solve this confrontation problem with two types of MARL algorithms. 
 5 | One is extended from the classical deep Q-network for multi-agent settings (MADQN). 
 6 | The other one is extended from the state-of-art multi-agent reinforcement method, multi-agent deep deterministic policy gradient (MADDPG). 
 7 | We compare the two methods for the initial confrontation scenario and find that MADDPG outperforms MADQN. 
 8 | Then with MADDPG as the baseline, we propose three efficient training techniques, i.e., scenario-transfer training, self-play training and rule-coupled training.
 9 | 
10 | ![image](https://github.com/sanjinzhi/multiagent-confrontation/blob/master/Rule-coupled%20vs%20Random.gif)
11 | 
12 | Rule-coupled red agents vs Random-move blue agents
13 | 
14 | ![image](https://github.com/sanjinzhi/multiagent-confrontation/blob/master/Rule-coupled%20vs%20Selfplay.gif)
15 | 
16 | Rule-coupled red agents vs Blue agents trained by self-play
17 | 


--------------------------------------------------------------------------------
/Rule-coupled vs Random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/Rule-coupled vs Random.gif


--------------------------------------------------------------------------------
/Rule-coupled vs Selfplay.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanjinzhi/Multiagent-reinforcement-learning-algorithms-for-multiple-UAV-confrontation/0181ea9b5e077be1138652d940e0a61a7e6ffca3/Rule-coupled vs Selfplay.gif


--------------------------------------------------------------------------------