├── .gitignore ├── README.md ├── algos ├── __init__.py └── maTT │ ├── __init__.py │ ├── core.py │ ├── core_behavior.py │ ├── dql.py │ ├── evaluation.py │ ├── evaluation_behavior.py │ ├── modules.py │ ├── replay_buffer.py │ └── run_script.py ├── environment.yml ├── envs ├── __init__.py ├── maTTenv │ ├── __init__.py │ ├── agent_models.py │ ├── belief_tracker.py │ ├── display_wrapper.py │ ├── env │ │ ├── __init__.py │ │ ├── maTracking_Base.py │ │ ├── setTracking_model.py │ │ ├── setTracking_v0.py │ │ ├── setTracking_vGreedy.py │ │ ├── setTracking_vGru.py │ │ └── setTracking_vkGreedy.py │ ├── gen_init_pose.py │ ├── maps │ │ ├── empty100.cfg │ │ ├── empty100.yaml │ │ ├── empty1000.cfg │ │ ├── empty1000.yaml │ │ ├── empty20.cfg │ │ ├── empty20.yaml │ │ ├── empty8.cfg │ │ ├── empty8.yaml │ │ ├── emptyMed.cfg │ │ ├── emptyMed.yaml │ │ ├── emptySmall.cfg │ │ ├── emptySmall.yaml │ │ └── map_utils.py │ ├── metadata.py │ └── util.py ├── run_ma_tracking.py └── utilities │ ├── __init__.py │ └── ma_time_limit.py ├── requirements.txt ├── setup └── utils ├── __init__.py ├── logSpinUp.py ├── mpi_pytorch.py ├── mpi_tools.py └── serialization_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | #repo specific ignores 2 | results/ 3 | 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scalableMARL 2 | 3 | [Scalable Reinforcement Learning Policies for Multi-Agent Control](https://arxiv.org/abs/2011.08055) 4 | 5 | CD. Hsu, H. Jeong, GJ. Pappas, P. Chaudhari. "Scalable Reinforcement Learning Policies for Multi-Agent Control". IEEE International Conference on Intelligent Robots and Systems (IROS), Prague, Czech Republic, 2021. 6 | 7 | A Multi-Agent Reinforcement Learning method to learn scalable control policies for multi-agent target tracking. 8 | 9 | + Author: Christopher Hsu 10 | + Email: chsu8@seas.upenn.edu 11 | + Affiliation: 12 | - Department of Electrical and Systems Engineering 13 | - GRASP Laboratory 14 | - @ University of Pennsylvania 15 | 16 | Currently supports Python 3.8 and is developed on Ubuntu 20.04. 17 | 18 | ## scalableMARL file structure 19 | Within scalableMARL (highlighting the important files): 20 | ``` 21 | scalableMARL 22 | |___algos 23 | |___maTT #RL algorithm folder for the target tracking environment 24 | |___core #Self-Attention-based Model Architecture 25 | |___core_behavior #Used for further evaluation (Ablation D.2.) 26 | |___dql #Soft Double Q-Learning 27 | |___evaluation #Evaluation for Main Results 28 | |___evaluation_behavior #Used for further evaluation (Ablation D.2.) 29 | |___modules #Self-Attention blocks 30 | |___replay_buffer #RL replay buffer for sets 31 | |___run_script #**Main run script to do training and evaluation 32 | |___envs 33 | |___maTTenv #multi-agent target tracking 34 | |___env 35 | |___setTracking_v0 #Standard environment (i.e. 4a4t tasks) 36 | |___setTracking_vGreedy #Baseline Greedy Heuristic 37 | |___setTracking_vGru #Experiment with GRU (Ablation D.3) 38 | |___setTracking_vkGreedy #Experiment with Scalability and Heuristic Mask k=4 (Ablation D.1) 39 | |___run_ma_tracking #Example script to run the environment 40 | |___setup #set PYTHONPATH ($source setup) 41 | ``` 42 | 43 | + To set up scalableMARL, follow the instructions below.
44 | 45 | ## Set up python environment for the scalableMARL repository 46 | 47 | ### Install python3.8 (if it is not already installed) 48 | ``` 49 | #to check python version 50 | python3 -V 51 | 52 | sudo apt-get update 53 | sudo apt-get install python3.8-dev 54 | ``` 55 | 56 | ## Set up environment (conda or virtualenv) 57 | 58 | ### Set up with conda 59 | ``` 60 | conda env create -f environment.yml 61 | conda activate scalableMARL 62 | source setup 63 | ``` 64 | ### Set up virtualenv 65 | Python virtual environments are used to isolate package installation from the system 66 | 67 | Replace 'virtualenv name' with your choice of folder name 68 | ``` 69 | sudo apt-get install python3-venv 70 | 71 | python3 -m venv --system-site-packages ./'virtualenv name' 72 | # Activate the environment for use, any commands now will be done within this venv 73 | source ./'virtualenv name'/bin/activate 74 | 75 | # To deactivate (in terminal, exit out of venv), do not use during setup 76 | deactivate 77 | ``` 78 | Now that the virtualenv is activated, you can install packages that are isolated from your system 79 | 80 | When the venv is activated, you can now install packages and run scripts 81 | 82 | #### Install isolated packages in your venv 83 | ``` 84 | sudo apt-get install -y eog python3-tk python3-yaml python3-pip ssh git 85 | 86 | #This command will auto install packages from requirements.txt 87 | pip3 install --trusted-host pypi.python.org -r requirements.txt 88 | ``` 89 | 90 | ## Current workflow 91 | ### Setup repos 92 | ``` 93 | # activate env 94 | conda activate scalableMARL 95 | # or 96 | source ./'virtualenv name'/bin/activate 97 | # change directory to scalableMARL 98 | cd ./scalableMARL 99 | # setup repo ***important in order to set PYTHONPATH*** 100 | source setup 101 | ``` 102 | scalableMARL repo is ready to go 103 | 104 | ### Running an algorithm 105 | ``` 106 | # its best to run from the scalableMARL folder so that logging and saving is consistent 107 | cd ./scalableMARL 108 | # run the alg 109 | python3 algos/maTT/run_script.py 110 | 111 | # you can run the alg with different argument parameters. See within run_script for more options. 112 | # for example 113 | python3 algos/maTT/run_script.py --seed 0 --logdir ./results/maPredPrey --epochs 40 114 | ``` 115 | ### To test, evaluate, and render() 116 | ``` 117 | # for a general example 118 | python3 algos/maTT/run_script.py --mode test --render 1 --log_dir ./results/maTT/setTracking-v0_123456789/seed_0/ --nb_test_eps 50 119 | # for a saved policy in saved_results 120 | python3 algos/maTT/run_script.py --mode test --render 1 --log_dir ./saved_results/maTT/setTracking-v0_123456789/seed_0/ 121 | ``` 122 | ### To see training curves 123 | ``` 124 | tensorboard --logdir ./results/maTT/setTracking-v0_123456789/ 125 | ``` 126 | ### Citing scalableMARL 127 | If you reference or use scalableMARL in your research, please cite: 128 | ``` 129 | @misc{hsu2021scalable, 130 | title={Scalable Reinforcement Learning Policies for Multi-Agent Control}, 131 | author={Christopher D. Hsu and Heejin Jeong and George J. 
Pappas and Pratik Chaudhari}, 132 | year={2021}, 133 | eprint={2011.08055}, 134 | archivePrefix={arXiv}, 135 | primaryClass={cs.MA} 136 | } 137 | 138 | ``` 139 | 140 | 141 | -------------------------------------------------------------------------------- /algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/algos/__init__.py -------------------------------------------------------------------------------- /algos/maTT/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/algos/maTT/__init__.py -------------------------------------------------------------------------------- /algos/maTT/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from algos.maTT.modules import * 5 | 6 | __author__ = 'Christopher D Hsu' 7 | __copyright__ = '' 8 | __credits__ = ['Christopher D Hsu', 'SpinningUp'] 9 | __license__ = '' 10 | __version__ = '0.0.1' 11 | __maintainer__ = 'Christopher D Hsu' 12 | __email__ = 'chsu8@seas.upenn.edu' 13 | __status__ = 'Dev' 14 | 15 | def combined_shape(length, shape=None): 16 | if shape is None: 17 | return (length,) 18 | return (length, shape) if np.isscalar(shape) else (length, *shape) 19 | 20 | def count_vars(module): 21 | return sum([np.prod(p.shape) for p in module.parameters()]) 22 | 23 | 24 | class SoftActionSelector(nn.Module): 25 | ''' 26 | Soft parameterization of the Q-value logits, 27 | pi_log = log[(1/Z) * e^(v(x) - min(v(x)))] 28 | If deterministic, take the max-value action; 29 | Else (stochastic), 30 | Sample from a multinomial over the soft logits.
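Example (illustrative sketch only, not part of the original code; act_dim=12 is a placeholder value):

        selector = SoftActionSelector(act_dim=12)
        q = torch.randn(1, 12)      # summed Q-values, shape [batch, act_dim]
        a, logp = selector(q)       # stochastic: sample from softmax(q - min(q))
        a_det, _ = selector(q, deterministic=True, with_logprob=False)  # greedy action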
31 | ''' 32 | def __init__(self, act_dim): 33 | super().__init__() 34 | self.act_dim = act_dim 35 | self.logsoftmax = nn.LogSoftmax(dim=1) 36 | 37 | 38 | def forward(self, q, deterministic=False, with_logprob=True): 39 | q_soft = q - torch.min(q) 40 | 41 | # Convert q values to log probability space 42 | try: 43 | pi_log = self.logsoftmax(q_soft) 44 | except: 45 | q_soft = q_soft.unsqueeze(0) 46 | pi_log = self.logsoftmax(q_soft) 47 | 48 | # Select action 49 | if deterministic: 50 | mu = torch.argmax(pi_log) 51 | pi_action = mu 52 | else: 53 | q_log_dist = torch.distributions.multinomial.Multinomial(1, logits=pi_log) 54 | action = q_log_dist.sample() 55 | pi_action = torch.argmax(action, dim=1, keepdim=True) 56 | 57 | # Calculate log probability if training 58 | if with_logprob: 59 | logp_pi = torch.gather(pi_log,1,pi_action) 60 | else: 61 | logp_pi = None 62 | 63 | return pi_action, logp_pi 64 | 65 | class DeepSetAttention(nn.Module): 66 | """ Written by Christopher Hsu: 67 | 68 | """ 69 | def __init__(self, dim_input, dim_output, num_outputs=1, 70 | dim_hidden=128, num_heads=4, ln=True): 71 | super().__init__() 72 | self.enc = nn.Sequential( 73 | SAB(dim_input, dim_hidden, num_heads, ln=ln), 74 | SAB(dim_hidden, dim_hidden, num_heads, ln=ln)) 75 | self.dec = nn.Sequential( 76 | SAB(dim_hidden, dim_hidden, num_heads, ln=ln), 77 | nn.Linear(dim_hidden, dim_output)) 78 | 79 | # v(x) 80 | def values(self, obs): 81 | v = self.enc(obs) 82 | v = v.sum(dim=1, keepdim=True) #pooling mechanism: sum, mean, max 83 | v = self.dec(v).squeeze() 84 | return v 85 | 86 | # q(x,a) 87 | def forward(self, obs, act): 88 | v = self.enc(obs) 89 | v = v.sum(dim=1, keepdim=True) #pooling mechanism: sum, mean, max 90 | v = self.dec(v).squeeze() 91 | q = torch.gather(v, 1, act) 92 | return q 93 | 94 | class DeepSetmodel(nn.Module): 95 | 96 | def __init__(self, observation_space, action_space, dim_hidden=128): 97 | super().__init__() 98 | 99 | obs_dim = observation_space.shape[0] 100 | act_dim = action_space.n 101 | 102 | # build policy and value functions 103 | self.pi = SoftActionSelector(act_dim) 104 | self.q1 = DeepSetAttention(dim_input=obs_dim, dim_output=act_dim, dim_hidden=dim_hidden) 105 | self.q2 = DeepSetAttention(dim_input=obs_dim, dim_output=act_dim, dim_hidden=dim_hidden) 106 | 107 | def act(self, obs, deterministic=False): 108 | with torch.no_grad(): 109 | v1 = self.q1.values(obs) 110 | v2 = self.q2.values(obs) 111 | 112 | a, _ = self.pi(v1+v2, deterministic, False) 113 | # Tensor to int 114 | return int(a) 115 | -------------------------------------------------------------------------------- /algos/maTT/core_behavior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from algos.maTT.modules import * 5 | 6 | """ 7 | Used during evaluation of policies: 8 | in order to take a look at the q values 9 | Used to analyze weak form of cooperation 10 | Could be used to look at attention 11 | """ 12 | __author__ = 'Christopher D Hsu' 13 | __copyright__ = '' 14 | __credits__ = ['Christopher D Hsu'] 15 | __license__ = '' 16 | __version__ = '0.0.1' 17 | __maintainer__ = 'Christopher D Hsu' 18 | __email__ = 'chsu8@seas.upenn.edu' 19 | __status__ = 'Dev' 20 | 21 | 22 | def combined_shape(length, shape=None): 23 | if shape is None: 24 | return (length,) 25 | return (length, shape) if np.isscalar(shape) else (length, *shape) 26 | 27 | def count_vars(module): 28 | return sum([np.prod(p.shape) for p in
module.parameters()]) 29 | 30 | 31 | class SoftActionSelector(nn.Module): 32 | ''' 33 | Soft parameterization of q value logits, 34 | pi_log = (1/Z)*(e^(v(x)) - min(v(x)) 35 | If determinstic take max value as action, 36 | Else (stochastic), 37 | Sample from multinomial of the soft logits. 38 | ''' 39 | def __init__(self, act_dim): 40 | super().__init__() 41 | self.act_dim = act_dim 42 | self.logsoftmax = nn.LogSoftmax(dim=1) 43 | self.softmax = nn.Softmax(dim=1) 44 | 45 | 46 | def forward(self, q, deterministic=False, with_logprob=True): 47 | q_soft = q - torch.min(q) 48 | 49 | # Convert q values to log probability space 50 | try: 51 | pi_log = self.logsoftmax(q_soft) 52 | q_values = self.softmax(q_soft) 53 | except: 54 | q_soft = q_soft.unsqueeze(0) 55 | pi_log = self.logsoftmax(q_soft) 56 | q_values = self.softmax(q_soft) 57 | # Select action 58 | if deterministic: 59 | mu = torch.argmax(pi_log) 60 | pi_action = mu 61 | else: 62 | q_log_dist = torch.distributions.multinomial.Multinomial(1, logits=pi_log) 63 | action = q_log_dist.sample() 64 | pi_action = torch.argmax(action, dim=1, keepdim=True) 65 | 66 | # Calculate log probability if training 67 | if with_logprob: 68 | logp_pi = torch.gather(pi_log,1,pi_action) 69 | else: 70 | logp_pi = None 71 | 72 | return pi_action, logp_pi, q_values 73 | 74 | class DeepSetAttention(nn.Module): 75 | """ Written by Christopher Hsu: 76 | 77 | """ 78 | def __init__(self, dim_input, dim_output, num_outputs=1, 79 | dim_hidden=128, num_heads=4, ln=True): 80 | super().__init__() 81 | self.enc = nn.Sequential( 82 | SAB(dim_input, dim_hidden, num_heads, ln=ln), 83 | SAB(dim_hidden, dim_hidden, num_heads, ln=ln)) 84 | self.dec = nn.Sequential( 85 | SAB(dim_hidden, dim_hidden, num_heads, ln=ln), 86 | nn.Linear(dim_hidden, dim_output)) 87 | 88 | # v(x) 89 | def values(self, obs): 90 | v = self.enc(obs) 91 | v = v.sum(dim=1, keepdim=True) #pooling mechanism: sum, mean, max 92 | v = self.dec(v).squeeze() 93 | return v 94 | 95 | # q(x,a) 96 | def forward(self, obs, act): 97 | v = self.enc(obs) 98 | v = v.sum(dim=1, keepdim=True) #pooling mechanism: sum, mean, max 99 | v = self.dec(v).squeeze() 100 | q = torch.gather(v, 1, act) 101 | return q 102 | 103 | class DeepSetmodel(nn.Module): 104 | 105 | def __init__(self, observation_space, action_space, dim_hidden=128): 106 | super().__init__() 107 | 108 | obs_dim = observation_space.shape[0] 109 | act_dim = action_space.n 110 | 111 | # build policy and value functions 112 | self.pi = SoftActionSelector(act_dim) 113 | self.q1 = DeepSetAttention(dim_input=obs_dim, dim_output=act_dim, dim_hidden=dim_hidden) 114 | self.q2 = DeepSetAttention(dim_input=obs_dim, dim_output=act_dim, dim_hidden=dim_hidden) 115 | 116 | def act(self, obs, deterministic=False): 117 | with torch.no_grad(): 118 | v1 = self.q1.values(obs) 119 | v2 = self.q2.values(obs) 120 | 121 | a, _, q_values = self.pi(v1+v2, deterministic, False) 122 | # Tensor to int 123 | return int(a), q_values 124 | -------------------------------------------------------------------------------- /algos/maTT/dql.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import itertools 3 | import numpy as np 4 | import torch 5 | from torch.optim import Adam 6 | from torch.utils.tensorboard import SummaryWriter 7 | import gym 8 | from gym.spaces import Box, Discrete 9 | import time, os, random, pdb 10 | from utils.logSpinUp import EpochLogger 11 | import algos.maTT.core as core 12 | from algos.maTT.replay_buffer 
import ReplayBufferSet as ReplayBuffer 13 | 14 | __author__ = 'Christopher D Hsu' 15 | __copyright__ = '' 16 | __credits__ = ['Christopher D Hsu', 'SpinningUp'] 17 | __license__ = '' 18 | __version__ = '0.0.1' 19 | __maintainer__ = 'Christopher D Hsu' 20 | __email__ = 'chsu8@seas.upenn.edu' 21 | __status__ = 'Dev' 22 | 23 | 24 | SET_EVAL_v0 = [ 25 | {'nb_agents': 1, 'nb_targets': 1}, 26 | {'nb_agents': 2, 'nb_targets': 2}, 27 | {'nb_agents': 3, 'nb_targets': 3}, 28 | {'nb_agents': 4, 'nb_targets': 4}, 29 | ] 30 | 31 | def eval_set(num_agents, num_targets): 32 | agents = np.linspace(num_agents/2, num_agents, num=3, dtype=int) 33 | targets = np.linspace(num_agents/2, num_targets, num=3, dtype=int) 34 | params_set = [{'nb_agents':1, 'nb_targets':1}, 35 | {'nb_agents':4, 'nb_targets':4}] 36 | for a in agents: 37 | for t in targets: 38 | params_set.append({'nb_agents':a, 'nb_targets':t}) 39 | return params_set 40 | 41 | def test_agent(test_env, get_action, logger, num_test_episodes, 42 | num_agents, num_targets, render=False): 43 | """ Evaluate current policy over an environment set 44 | """ 45 | ## Either manually set evaluation set or auto fill 46 | params_set = SET_EVAL_v0 47 | # params_set = eval_set(num_agents, num_targets) 48 | 49 | for params in params_set: 50 | for j in range(num_test_episodes): 51 | done, ep_ret, ep_len = {'__all__':False}, 0, 0 52 | obs = test_env.reset(**params) 53 | while not done['__all__']: 54 | if render: 55 | test_env.render() 56 | action_dict = {} 57 | for agent_id, o in obs.items(): 58 | action_dict[agent_id] = get_action(o, deterministic=False) 59 | 60 | obs, rew, done, _ = test_env.step(action_dict) 61 | ep_ret += rew['__all__'] 62 | ep_len += 1 63 | logger.store(TestEpRet=ep_ret) 64 | 65 | 66 | def doubleQlearning(env_fn, model=core.DeepSetmodel, model_kwargs=dict(), seed=0, 67 | steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, 68 | polyak=0.995, lr=1e-2, alpha=0.2, batch_size=100, start_steps=10000, 69 | update_after=4000, update_every=1, num_test_episodes=5, max_ep_len=200, 70 | logger_kwargs=dict(), save_freq=1, lr_period=0.7, grad_clip=5, render=False, 71 | torch_threads=1, amp=False): 72 | """ 73 | Soft Clipped Double Q-learning 74 | 75 | Written by Christopher Hsu 76 | based on SpinningUp structure 77 | 78 | Args: 79 | env_fn : A function which creates a copy of the environment. 80 | The environment must satisfy the OpenAI Gym API. 81 | 82 | policy: The constructor method for a PyTorch Module with an ``act`` 83 | method, a ``pi`` module, a ``q1`` module, and a ``q2`` module. 84 | The ``act`` method and ``pi`` module should accept batches of 85 | observations as inputs, and ``q1`` and ``q2`` should accept a batch 86 | of observations. When called, ``act``, ``q1``, and ``q2`` should return: 87 | 88 | =========== ================ ====================================== 89 | Call Output Shape Description 90 | =========== ================ ====================================== 91 | ``act`` (act_dim) | Numpy array of actions for each 92 | | observation. 93 | ``q1`` (batch,) | Tensor containing one current estimate 94 | | of Q* for the provided observations 95 | | and action. 96 | ``q1.values``(batch,act_dim) | Tensor containing current estimate 97 | | of Q*'s' for observations. 98 | ``q2`` (batch,) | Tensor containing the other current 99 | | estimate of Q* for the provided observations 100 | | and action. 101 | ``q2.values``(batch,act_dim) | Tensor containing current estimate 102 | | of Q*'s' for observations. 
103 | =========== ================ ====================================== 104 | 105 | Calling ``pi`` should return: 106 | 107 | =========== ================ ====================================== 108 | Symbol Shape Description 109 | =========== ================ ====================================== 110 | ``a`` (batch, act_dim) | Tensor containing actions from policy 111 | | given observations. 112 | ``logp_pi`` (batch,) | Tensor containing log probabilities of 113 | | actions in ``a``. Importantly: gradients 114 | | should be able to flow back into ``a``. 115 | =========== ================ ====================================== 116 | 117 | model_kwargs (dict): Any kwargs appropriate for the MLPmodel object 118 | you provided to double q learning. 119 | 120 | seed (int): Seed for random number generators. 121 | 122 | steps_per_epoch (int): Number of steps of interaction (state-action pairs) 123 | for the agent and the environment in each epoch. 124 | 125 | epochs (int): Number of epochs to run and train agent. 126 | 127 | replay_size (int): Maximum length of replay buffer. 128 | 129 | gamma (float): Discount factor. (Always between 0 and 1.) 130 | 131 | polyak (float): Interpolation factor in polyak averaging for target 132 | networks. Target networks are updated towards main networks 133 | according to: 134 | 135 | .. math:: \\theta_{\\text{targ}} \\leftarrow 136 | \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta 137 | 138 | where :math:`\\rho` is polyak. (Always between 0 and 1, usually 139 | close to 1.) 140 | 141 | lr (float): Learning rate (used for both policy and value learning). 142 | 143 | alpha (float): Entropy regularization coefficient. (Equivalent to 144 | inverse of reward scale in the original SAC paper.) 145 | 146 | batch_size (int): Minibatch size for SGD. 147 | 148 | start_steps (int): Number of steps for uniform-random action selection, 149 | before running real policy. Helps exploration. 150 | 151 | update_after (int): Number of env interactions to collect before 152 | starting to do gradient descent updates. Ensures replay buffer 153 | is full enough for useful updates. 154 | 155 | update_every (int): Number of env interactions that should elapse 156 | between gradient descent updates. Note: Regardless of how long 157 | you wait between updates, the ratio of env steps to gradient steps 158 | is locked to 1. 159 | 160 | num_test_episodes (int): Number of episodes to test the deterministic 161 | policy at the end of each epoch. 162 | 163 | max_ep_len (int): Maximum length of trajectory / episode / rollout. 164 | 165 | logger_kwargs (dict): Keyword args for EpochLogger. 166 | 167 | save_freq (int): How often (in terms of gap between epochs) to save 168 | the current policy and value function. 
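lr_period (float): Fraction of total training steps, at the end of training, over which the cosine learning-rate schedule in ``update`` is applied; before that point the learning rate is held at its initial value.

grad_clip (float): Value used for element-wise gradient clipping via ``torch.nn.utils.clip_grad_value_`` before each optimizer step.

torch_threads (int): Number of CPU threads passed to ``torch.set_num_threads``.

amp (bool): If True, use automatic mixed precision (``torch.cuda.amp``) for the Q-loss forward and backward passes.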
169 | 170 | """ 171 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 172 | torch.set_num_threads(torch_threads) 173 | 174 | logger = EpochLogger(**logger_kwargs) 175 | logger.save_config(locals()) 176 | writer = SummaryWriter(logger_kwargs['output_dir']) 177 | 178 | torch.manual_seed(seed) 179 | np.random.seed(seed) 180 | 181 | # env, test_env = env_fn(), env_fn() #Can be used if env is official gym env 182 | env = env_fn() 183 | test_env = deepcopy(env) 184 | obs_dim = env.observation_space.shape[0] 185 | act_dim = env.action_space.n 186 | 187 | # Create actor-critic module and target networks 188 | policy = model(env.observation_space, env.action_space, **model_kwargs).to(device) 189 | policy_targ = deepcopy(policy).to(device) 190 | 191 | # Freeze target networks with respect to optimizers (only update via polyak averaging) 192 | for p in policy_targ.parameters(): 193 | p.requires_grad = False 194 | 195 | # List of parameters for both Q-networks (save this for convenience) 196 | q_params = itertools.chain(policy.q1.parameters(), policy.q2.parameters()) 197 | 198 | # Experience Replay Buffer 199 | replay_buffer = ReplayBuffer(replay_size, obs_dim, act_dim) 200 | 201 | # Count variables (protip: try to get a feel for how different size networks behave!) 202 | var_counts = tuple(core.count_vars(module) for module in [policy.q1, policy.q2]) 203 | logger.log('\nNumber of parameters: \t q1: %d, \t q2: %d\n'%var_counts) 204 | 205 | # Set up optimizer 206 | q_optimizer = Adam(q_params, lr=lr) 207 | 208 | #AMP: automatic mixed precision: fp32 -> fp 16 209 | if amp: 210 | scaler = torch.cuda.amp.GradScaler() 211 | 212 | # Set up function for computing Q-losses 213 | def compute_loss_q(data): 214 | obs = data['obs'].to(device) 215 | act = data['act'].type(torch.LongTensor).to(device) 216 | rew = data['rew'].to(device) 217 | obs2 = data['obs2'].to(device) 218 | done = data['done'].type(torch.float32).to(device) 219 | 220 | q1 = policy.q1(obs,act) 221 | q2 = policy.q2(obs,act) 222 | 223 | # Bellman backup for Q functions 224 | with torch.cuda.amp.autocast(enabled=False): 225 | with torch.no_grad(): 226 | # Target actions come from *current* policy 227 | v1 = policy.q1.values(obs2) 228 | v2 = policy.q2.values(obs2) 229 | act2, logp_a2 = policy.pi(v1+v2) 230 | 231 | # Target Q-values 232 | q1_pi_targ = policy_targ.q1(obs2, act2) 233 | q2_pi_targ = policy_targ.q2(obs2, act2) 234 | q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ) 235 | 236 | #Unsqueeze adds another dim, necessary to be column vectors 237 | # backup = r.unsqueeze(1) + gamma * (1 - done).unsqueeze(1) * (q_pi_targ - alpha * logp_a2) 238 | backup = rew.unsqueeze(1) + gamma * (q_pi_targ - alpha * logp_a2) 239 | 240 | # MSE loss against Bellman backup 241 | # loss_q1 = ((q1 - backup)**2).mean() 242 | # loss_q2 = ((q2 - backup)**2).mean() 243 | # Huber loss against Bellman backup 244 | huber = torch.nn.SmoothL1Loss() 245 | loss_q1 = huber(q1, backup) 246 | loss_q2 = huber(q2, backup) 247 | loss_q = loss_q1 + loss_q2 248 | 249 | # Useful info for logging, if training on gpu need to send to cpu to store 250 | try: 251 | q_info = dict(Q1Vals=q1.detach().numpy(), 252 | Q2Vals=q2.detach().numpy()) 253 | except: 254 | q_info = dict(Q1Vals=q1.cpu().detach().numpy(), 255 | Q2Vals=q2.cpu().detach().numpy()) 256 | 257 | return loss_q, q_info 258 | 259 | def update(data, lr_iter): 260 | # Update learning rate with cosine schedule 261 | lr = np.clip(0.0005*np.cos(np.pi*lr_iter/(total_steps*lr_period))+0.000501, 1e-5, 1e-3) 
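# Schedule note: lr_iter is 0 for the first (1 - lr_period) fraction of training, so the
# learning rate holds at the 1e-3 ceiling; over the final lr_period fraction the cosine
# term anneals it from ~1e-3 down to the 1e-5 floor enforced by np.clip.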
262 | q_optimizer.param_groups[0]['lr'] = lr 263 | 264 | # First run one gradient descent step for Q1 and Q2 265 | q_optimizer.zero_grad() 266 | 267 | # Automatic Mixed Precision, used for faster training with fp16 operations 268 | if amp: 269 | # Enables autocasting for the forward pass (model + loss) 270 | with torch.cuda.amp.autocast(): 271 | loss_q, q_info = compute_loss_q(data) 272 | # Calls backward() on scaled loss to create scaled gradients 273 | scaler.scale(loss_q).backward() 274 | # Unscales the gradients of optimizer's assigned params in-place 275 | scaler.unscale_(q_optimizer) 276 | # Clip gradient values 277 | torch.nn.utils.clip_grad_value_(policy.parameters(), grad_clip) 278 | # scaler.step() first unscales the gradients of the optimizer's assigned params. 279 | scaler.step(q_optimizer) 280 | # Updates the scale for next iteration. 281 | scaler.update() 282 | 283 | # Standard fp32 training operations 284 | else: 285 | loss_q, q_info = compute_loss_q(data) 286 | # Backprop 287 | loss_q.backward() 288 | # Clip gradient values 289 | torch.nn.utils.clip_grad_value_(policy.parameters(), grad_clip) 290 | # Gradient step 291 | q_optimizer.step() 292 | 293 | ## Record things 294 | logger.store(LossQ=loss_q.item(), **q_info) 295 | 296 | # Finally, update target networks by polyak averaging. 297 | with torch.no_grad(): 298 | for p, p_targ in zip(policy.parameters(), policy_targ.parameters()): 299 | # NB: We use an in-place operations "mul_", "add_" to update target 300 | # params, as opposed to "mul" and "add", which would make new tensors. 301 | p_targ.data.mul_(polyak) 302 | p_targ.data.add_((1 - polyak) * p.data) 303 | 304 | def get_action(obs, deterministic=False): 305 | # Unsqueeze obs to [1, n, d] 306 | return policy.act(torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device), deterministic) 307 | 308 | # Prepare for interaction with environment 309 | total_steps = steps_per_epoch * epochs 310 | start_time = time.time() 311 | ep_ret, ep_len, best_test_ret = 0, 0, 0 312 | obs = env.reset() 313 | 314 | # Main loop: collect experience in env and update/log each epoch 315 | for t in range(total_steps): 316 | 317 | # Until start_steps have elapsed, randomly sample actions 318 | # from a uniform distribution for better exploration. Afterwards, 319 | # use the learned policy. 320 | action_dict = {} 321 | if t > start_steps: 322 | for agent_id, o in obs.items(): 323 | action_dict[agent_id] = get_action(o, deterministic=False) 324 | else: 325 | for agent_id, o in obs.items(): 326 | action_dict[agent_id] = env.action_space.sample() 327 | 328 | # Step the env 329 | obs2, rew, done, info = env.step(action_dict) 330 | ep_ret += rew['__all__'] 331 | ep_len += 1 332 | 333 | # Ignore the "done" signal if it comes from hitting the time 334 | # horizon (that is, when it's an artificial terminal signal 335 | # that isn't based on the agent's state) 336 | done['__all__'] = False if ep_len==max_ep_len else False 337 | 338 | # Store experience to replay buffer seperately for each agent 339 | for agent_id, o in obs.items(): 340 | replay_buffer.store(o, action_dict[agent_id], rew['__all__'], 341 | obs2[agent_id], float(done['__all__']))#, env.nb_targets) 342 | 343 | # Super critical, easy to overlook step: make sure to update 344 | # most recent observation! 
345 | obs = obs2 346 | 347 | # End of trajectory handling 348 | if done['__all__'] or (ep_len == max_ep_len): 349 | logger.store(EpRet=ep_ret, EpLen=ep_len) 350 | ep_ret, ep_len = 0, 0 351 | obs = env.reset() 352 | 353 | # Update handling 354 | if t >= update_after and t % update_every == 0: 355 | for j in range(update_every): 356 | #Cosine learning rate schedule 357 | if t < total_steps*(1-lr_period): 358 | lr_iter = 0 359 | else: 360 | lr_iter = t-total_steps*(1-lr_period) 361 | 362 | batch = replay_buffer.sample_batch(batch_size)#, env.num_targets) 363 | update(data=batch, lr_iter=lr_iter) 364 | 365 | # End of epoch handling 366 | if (t+1) % steps_per_epoch == 0: 367 | epoch = (t+1) // steps_per_epoch 368 | logger.log_tabular('Epoch', epoch) 369 | 370 | # Test the performance of the deterministic version of the agent. 371 | test_agent(test_env, get_action, logger, num_test_episodes, env.num_agents, env.num_targets) 372 | # Averages test ep returns 373 | logger.log_tabular('TestEpRet', with_min_and_max=True) 374 | 375 | # Save model 376 | # if (epoch % save_freq == 0) or (epoch == epochs): 377 | # torch.save(policy.state_dict(), fpath+'model%d.pt'%epoch) 378 | 379 | # Save model based on best test episode return 380 | if logger.log_current_row['AverageTestEpRet'] > best_test_ret: 381 | logger.log('Saving model, AverageTestEpRet increase %d -> %d'% 382 | (best_test_ret, logger.log_current_row['AverageTestEpRet'])) 383 | 384 | fpath = logger_kwargs['output_dir']+'/state_dict/' 385 | os.makedirs(fpath, exist_ok=True) 386 | torch.save(policy.state_dict(), fpath+'model.pt') 387 | best_test_ret = logger.log_current_row['AverageTestEpRet'] 388 | 389 | 390 | # Log info about epoch 391 | logger.log_tabular('EpRet', with_min_and_max=True) 392 | logger.log_tabular('EpLen', average_only=True) 393 | logger.log_tabular('TotalEnvInteracts', t) 394 | logger.log_tabular('Q1Vals', with_min_and_max=True) 395 | logger.log_tabular('Q2Vals', with_min_and_max=True) 396 | logger.log_tabular('LossQ', average_only=True) 397 | logger.log_tabular('LR', q_optimizer.param_groups[0]['lr']) 398 | logger.log_tabular('Time', time.time()-start_time) 399 | 400 | # Tensorboard logger 401 | writer.add_scalar('AverageEpRet', logger.log_current_row['AverageEpRet'],t) 402 | writer.add_scalar('AverageTestEpRet', logger.log_current_row['AverageTestEpRet'],t) 403 | writer.add_scalar('AverageQ1Vals', logger.log_current_row['AverageQ1Vals'],t) 404 | writer.add_scalar('AverageQ2Vals', logger.log_current_row['AverageQ2Vals'],t) 405 | writer.add_scalar('HuberLossQ', logger.log_current_row['LossQ'],t) 406 | writer.add_scalar('LearningRate', logger.log_current_row['LR'],t) 407 | 408 | logger.dump_tabular() 409 | 410 | 411 | -------------------------------------------------------------------------------- /algos/maTT/evaluation.py: -------------------------------------------------------------------------------- 1 | import datetime, json, os, argparse, time 2 | import pickle, tabulate 3 | import matplotlib 4 | from matplotlib import pyplot as plt 5 | import numpy as np 6 | import os.path as osp 7 | import torch 8 | 9 | __author__ = 'Christopher D Hsu' 10 | __copyright__ = '' 11 | __credits__ = ['Christopher D Hsu'] 12 | __license__ = '' 13 | __version__ = '0.0.1' 14 | __maintainer__ = 'Christopher D Hsu' 15 | __email__ = 'chsu8@seas.upenn.edu' 16 | __status__ = 'Dev' 17 | 18 | def load_pytorch_policy(fpath, fname, model, deterministic=True): 19 | fname = osp.join(fpath,'state_dict/',fname) 20 | map_location = torch.device('cuda') if 
torch.cuda.is_available() else torch.device('cpu') 21 | model.load_state_dict(torch.load(fname, map_location)) 22 | 23 | # make function for producing an action given a single state 24 | def get_action(x, deterministic=True): 25 | with torch.no_grad(): 26 | x = torch.as_tensor(x, dtype=torch.float32).unsqueeze(0) 27 | action = model.act(x, deterministic) 28 | return action 29 | 30 | return get_action 31 | 32 | def eval_set(num_agents, num_targets): 33 | agents = np.linspace(num_agents/2, num_agents, num=3, dtype=int) 34 | targets = np.linspace(num_agents/2, num_targets, num=3, dtype=int) 35 | params_set = [{'nb_agents':1, 'nb_targets':1}, 36 | {'nb_agents':4, 'nb_targets':4}] 37 | for a in agents: 38 | for t in targets: 39 | params_set.append({'nb_agents':a, 'nb_targets':t}) 40 | return params_set 41 | 42 | class Test: 43 | def __init__(self): 44 | pass 45 | 46 | def test(self, args, env, act, torch_threads=1): 47 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 48 | torch.set_num_threads(torch_threads) 49 | 50 | seed = args.seed 51 | env.seed(seed) 52 | torch.manual_seed(seed) 53 | np.random.seed(seed) 54 | 55 | set_eval = eval_set(env.num_agents, env.num_targets) 56 | if args.eval_type == 'random': 57 | params_set = [{}] 58 | elif args.eval_type == 'fixed_nb': 59 | ## Either manually set evaluation set or auto fill 60 | params_set = SET_EVAL_v0 61 | # params_set = set_eval 62 | else: 63 | raise ValueError("Wrong evaluation type for ttenv.") 64 | 65 | timelimit_env = env 66 | while( not hasattr(timelimit_env, '_elapsed_steps')): 67 | timelimit_env = timelimit_env.env 68 | 69 | total_nlogdetcov = [] 70 | total_intruders = [] 71 | for params in params_set: 72 | ep = 0 73 | ep_nlogdetcov = [] #'Episode nLogDetCov' 74 | ep_intruders = [] 75 | time_elapsed = ['Elapsed Time (sec)'] 76 | 77 | while(ep < args.nb_test_eps): # test episode 78 | ep += 1 79 | s_time = time.time() 80 | episode_rew, nlogdetcov, ep_len, intruders = 0, 0, 0, 0 81 | done = {'__all__':False} 82 | obs = env.reset(**params) 83 | 84 | while not done['__all__']: 85 | if args.render: 86 | env.render() 87 | action_dict = {} 88 | for agent_id, o in obs.items(): 89 | action_dict[agent_id] = act(o, deterministic=False) 90 | obs, rew, done, info = env.step(action_dict) 91 | episode_rew += rew['__all__'] 92 | nlogdetcov += info['mean_nlogdetcov'] 93 | ep_len += 1 94 | 95 | time_elapsed.append(time.time() - s_time) 96 | ep_nlogdetcov.append(nlogdetcov) 97 | 98 | if args.render: 99 | print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"%(ep, episode_rew, nlogdetcov)) 100 | if ep % 50 == 0: 101 | print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"%(ep, episode_rew, nlogdetcov)) 102 | 103 | if args.record : 104 | env.moviewriter.finish() 105 | if args.ros_log : 106 | ros_log.save(args.log_dir) 107 | 108 | # Stats 109 | meanofeps = np.mean(ep_nlogdetcov) 110 | total_nlogdetcov.append(meanofeps) 111 | # Eval plots and saves 112 | if args.env == 'setTracking-vGreedy': 113 | eval_dir = os.path.join(os.path.split(args.log_dir)[0], 'greedy_eval_seed%d_'%(seed)+args.map) 114 | else: 115 | eval_dir = os.path.join(os.path.split(args.log_dir)[0], 'eval_seed%d_'%(seed)+args.map) 116 | model_seed = os.path.split(args.log_dir)[-1] 117 | # eval_dir = os.path.join(args.log_dir, 'eval_seed%d_'%(seed)+args.map) 118 | # model_seed = os.path.split(args.log_fname)[0] 119 | if not os.path.exists(eval_dir): 120 | os.makedirs(eval_dir) 121 | matplotlib.use('Agg') 122 | f0, ax0 = plt.subplots() 123 | 
_ = ax0.plot(ep_nlogdetcov, '.') 124 | _ = ax0.set_title(args.env) 125 | _ = ax0.set_xlabel('episode number') 126 | _ = ax0.set_ylabel('mean nlogdetcov') 127 | _ = ax0.axhline(y=meanofeps, color='r', linestyle='-', label='mean over episodes: %.2f'%(meanofeps)) 128 | _ = ax0.legend() 129 | _ = ax0.grid() 130 | _ = f0.savefig(os.path.join(eval_dir, "%da%dt_%d_eval_"%(env.nb_agents, env.nb_targets, args.nb_test_eps) 131 | +model_seed+".png")) 132 | plt.close() 133 | pickle.dump(ep_nlogdetcov, open(os.path.join(eval_dir,"%da%dt_%d_eval_"%(env.nb_agents, env.nb_targets, args.nb_test_eps)) 134 | +model_seed+".pkl", 'wb')) 135 | 136 | #Plot over all example episode sets 137 | f1, ax1 = plt.subplots() 138 | _ = ax1.plot(total_nlogdetcov, '.') 139 | _ = ax1.set_title(args.env) 140 | _ = ax1.set_xlabel('example episode set number') 141 | _ = ax1.set_ylabel('mean nlogdetcov over episodes') 142 | _ = ax1.grid() 143 | _ = f1.savefig(os.path.join(eval_dir,'all_%d_eval'%(args.nb_test_eps)+model_seed+'.png')) 144 | plt.close() 145 | pickle.dump(total_nlogdetcov, open(os.path.join(eval_dir,'all_%d_eval'%(args.nb_test_eps))+model_seed+'%da%dt'%(args.nb_agents,args.nb_targets)+'.pkl', 'wb')) 146 | 147 | SET_EVAL_v0 = [ 148 | # {'nb_agents': 1, 'nb_targets': 1}, 149 | # {'nb_agents': 2, 'nb_targets': 1}, 150 | # {'nb_agents': 3, 'nb_targets': 1}, 151 | # {'nb_agents': 4, 'nb_targets': 1}, 152 | # {'nb_agents': 1, 'nb_targets': 2}, 153 | # {'nb_agents': 2, 'nb_targets': 2}, 154 | # {'nb_agents': 3, 'nb_targets': 2}, 155 | # {'nb_agents': 4, 'nb_targets': 2}, 156 | # {'nb_agents': 1, 'nb_targets': 3}, 157 | # {'nb_agents': 2, 'nb_targets': 3}, 158 | # {'nb_agents': 3, 'nb_targets': 3}, 159 | # {'nb_agents': 4, 'nb_targets': 3}, 160 | # {'nb_agents': 1, 'nb_targets': 4}, 161 | # {'nb_agents': 2, 'nb_targets': 4}, 162 | # {'nb_agents': 3, 'nb_targets': 4}, 163 | {'nb_agents': 20, 'nb_targets': 20}, 164 | ] -------------------------------------------------------------------------------- /algos/maTT/evaluation_behavior.py: -------------------------------------------------------------------------------- 1 | import datetime, json, os, argparse, time, pdb 2 | import pickle, tabulate 3 | import matplotlib 4 | from matplotlib import pyplot as plt 5 | import numpy as np 6 | import os.path as osp 7 | import torch 8 | from torch.utils.tensorboard import SummaryWriter 9 | 10 | __author__ = 'Christopher D Hsu' 11 | __copyright__ = '' 12 | __credits__ = ['Christopher D Hsu'] 13 | __license__ = '' 14 | __version__ = '0.0.1' 15 | __maintainer__ = 'Christopher D Hsu' 16 | __email__ = 'chsu8@seas.upenn.edu' 17 | __status__ = 'Dev' 18 | 19 | def load_pytorch_policy(fpath, fname, model, deterministic=True): 20 | fname = osp.join(fpath,'state_dict/',fname) 21 | map_location = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 22 | model.load_state_dict(torch.load(fname, map_location)) 23 | 24 | # make function for producing an action given a single state 25 | def get_action(x, deterministic=True): 26 | with torch.no_grad(): 27 | x = torch.as_tensor(x, dtype=torch.float32).unsqueeze(0) 28 | action = model.act(x, deterministic) 29 | return action 30 | 31 | return get_action 32 | 33 | 34 | def get_init_pose_list(nb_test_eps, eval_type): 35 | init_pose_list = [] 36 | if eval_type == 'fixed_2': 37 | for ii in range(nb_test_eps): 38 | left = np.random.uniform(20,30) 39 | Lyaxis = np.random.uniform(25,35) 40 | right = np.random.uniform(30,40) 41 | Ryaxis = np.random.uniform(35,45) 42 | 
init_pose_list.append({'agents':[[24.5, 15.5, 1.57], [26.5, 15.5, 1.57]], 43 | 'targets':[[left, Lyaxis, 0, 0],[right, Ryaxis, 0, 0]], 44 | 'belief_targets':[[left, Lyaxis, 0, 0], [right, Ryaxis, 0, 0]]}) 45 | else: 46 | for ii in range(nb_test_eps): 47 | xone = np.random.uniform(20,30) 48 | yone = np.random.uniform(15,25) 49 | xtwo = np.random.uniform(30,40) 50 | ytwo = np.random.uniform(25,35) 51 | xthree = np.random.uniform(20,30) 52 | ythree = np.random.uniform(35,45) 53 | xfour = np.random.uniform(10,20) 54 | yfour = np.random.uniform(35,45) 55 | init_pose_list.append({'agents':[[24.5, 10, 1.57], [26.5, 10, 1.57], 56 | [22.5, 10, 1.57], [28.5, 10, 1.57]], 57 | 'targets':[[xone, yone, 0, 0],[xtwo, ytwo, 0, 0], 58 | [xthree, ythree, 0, 0],[xfour, yfour, 0, 0]], 59 | 'belief_targets':[[xone, yone, 0, 0], [xtwo, ytwo, 0, 0], 60 | [xthree, ythree, 0, 0], [xfour, yfour, 0, 0]]}) 61 | 62 | return init_pose_list 63 | 64 | class TestBehavior: 65 | def __init__(self): 66 | pass 67 | 68 | def test(self, args, env, act, torch_threads=1): 69 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 70 | torch.set_num_threads(torch_threads) 71 | 72 | seed = args.seed 73 | env.seed(seed) 74 | torch.manual_seed(seed) 75 | np.random.seed(seed) 76 | 77 | eval_dir = os.path.join(os.path.split(args.log_dir)[0], 'behave_seed%d_'%(seed)+args.map) 78 | model_seed = os.path.split(args.log_dir)[-1] 79 | if not os.path.exists(eval_dir): 80 | os.makedirs(eval_dir) 81 | 82 | behave_dir = os.path.join(os.path.split(args.log_dir)[0], 'behave_seed%d_'%(seed)+args.map) 83 | model_seed = os.path.split(args.log_dir)[-1] 84 | if not os.path.exists(behave_dir): 85 | os.makedirs(behave_dir) 86 | # writer = SummaryWriter(behave_dir) 87 | 88 | if args.eval_type == 'random': 89 | params_set = [{}] 90 | elif args.eval_type == 'fixed_2': 91 | params_set = EVAL_BEHAVIOR_2 92 | tot_eplen = 60 93 | elif args.eval_type == 'fixed_4': 94 | params_set = EVAL_BEHAVIOR_4 95 | tot_eplen = 100 96 | else: 97 | raise ValueError("Wrong evaluation type for ttenv.") 98 | 99 | timelimit_env = env 100 | while( not hasattr(timelimit_env, '_elapsed_steps')): 101 | timelimit_env = timelimit_env.env 102 | 103 | if args.ros_log: 104 | from envs.target_tracking.ros_wrapper import RosLog 105 | ros_log = RosLog(num_targets=args.nb_targets, wrapped_num=args.ros + args.render + args.record + 1) 106 | 107 | init_pose_list = get_init_pose_list(args.nb_test_eps, args.eval_type) 108 | 109 | total_nlogdetcov = [] 110 | for params in params_set: 111 | ep = 0 112 | ep_nlogdetcov = [] #'Episode nLogDetCov' 113 | time_elapsed = ['Elapsed Time (sec)'] 114 | test_observations = np.zeros(args.nb_test_eps) 115 | 116 | while(ep < args.nb_test_eps): # test episode 117 | ep += 1 118 | s_time = time.time() 119 | episode_rew, nlogdetcov, ep_len = 0, 0, 0 120 | done = {'__all__':False} 121 | obs = env.reset(init_pose_list=init_pose_list, **params) 122 | 123 | all_observations = np.zeros(env.nb_targets, dtype=bool) 124 | 125 | 126 | bigq0 = [] 127 | bigq1 = [] 128 | 129 | while ep_len < tot_eplen: 130 | if args.render: 131 | env.render() 132 | if args.ros_log: 133 | ros_log.log(env) 134 | action_dict = {} 135 | q_dict = {} 136 | for agent_id, o in obs.items(): 137 | action_dict[agent_id], q_dict[agent_id] = act(o, deterministic=False) 138 | # record target observations 139 | observed = np.zeros(env.nb_targets, dtype=bool) 140 | all_observations = np.logical_or(all_observations, o[:,5].astype(bool)) 141 | 142 | if all(all_observations) == True: 
143 | test_observations[ep-1] = 1 144 | 145 | obs, rew, done, info = env.step(action_dict) 146 | episode_rew += rew['__all__'] 147 | nlogdetcov += info['mean_nlogdetcov'] 148 | # log q values 149 | rearrange = [0,3,6,9,1,4,7,10,2,5,8,11] 150 | q0 = np.zeros((12)) 151 | q1 = np.zeros((12)) 152 | for ii, val in enumerate(rearrange): 153 | qs0 = q_dict['agent-0'].squeeze(0) 154 | qs1 = q_dict['agent-1'].squeeze(0) 155 | q0[ii] = qs0[val] 156 | q1[ii] = qs1[val] 157 | 158 | bigq0.append(q0) 159 | bigq1.append(q1) 160 | ep_len += 1 161 | 162 | bigq0 = np.asarray(bigq0) 163 | bigq1 = np.asarray(bigq1) 164 | 165 | 166 | time_elapsed.append(time.time() - s_time) 167 | ep_nlogdetcov.append(nlogdetcov) 168 | if args.render: 169 | print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f, ep len : %d"%(ep, episode_rew, nlogdetcov, ep_len)) 170 | if ep % 50 == 0: 171 | print("Ep.%d - Episode reward : %.2f, Episode nLogDetCov : %.2f"%(ep, episode_rew, nlogdetcov)) 172 | 173 | if args.record : 174 | env.moviewriter.finish() 175 | if args.ros_log : 176 | ros_log.save(args.log_dir) 177 | 178 | print(test_observations) 179 | print("Cooperation ratio over total evals: %.2f"%(np.sum(test_observations)/args.nb_test_eps)) 180 | 181 | # plot actions for agents in 3d bar graph 182 | f2 = plt.figure() 183 | ax2 = f2.add_subplot(121,projection='3d') 184 | ax3 = f2.add_subplot(122,projection='3d') 185 | 186 | 187 | lx = len(bigq0[0]) 188 | ly = len(bigq0[:,0]) 189 | xpos = np.arange(0,lx,1) 190 | ypos = np.arange(0,ly,1) 191 | xpos, ypos = np.meshgrid(xpos+0.25, ypos+0.25) 192 | 193 | xpos = xpos.flatten() 194 | ypos = ypos.flatten() 195 | zpos = np.zeros(lx*ly) 196 | 197 | dx = 0.5 *np.ones_like(zpos) 198 | dy = dx.copy() 199 | dz0 = bigq0.flatten() 200 | dz1 = bigq1.flatten() 201 | 202 | cs = ['r', 'r', 'r', 'r', 'g', 'g', 'g', 'g','b','b','b','b'] * ly 203 | 204 | ax2.bar3d(xpos,ypos,zpos, dx, dy, dz0, color=cs) 205 | ax3.bar3d(xpos,ypos,zpos, dx, dy, dz1, color=cs) 206 | # plt.show() 207 | 208 | EVAL_BEHAVIOR_2 = [ 209 | {'nb_agents': 2, 'nb_targets': 2}, 210 | ] 211 | EVAL_BEHAVIOR_4 = [ 212 | {'nb_agents': 4, 'nb_targets': 4}, 213 | ] 214 | -------------------------------------------------------------------------------- /algos/maTT/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | __author__ = 'Christopher D Hsu' 7 | __copyright__ = '' 8 | __credits__ = ['Christopher D Hsu'] 9 | __license__ = '' 10 | __version__ = '0.0.1' 11 | __maintainer__ = 'Christopher D Hsu' 12 | __email__ = 'chsu8@seas.upenn.edu' 13 | __status__ = 'Dev' 14 | 15 | """ 16 | @InProceedings{lee2019set, 17 | title={Set Transformer: A Framework for Attention-based Permutation-Invariant Neural Networks}, 18 | author={Lee, Juho and Lee, Yoonho and Kim, Jungtaek and Kosiorek, Adam and Choi, Seungjin and Teh, Yee Whye}, 19 | booktitle={Proceedings of the 36th International Conference on Machine Learning}, 20 | pages={3744--3753}, 21 | year={2019} 22 | } 23 | """ 24 | 25 | class MAB(nn.Module): 26 | def __init__(self, dim_Q, dim_K, dim_V, num_heads, ln=False): 27 | super(MAB, self).__init__() 28 | self.dim_V = dim_V 29 | self.num_heads = num_heads 30 | self.fc_q = nn.Linear(dim_Q, dim_V) 31 | self.fc_k = nn.Linear(dim_K, dim_V) 32 | self.fc_v = nn.Linear(dim_K, dim_V) 33 | if ln: 34 | self.ln0 = nn.LayerNorm(dim_V) 35 | self.ln1 = nn.LayerNorm(dim_V) 36 | self.fc_o = nn.Linear(dim_V, dim_V) 37 | 38 
| def forward(self, Q, K): 39 | Q = self.fc_q(Q) 40 | K, V = self.fc_k(K), self.fc_v(K) 41 | 42 | dim_split = self.dim_V // self.num_heads 43 | Q_ = torch.cat(Q.split(dim_split, 2), 0) 44 | K_ = torch.cat(K.split(dim_split, 2), 0) 45 | V_ = torch.cat(V.split(dim_split, 2), 0) 46 | 47 | A = torch.softmax(Q_.bmm(K_.transpose(1,2))/math.sqrt(self.dim_V), 2) 48 | O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2) 49 | O = O if getattr(self, 'ln0', None) is None else self.ln0(O) 50 | O = O + F.relu(self.fc_o(O)) 51 | O = O if getattr(self, 'ln1', None) is None else self.ln1(O) 52 | return O 53 | 54 | 55 | class SAB(nn.Module): 56 | def __init__(self, dim_in, dim_out, num_heads, ln=False): 57 | super(SAB, self).__init__() 58 | self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln) 59 | 60 | def forward(self, X): 61 | return self.mab(X, X) 62 | 63 | 64 | class ISAB(nn.Module): 65 | def __init__(self, dim_in, dim_out, num_heads, num_inds, ln=False): 66 | super(ISAB, self).__init__() 67 | self.I = nn.Parameter(torch.Tensor(1, num_inds, dim_out)) 68 | nn.init.xavier_uniform_(self.I) 69 | self.mab0 = MAB(dim_out, dim_in, dim_out, num_heads, ln=ln) 70 | self.mab1 = MAB(dim_in, dim_out, dim_out, num_heads, ln=ln) 71 | 72 | def forward(self, X): 73 | H = self.mab0(self.I.repeat(X.size(0), 1, 1), X) 74 | return self.mab1(X, H) 75 | 76 | 77 | class PMA(nn.Module): 78 | def __init__(self, dim, num_heads, num_seeds, ln=False): 79 | super(PMA, self).__init__() 80 | self.S = nn.Parameter(torch.Tensor(1, num_seeds, dim)) 81 | nn.init.xavier_uniform_(self.S) 82 | self.mab = MAB(dim, dim, dim, num_heads, ln=ln) 83 | 84 | def forward(self, X): 85 | return self.mab(self.S.repeat(X.size(0), 1, 1), X) -------------------------------------------------------------------------------- /algos/maTT/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import algos.maTT.core as core 4 | 5 | __author__ = 'Christopher D Hsu' 6 | __copyright__ = '' 7 | __credits__ = ['Christopher D Hsu', 'SpinningUp'] 8 | __license__ = '' 9 | __version__ = '0.0.1' 10 | __maintainer__ = 'Christopher D Hsu' 11 | __email__ = 'chsu8@seas.upenn.edu' 12 | __status__ = 'Dev' 13 | 14 | class ReplayBufferSet(object): 15 | def __init__(self, size, obs_dim, act_dim): 16 | """Create Replay buffer. edited to include nb_targets_idx buffer 17 | 18 | Parameters 19 | ---------- 20 | size: int 21 | Max number of transitions to store in the buffer. When the buffer 22 | overflows the old memories are dropped. 
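Note: transitions are kept in separate buckets keyed by the number of targets in the stored observation (obs_t.shape[0] in store()), so each batch drawn by sample_batch() stacks observations of a single set size into a fixed [batch, nb_targets, obs_dim] shape.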
23 | """ 24 | self._storage = {} 25 | self._maxsize = size 26 | self._next_idx = 0 27 | self._obs_dim = obs_dim 28 | self._act_dim = act_dim 29 | 30 | def __len__(self): 31 | return len(self._storage) 32 | 33 | def store(self, obs_t, action, reward, obs_tp1, done):#, nb_targets): 34 | nb_targets = obs_t.shape[0] 35 | try: 36 | len(self._storage[nb_targets]) 37 | except: 38 | self._storage[nb_targets] = [] 39 | 40 | ## Combine into a tuple and append to storage 41 | data = (obs_t, obs_tp1, action, reward, done) 42 | 43 | if self._next_idx >= len(self._storage[nb_targets]): 44 | self._storage[nb_targets].append(data) 45 | else: 46 | self._storage[nb_targets][self._next_idx] = data 47 | 48 | self._next_idx = int((self._next_idx + 1) % self._maxsize) 49 | 50 | def _encode_sample(self, size, idxes, nb_targets): 51 | """Given indexes, return a batch of data 52 | action data is outputted as an column vector 53 | """ 54 | batch = dict(obs=np.zeros((size, nb_targets, self._obs_dim)), 55 | obs2=np.zeros((size, nb_targets, self._obs_dim)), 56 | act=np.zeros((size, 1)), 57 | rew=np.zeros((size)), 58 | done=np.zeros((size))) 59 | 60 | for i, idx in enumerate(idxes): 61 | data = self._storage[nb_targets][idx] 62 | obs_t, obs_tp1, action, reward, done = data 63 | 64 | batch['obs'][i] = obs_t 65 | batch['obs2'][i] = obs_tp1 66 | batch['act'][i] = action 67 | batch['rew'][i] = reward 68 | batch['done'][i] = done 69 | 70 | batch['obs'] = torch.as_tensor(batch['obs'], dtype=torch.float32) 71 | batch['obs2'] = torch.as_tensor(batch['obs2'], dtype=torch.float32) 72 | batch['act'] = torch.as_tensor(batch['act']) 73 | batch['rew'] = torch.as_tensor(batch['rew'], dtype=torch.float32) 74 | batch['done'] = torch.as_tensor(batch['done']) 75 | return batch 76 | 77 | def sample_batch(self, batch_size):#, num_targets): 78 | """Sample a batch of experiences. Randomly sample a batch of experiences all 79 | with the same number of targets. Number of targets is sampled from the range [1,num_targets] 80 | 81 | Parameters 82 | ---------- 83 | batch_size: int 84 | How many transitions to sample. 85 | 86 | Returns 87 | ------- 88 | obs_batch: np.array [batch, nb_targets, obs_dim] 89 | batch of observations 90 | act_batch: np.array [batch, 1] 91 | batch of actions executed given obs_batch 92 | rew_batch: np.array [batch] 93 | rewards received as results of executing act_batch 94 | next_obs_batch: np.array [batch, nb_targets, obs_dim] 95 | next set of observations seen after executing act_batch 96 | done_mask: np.array [batch] 97 | done_mask[i] = 1 if executing act_batch[i] resulted in 98 | the end of an episode and 0 otherwise. 
99 | """ 100 | 101 | # nb_targets = np.random.random_integers(1, num_targets) 102 | nb_targets = np.random.randint(1, len(self._storage)) 103 | idxes = [np.random.randint(0, len(self._storage[nb_targets]) - 1) for _ in range(batch_size)] 104 | return self._encode_sample(batch_size, idxes, nb_targets) -------------------------------------------------------------------------------- /algos/maTT/run_script.py: -------------------------------------------------------------------------------- 1 | import pdb, argparse, os, datetime, json, pickle 2 | import torch 3 | import torch.nn as nn 4 | 5 | import gym 6 | from gym import wrappers 7 | 8 | from algos.maTT.dql import doubleQlearning 9 | import algos.maTT.core as core 10 | 11 | import envs 12 | 13 | __author__ = 'Christopher D Hsu' 14 | __copyright__ = '' 15 | __credits__ = ['Christopher D Hsu'] 16 | __license__ = '' 17 | __version__ = '0.0.1' 18 | __maintainer__ = 'Christopher D Hsu' 19 | __email__ = 'chsu8@seas.upenn.edu' 20 | __status__ = 'Dev' 21 | 22 | 23 | os.environ['OPENBLAS_NUM_THREADS'] = '1' 24 | os.environ['OMP_NUM_THREADS'] = '1' 25 | os.environ['MKL_NUM_THREADS'] = '1' 26 | 27 | BASE_DIR = os.path.dirname('/'.join(str.split(os.path.realpath(__file__),'/')[:-2])) 28 | 29 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 30 | parser.add_argument('--env', help='environment ID', default='setTracking-v0') 31 | parser.add_argument('--map', type=str, default="emptyMed") 32 | parser.add_argument('--nb_agents', type=int, default=4) 33 | parser.add_argument('--nb_targets', type=int, default=4) 34 | parser.add_argument('--seed', help='RNG seed', type=int, default=0) 35 | parser.add_argument('--mode', choices=['train', 'test', 'test-behavior'], default='train') 36 | parser.add_argument('--steps_per_epoch', type=int, default=25000) 37 | parser.add_argument('--epochs', type=int, default=20) 38 | parser.add_argument('--batch_size', type=int, default=256) 39 | parser.add_argument('--alpha', type=float, default=0.4) 40 | parser.add_argument('--gamma', type=float, default=.99) 41 | parser.add_argument('--polyak', type=float, default=0.999) #tau in polyak averaging 42 | parser.add_argument('--hiddens', type=int, default=128) 43 | parser.add_argument('--learning_rate', type=float, default=0.001) 44 | parser.add_argument('--learning_rate_period', type=float, default=0.7) #Back half portion with cosine lr schedule 45 | parser.add_argument('--grad_clip', type=int, default=0.2) 46 | parser.add_argument('--start_steps', type=int, default=20000) 47 | parser.add_argument('--update_after', type=int, default=20000) 48 | parser.add_argument('--num_eval_episodes', type=int, default=2) #During training 49 | parser.add_argument('--replay_size', type=int, default=int(1e6)) 50 | parser.add_argument('--max_ep_len', type=int, default=200) 51 | parser.add_argument('--checkpoint_freq', type=int, default=1) 52 | 53 | parser.add_argument('--record',type=int, default=0) 54 | parser.add_argument('--render', type=int, default=0) 55 | parser.add_argument('--nb_test_eps',type=int, default=50) 56 | parser.add_argument('--log_dir', type=str, default='./results/maTT') 57 | parser.add_argument('--log_fname', type=str, default='model.pt') 58 | parser.add_argument('--repeat', type=int, default=1) 59 | parser.add_argument('--eval_type', choices=['random', 'fixed_4', 60 | 'fixed_2', 'fixed_nb'], default='fixed_nb') 61 | 62 | parser.add_argument('--torch_threads', type=int, default=1) 63 | parser.add_argument('--amp', type=int, default=0) 64 | 65 
| args = parser.parse_args() 66 | 67 | 68 | def train(seed, save_dir): 69 | save_dir_0 = os.path.join(save_dir, 'seed_%d'%seed) 70 | 71 | env = envs.make(args.env, 72 | 'ma_target_tracking', 73 | render=bool(args.render), 74 | record=bool(args.record), 75 | directory=save_dir_0, 76 | map_name=args.map, 77 | num_agents=args.nb_agents, 78 | num_targets=args.nb_targets, 79 | is_training=True, 80 | ) 81 | 82 | # Create env function 83 | env_fn = lambda : env 84 | 85 | #Training function 86 | model_kwargs = dict(dim_hidden=args.hiddens) 87 | logger_kwargs = dict(output_dir=save_dir_0, exp_name=save_dir_0) 88 | model = core.DeepSetmodel 89 | 90 | doubleQlearning( 91 | env_fn=env_fn, 92 | model=model, 93 | model_kwargs=model_kwargs, 94 | seed=seed, 95 | steps_per_epoch=args.steps_per_epoch, 96 | epochs=args.epochs, 97 | gamma=args.gamma, 98 | polyak=args.polyak, 99 | lr=args.learning_rate, 100 | lr_period=args.learning_rate_period, 101 | alpha=args.alpha, 102 | grad_clip=args.grad_clip, 103 | batch_size=args.batch_size, 104 | start_steps=args.start_steps, 105 | update_after=args.update_after, 106 | num_test_episodes=args.num_eval_episodes, 107 | replay_size=args.replay_size, 108 | max_ep_len=args.max_ep_len, 109 | logger_kwargs=logger_kwargs, 110 | save_freq=args.checkpoint_freq, 111 | render=bool(args.render), 112 | torch_threads=args.torch_threads, 113 | amp=bool(args.amp) 114 | ) 115 | 116 | def test(seed): 117 | from algos.maTT.evaluation import Test, load_pytorch_policy 118 | 119 | env = envs.make(args.env, 120 | 'ma_target_tracking', 121 | render=bool(args.render), 122 | record=bool(args.record), 123 | directory=args.log_dir, 124 | map_name=args.map, 125 | num_agents=args.nb_agents, 126 | num_targets=args.nb_targets, 127 | is_training=False, 128 | ) 129 | 130 | # Load saved policy 131 | model_kwargs = dict(dim_hidden=args.hiddens) 132 | model = core.DeepSetmodel(env.observation_space, env.action_space, **model_kwargs) 133 | policy = load_pytorch_policy(args.log_dir, args.log_fname, model) 134 | 135 | # Testing environment 136 | Eval = Test() 137 | Eval.test(args, env, policy) 138 | 139 | def testbehavior(seed): 140 | from algos.maTT.evaluation_behavior import TestBehavior, load_pytorch_policy 141 | import algos.maTT.core_behavior as core_behavior 142 | 143 | env = envs.make(args.env, 144 | 'ma_target_tracking', 145 | render=bool(args.render), 146 | record=bool(args.record), 147 | directory=args.log_dir, 148 | map_name=args.map, 149 | num_agents=args.nb_agents, 150 | num_targets=args.nb_targets, 151 | is_training=False, 152 | ) 153 | 154 | # Load saved policy 155 | model_kwargs = dict(dim_hidden=args.hiddens) 156 | model = core_behavior.DeepSetmodel(env.observation_space, env.action_space, **model_kwargs) 157 | policy = load_pytorch_policy(args.log_dir, args.log_fname, model) 158 | 159 | # Testing environment 160 | Eval = TestBehavior() 161 | Eval.test(args, env, policy) 162 | 163 | 164 | if __name__ == '__main__': 165 | if args.mode == 'train': 166 | save_dir = os.path.join(args.log_dir, '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")])) 167 | if not os.path.exists(save_dir): 168 | os.makedirs(save_dir) 169 | else: 170 | ValueError("The directory already exists...", save_dir) 171 | 172 | notes = input("Any notes for this experiment? 
: ") 173 | f = open(os.path.join(save_dir, "notes.txt"), 'w') 174 | f.write(notes) 175 | f.close() 176 | 177 | seed = args.seed 178 | list_records = [] 179 | for _ in range(args.repeat): 180 | print("===== TRAIN A TARGET TRACKING RL AGENT : SEED %d ====="%seed) 181 | results = train(seed, save_dir) 182 | json.dump(vars(args), open(os.path.join(save_dir, "seed_%d"%seed, 'learning_prop.json'), 'w')) 183 | seed += 1 184 | args.seed += 1 185 | 186 | elif args.mode =='test': 187 | test(args.seed) 188 | 189 | elif args.mode =='test-behavior': 190 | testbehavior(args.seed) 191 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: scalableMARL 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - python=3.8 6 | - pip 7 | - numpy 8 | - nvidia::cudatoolkit=11.3 9 | - pytorch::pytorch 10 | - wheel 11 | - h5py 12 | - typing-extensions 13 | - cython #==0.29.0 14 | - numpy #==1.19.0 15 | - psutil ==5.8.0 16 | - tabulate ==0.8.3 17 | - llvmlite 18 | - numba #==0.52.0 19 | - cloudpickle ==1.6.0 20 | - joblib #==0.13.2 21 | - dill ==0.3.3 22 | - scikit-learn #==0.24.0 23 | - scipy #==1.5.4 24 | - pandas #==1.1.5 25 | - requests 26 | - lz4 27 | - pyyaml 28 | - mpi4py 29 | - tensorboard 30 | - seaborn 31 | - pillow 32 | - pip: 33 | - filterpy 34 | - opencv-python 35 | - librosa==0.4.2 36 | - tqdm==4.32.2 37 | - progressbar2==3.53.1 38 | - gym[atari] -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | """ envs/ folder is for openAIgym-like simulation environments 2 | To use, 3 | >>> import envs 4 | >>> env = envs.make("NAME_OF_ENV") 5 | 6 | """ 7 | import gym 8 | 9 | def make(env_name, type, render=False, record=False, directory='', **kwargs): 10 | """ 11 | env_name : str 12 | name of an environment. (e.g. 'Cartpole-v0') 13 | type : str 14 | type of an environment. One of ['atari', 'classic_control', 15 | 'classic_mdp','target_tracking'] 16 | """ 17 | 18 | if type == 'ma_target_tracking': 19 | import envs.maTTenv 20 | env = maTTenv.make(env_name, render=render, record=record, 21 | directory=directory, **kwargs) 22 | 23 | else: 24 | raise ValueError('Designate the right type of the environment.') 25 | 26 | return env 27 | -------------------------------------------------------------------------------- /envs/maTTenv/__init__.py: -------------------------------------------------------------------------------- 1 | from envs.utilities.ma_time_limit import maTimeLimit 2 | 3 | def make(env_name, render=False, figID=0, record=False, directory='', 4 | T_steps=None, num_agents=2, num_targets=1, **kwargs): 5 | """ 6 | env_name : str 7 | name of an environment. (e.g. 'Cartpole-v0') 8 | type : str 9 | type of an environment. 
One of ['atari', 'classic_control', 10 | 'classic_mdp','target_tracking'] 11 | """ 12 | if T_steps is None: 13 | T_steps = 200 14 | 15 | if env_name == 'setTracking-v0': 16 | from envs.maTTenv.env.setTracking_v0 import setTrackingEnv0 17 | env0 = setTrackingEnv0(num_agents=num_agents, num_targets=num_targets, **kwargs) 18 | elif env_name == 'setTracking-vGreedy': 19 | from envs.maTTenv.env.setTracking_vGreedy import setTrackingEnvGreedy 20 | env0 = setTrackingEnvGreedy(num_agents=num_agents, num_targets=num_targets, **kwargs) 21 | elif env_name == 'setTracking-vkGreedy': 22 | from envs.maTTenv.env.setTracking_vkGreedy import setTrackingEnvkGreedy 23 | env0 = setTrackingEnvkGreedy(num_agents=num_agents, num_targets=num_targets, **kwargs) 24 | elif env_name == 'setTracking-model': 25 | from envs.maTTenv.env.setTracking_model import setTrackingEnvModel 26 | env0 = setTrackingEnvModel(num_agents=num_agents, num_targets=num_targets, **kwargs) 27 | elif env_name == 'setTracking-vGru': 28 | from envs.maTTenv.env.setTracking_vGru import setTrackingEnvGru 29 | env0 = setTrackingEnvGru(num_agents=num_agents, num_targets=num_targets, **kwargs) 30 | else: 31 | raise ValueError('No such environment exists.') 32 | 33 | env = maTimeLimit(env0, max_episode_steps=T_steps) 34 | 35 | if render: 36 | from envs.maTTenv.display_wrapper import Display2D 37 | env = Display2D(env, figID=figID) 38 | if record: 39 | from envs.maTTenv.display_wrapper import Video2D 40 | env = Video2D(env, dirname = directory) 41 | 42 | return env -------------------------------------------------------------------------------- /envs/maTTenv/agent_models.py: -------------------------------------------------------------------------------- 1 | """Dynamic Object Models 2 | 3 | AgentDoubleInt2D : Double Integrator Model in 2D 4 | state: x,y,xdot,ydot 5 | AgentSE2 : SE2 Model 6 | state x,y,theta 7 | 8 | Agent2DFixedPath : Model with a pre-defined path 9 | Agent_InfoPlanner : Model from the InfoPlanner repository 10 | 11 | SE2Dynamics : update dynamics function with a control input -- linear, angular velocities 12 | SEDynamicsVel : update dynamics function for contant linear and angular velocities 13 | 14 | edited by christopher-hsu from coco66 for multi_agent 15 | """ 16 | 17 | import numpy as np 18 | from envs.maTTenv.metadata import METADATA 19 | import envs.maTTenv.util as util 20 | 21 | class Agent(object): 22 | def __init__(self, agent_id, dim, sampling_period, limit, collision_func, margin=METADATA['margin']): 23 | self.agent_id = agent_id 24 | self.dim = dim 25 | self.sampling_period = sampling_period 26 | self.limit = limit 27 | self.collision_func = collision_func 28 | self.margin = margin 29 | 30 | def range_check(self): 31 | self.state = np.clip(self.state, self.limit[0], self.limit[1]) 32 | 33 | def collision_check(self, pos): 34 | return self.collision_func(pos[:2]) 35 | 36 | def margin_check(self, pos, target_pos): 37 | return any(np.sqrt(np.sum((pos - target_pos)**2, axis=1)) < self.margin) # no update 38 | 39 | def reset(self, init_state): 40 | self.state = init_state 41 | 42 | class AgentSE2(Agent): 43 | def __init__(self, agent_id, dim, sampling_period, limit, collision_func, 44 | margin=METADATA['margin'], policy=None): 45 | super().__init__(agent_id, dim, sampling_period, limit, collision_func, margin=margin) 46 | self.policy = policy 47 | 48 | def reset(self, init_state): 49 | super().reset(init_state) 50 | if self.policy: 51 | self.policy.reset(init_state) 52 | 53 | def update(self, control_input=None, margin_pos=None, 
col=False): 54 | """ 55 | control_input : [linear_velocity, angular_velocity] 56 | margin_pos : a minimum distance to a target 57 | """ 58 | if control_input is None: 59 | control_input = self.policy.get_control(self.state) 60 | 61 | new_state = SE2Dynamics(self.state, self.sampling_period, control_input) 62 | is_col = 0 63 | if self.collision_check(new_state[:2]): 64 | is_col = 1 65 | new_state[:2] = self.state[:2] 66 | if self.policy is not None: 67 | # self.policy.collision(new_state) 68 | corrected_policy = self.policy.collision(new_state) 69 | if corrected_policy is not None: 70 | new_state = SE2DynamicsVel(self.state, 71 | self.sampling_period, corrected_policy) 72 | 73 | elif margin_pos is not None: 74 | if self.margin_check(new_state[:2], margin_pos): 75 | new_state[:2] = self.state[:2] 76 | 77 | self.state = new_state 78 | self.range_check() 79 | 80 | return is_col 81 | 82 | def SE2Dynamics(x, dt, u): 83 | assert(len(x)==3) 84 | tw = dt * u[1] 85 | # Update the agent state 86 | if abs(tw) < 0.001: 87 | diff = np.array([dt*u[0]*np.cos(x[2]+tw/2), 88 | dt*u[0]*np.sin(x[2]+tw/2), 89 | tw]) 90 | else: 91 | diff = np.array([u[0]/u[1]*(np.sin(x[2]+tw) - np.sin(x[2])), 92 | u[0]/u[1]*(np.cos(x[2]) - np.cos(x[2]+tw)), 93 | tw]) 94 | new_x = x + diff 95 | new_x[2] = util.wrap_around(new_x[2]) 96 | return new_x 97 | 98 | def SE2DynamicsVel(x, dt, u=None): 99 | assert(len(x)==5) # x = [x,y,theta,v,w] 100 | if u is None: 101 | u = x[-2:] 102 | odom = SE2Dynamics(x[:3], dt, u) 103 | return np.concatenate((odom, u)) 104 | 105 | class AgentDoubleInt2D(Agent): 106 | def __init__(self, agent_id, dim, sampling_period, limit, collision_func, 107 | margin=METADATA['margin'], A=None, W=None): 108 | super().__init__(agent_id, dim, sampling_period, limit, collision_func, margin=margin) 109 | self.A = np.eye(self.dim) if A is None else A 110 | self.W = W 111 | 112 | def update(self, margin_pos=None): 113 | new_state = np.matmul(self.A, self.state) 114 | if self.W is not None: 115 | noise_sample = np.random.multivariate_normal(np.zeros(self.dim,), self.W) 116 | new_state += noise_sample 117 | if self.collision_check(new_state[:2]): 118 | new_state = self.collision_control(new_state) 119 | 120 | self.state = new_state 121 | # self.range_check() 122 | 123 | def collision_control(self, new_state): 124 | new_state[0] = self.state[0] 125 | new_state[1] = self.state[1] 126 | if self.dim > 2: 127 | new_state[2] = -2 * .2 * new_state[2] + np.random.normal(0.0, 0.2)#-0.001*np.sign(new_state[2]) 128 | new_state[3] = -2 * .2 * new_state[3] + np.random.normal(0.0, 0.2)#-0.001*np.sign(new_state[3]) 129 | return new_state 130 | 131 | class AgentDoubleInt2D_Avoidance(AgentDoubleInt2D): 132 | def __init__(self, agent_id, dim, sampling_period, limit, collision_func, 133 | margin=METADATA['margin'], A=None, W=None, obs_check_func=None): 134 | super().__init__(agent_id, dim, sampling_period, limit, 135 | collision_func, margin=margin, A=A, W=W) 136 | self.obs_check_func = obs_check_func 137 | 138 | def update(self, margin_pos=None): 139 | new_state = np.matmul(self.A, self.state[:self.dim]) 140 | if self.W is not None: 141 | noise_sample = np.random.multivariate_normal(np.zeros(self.dim,), self.W) 142 | new_state += noise_sample 143 | 144 | is_col = 0 145 | if self.collision_check(new_state[:2]): 146 | new_state = self.collision_control() 147 | is_col = 1 148 | 149 | if self.obs_check_func is not None: 150 | del_vx, del_vy = self.obstacle_detour_maneuver( 151 | r_margin=METADATA['target_speed_limit']*self.sampling_period*2) 
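As a quick check on the `SE2Dynamics` function defined earlier in this file (exact arc integration of the unicycle model, with a straight-line fallback when |dt*w| is tiny), here is a single step with the 0.5 s sampling period the environments use; the numbers are only for illustration.

```python
import numpy as np
from envs.maTTenv.agent_models import SE2Dynamics

x = np.array([0.0, 0.0, 0.0])   # [x, y, theta]
u = (1.0, 0.5)                  # 1 m/s forward, 0.5 rad/s turn
dt = 0.5                        # sampling period used by the envs

# Arc branch: x' = x + v/w*(sin(th+dt*w)-sin(th)), y' = y + v/w*(cos(th)-cos(th+dt*w))
x_next = SE2Dynamics(x, dt, u)  # approx [0.495, 0.062, 0.25]
```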
152 | new_state[2] += del_vx 153 | new_state[3] += del_vy 154 | 155 | self.state = new_state 156 | self.range_check() 157 | return is_col 158 | 159 | def range_check(self): 160 | """ 161 | Limit the position and the velocity. 162 | self.limit[:][2] = self.limit[:][3] = speed limit. The velocity components 163 | are clipped proportional to the original values. 164 | """ 165 | self.state[:2] = np.clip(self.state[:2], self.limit[0][:2], self.limit[1][:2]) 166 | v_square = self.state[2:]**2 167 | del_v = np.sum(v_square) - self.limit[1][2]**2 168 | if del_v > 0.0: 169 | self.state[2] = np.sign(self.state[2]) * np.sqrt(max(0.0, 170 | v_square[0] - del_v * v_square[0] / (v_square[0] + v_square[1]))) 171 | self.state[3] = np.sign(self.state[3]) * np.sqrt(max(0.0, 172 | v_square[1] - del_v * v_square[1] / (v_square[0] + v_square[1]))) 173 | 174 | def collision_control(self): 175 | """ 176 | Assigns a new velocity deviating the agent with an angle (pi/2, pi) from 177 | the closest obstacle point. 178 | """ 179 | odom = [self.state[0], self.state[1], np.arctan2(self.state[3], self.state[2])] 180 | # obs_pos = self.obs_check_func(odom) 181 | v = np.sqrt(np.sum(np.square(self.state[2:]))) + np.random.normal(0.0,1.0) 182 | if obs_pos[1] >= 0: 183 | th = obs_pos[1] - (1 + np.random.random()) * np.pi/2 184 | else: 185 | th = obs_pos[1] + (1 + np.random.random()) * np.pi/2 186 | 187 | state = np.array([self.state[0], self.state[1], v * np.cos(th + odom[2]), v * np.sin(th + odom[2])]) 188 | return state 189 | 190 | def obstacle_detour_maneuver(self, r_margin=1.0): 191 | """ 192 | Returns del_vx, del_vy which will be added to the new state. 193 | This provides a repultive force from the closest obstacle point based 194 | on the current velocity, a linear distance, and an angular distance. 195 | 196 | Parameters: 197 | ---------- 198 | r_margin : float. A margin from an obstalce that you want to consider 199 | as the minimum distance the target can get close to the obstacle. 200 | """ 201 | odom = [self.state[0], self.state[1], np.arctan2(self.state[3], self.state[2])] 202 | # obs_pos = self.obs_check_func(odom) 203 | speed = np.sqrt(np.sum(self.state[2:]**2)) 204 | rot_ang = np.pi/2 * (1. + 1./(1. + np.exp(-(speed-0.5*METADATA['target_speed_limit'])))) 205 | if obs_pos is not None: 206 | acc = max(0.0, speed * np.cos(obs_pos[1])) / max(METADATA['margin2wall'], obs_pos[0] - r_margin) 207 | th = obs_pos[1] - rot_ang if obs_pos[1] >= 0 else obs_pos[1] + rot_ang 208 | del_vx = acc * np.cos(th + odom[2]) * self.sampling_period 209 | del_vy = acc * np.sin(th + odom[2]) * self.sampling_period 210 | return del_vx, del_vy 211 | else: 212 | return 0., 0. 
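The `range_check` above caps the target speed by shrinking each velocity component in proportion to its share of the squared speed, which works out to rescaling the velocity vector onto the speed limit. A small numeric check of that equivalence; the values are arbitrary.

```python
import numpy as np

v = np.array([3.0, 4.0])   # current velocity, speed 5.0
v_limit = 2.0              # speed limit, i.e. self.limit[1][2]

# proportional-shrink form used in range_check (only applied when del_v > 0)
v_sq = v**2
del_v = v_sq.sum() - v_limit**2
clipped = np.sign(v) * np.sqrt(np.maximum(0.0, v_sq - del_v * v_sq / v_sq.sum()))

# equivalent direct rescale onto the limit
rescaled = v * v_limit / np.linalg.norm(v)

assert np.allclose(clipped, rescaled)   # both give [1.2, 1.6]
```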
213 | 214 | 215 | class Agent2DFixedPath(Agent): 216 | def __init__(self, dim, sampling_period, limit, collision_func, path, margin=METADATA['margin']): 217 | Agent.__init__(self, dim, sampling_period, limit, collision_func, margin=margin) 218 | self.path = path 219 | 220 | def update(self, margin_pos=None): 221 | # fixed policy for now 222 | self.t += 1 223 | new_state = np.concatenate((self.path[self.t][:2], self.path[self.t][-2:])) 224 | if margin_pos is not None: 225 | if type(margin_pos) != list: 226 | margin_pos = [margin_pos] 227 | in_margin = False 228 | while(True): 229 | in_margin = self.margin_check(new_state[:2], margin_pos) 230 | if in_margin: 231 | new_state[:2] = new_state[:2] + 0.01*(np.random.random((2,))-0.5) 232 | else: 233 | break 234 | self.state = new_state 235 | 236 | def reset(self, init_state): 237 | self.t = 0 238 | self.state = np.concatenate((self.path[self.t][:2], self.path[self.t][-2:])) 239 | 240 | 241 | class AgentDoubleInt2D_Nonlinear(AgentDoubleInt2D): 242 | def __init__(self, dim, sampling_period, limit, collision_func, 243 | margin=METADATA['margin'], A=None, W=None, obs_check_func=None): 244 | AgentDoubleInt2D.__init__(self, dim, sampling_period, limit, 245 | collision_func, margin=margin, A=A, W=W) 246 | self.obs_check_func = obs_check_func 247 | 248 | def update(self, margin_pos=None): 249 | new_state = np.matmul(self.A, self.state[:self.dim]) 250 | if self.W is not None: 251 | noise_sample = np.random.multivariate_normal(np.zeros(self.dim,), self.W) 252 | new_state += noise_sample 253 | 254 | is_col = 0 255 | if self.collision_check(new_state[:2]): 256 | new_state = self.collision_control() 257 | is_col = 1 258 | 259 | if self.obs_check_func is not None: 260 | del_vx, del_vy = self.obstacle_detour_maneuver( 261 | r_margin=METADATA['target_speed_limit']*self.sampling_period*2) 262 | new_state[2] += del_vx 263 | new_state[3] += del_vy 264 | 265 | self.state = new_state 266 | self.range_check() 267 | return is_col 268 | 269 | def range_check(self): 270 | """ 271 | Limit the position and the velocity. 272 | self.limit[:][2] = self.limit[:][3] = speed limit. The velocity components 273 | are clipped proportional to the original values. 274 | """ 275 | self.state[:2] = np.clip(self.state[:2], self.limit[0][:2], self.limit[1][:2]) 276 | v_square = self.state[2:]**2 277 | del_v = np.sum(v_square) - self.limit[1][2]**2 278 | if del_v > 0.0: 279 | self.state[2] = np.sign(self.state[2]) * np.sqrt(max(0.0, 280 | v_square[0] - del_v * v_square[0] / (v_square[0] + v_square[1]))) 281 | self.state[3] = np.sign(self.state[3]) * np.sqrt(max(0.0, 282 | v_square[1] - del_v * v_square[1] / (v_square[0] + v_square[1]))) 283 | 284 | def collision_control(self): 285 | """ 286 | Assigns a new velocity deviating the agent with an angle (pi/2, pi) from 287 | the closest obstacle point. 288 | """ 289 | odom = [self.state[0], self.state[1], np.arctan2(self.state[3], self.state[2])] 290 | obs_pos = self.obs_check_func(odom) 291 | v = np.sqrt(np.sum(np.square(self.state[2:]))) + np.random.normal(0.0,1.0) 292 | if obs_pos[1] >= 0: 293 | th = obs_pos[1] - (1 + np.random.random()) * np.pi/2 294 | else: 295 | th = obs_pos[1] + (1 + np.random.random()) * np.pi/2 296 | 297 | state = np.array([self.state[0], self.state[1], v * np.cos(th + odom[2]), v * np.sin(th + odom[2])]) 298 | return state 299 | 300 | def obstacle_detour_maneuver(self, r_margin=1.0): 301 | """ 302 | Returns del_vx, del_vy which will be added to the new state. 
303 | This provides a repultive force from the closest obstacle point based 304 | on the current velocity, a linear distance, and an angular distance. 305 | 306 | Parameters: 307 | ---------- 308 | r_margin : float. A margin from an obstalce that you want to consider 309 | as the minimum distance the target can get close to the obstacle. 310 | """ 311 | odom = [self.state[0], self.state[1], np.arctan2(self.state[3], self.state[2])] 312 | obs_pos = self.obs_check_func(odom) 313 | speed = np.sqrt(np.sum(self.state[2:]**2)) 314 | rot_ang = np.pi/2 * (1. + 1./(1. + np.exp(-(speed-0.5*METADATA['target_speed_limit'])))) 315 | if obs_pos is not None: 316 | acc = max(0.0, speed * np.cos(obs_pos[1])) / max(METADATA['margin2wall'], obs_pos[0] - r_margin) 317 | th = obs_pos[1] - rot_ang if obs_pos[1] >= 0 else obs_pos[1] + rot_ang 318 | del_vx = acc * np.cos(th + odom[2]) * self.sampling_period 319 | del_vy = acc * np.sin(th + odom[2]) * self.sampling_period 320 | return del_vx, del_vy 321 | else: 322 | return 0., 0. -------------------------------------------------------------------------------- /envs/maTTenv/belief_tracker.py: -------------------------------------------------------------------------------- 1 | """Belief Trackers 2 | 3 | KFbelief : Belief Update using Kalman Filter 4 | UKFbelief : Belief Update using Unscented Kalman Filter using filterpy library 5 | """ 6 | import numpy as np 7 | from numpy import linalg as LA 8 | import envs.maTTenv.util as util 9 | 10 | from filterpy.kalman import JulierSigmaPoints, UnscentedKalmanFilter, ExtendedKalmanFilter 11 | 12 | class KFbelief(object): 13 | """ 14 | Kalman Filter for the target tracking problem. 15 | 16 | state : target state 17 | x : agent state 18 | z : observation (r, alpha) 19 | """ 20 | def __init__(self, agent_id, dim, limit, dim_z=2, A=None, W=None, 21 | obs_noise_func=None, collision_func=None): 22 | """ 23 | dim : dimension of state 24 | limit : An array of two vectors. 25 | limit[0] = minimum values for the state, 26 | limit[1] = maximum value for the state 27 | dim_z : dimension of observation, 28 | A : state transition matrix 29 | W : state noise matrix 30 | obs_noise_func : observation noise matrix function of z 31 | collision_func : collision checking function 32 | """ 33 | self.agent_id = agent_id 34 | self.dim = dim 35 | self.limit = limit 36 | self.A = np.eye(self.dim) if A is None else A 37 | self.W = W if W is not None else np.zeros((self.dim, self.dim)) 38 | self.obs_noise_func = obs_noise_func 39 | self.collision_func = collision_func 40 | self.cov = np.eye(self.dim) 41 | 42 | def reset(self, init_state, init_cov): 43 | self.state = init_state 44 | self.cov = init_cov*np.eye(self.dim) 45 | 46 | def predict(self): 47 | # Prediction 48 | state_new = np.matmul(self.A, self.state) 49 | cov_new = np.matmul(np.matmul(self.A, self.cov), self.A.T) + self.W 50 | if True: # LA.det(cov_new) < 1e6: 51 | self.cov = cov_new 52 | self.state = np.clip(state_new, self.limit[0], self.limit[1]) 53 | 54 | def update(self, z_t, x_t): 55 | """ 56 | Parameters 57 | -------- 58 | z_t : observation - radial and angular distances from the agent. 59 | x_t : agent state (x, y, orientation) in the global frame. 
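Before the update math below, it may help to see how the environments drive a `KFbelief` each step: predict every step, correct only when the sensor actually observed the target. A minimal sketch; the arguments are placeholders for values produced by `env.observation()`.

```python
import numpy as np

def belief_step(belief, observed, z, agent_state):
    """One tracking step for a single target belief, mirroring the env loop.

    belief      : a KFbelief instance (see class above)
    observed    : bool, whether the sensor saw the target this step
    z           : np.array([r, alpha]) range/bearing measurement, agent frame
    agent_state : np.array([x, y, theta]) of the observing agent
    """
    belief.predict()                    # propagate mean and covariance through A and W
    if observed:
        belief.update(z, agent_state)   # range/bearing correction
    # log-determinant of the covariance is the uncertainty term fed to the policy
    return np.log(np.linalg.det(belief.cov))
```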
60 | """ 61 | # Kalman Filter Update 62 | r_pred, alpha_pred = util.relative_distance_polar( 63 | self.state[:2], x_t[:2], x_t[2]) 64 | diff_pred = self.state[:2] - x_t[:2] 65 | if self.dim == 2: 66 | Hmat = np.array([[diff_pred[0],diff_pred[1]], 67 | [-diff_pred[1]/r_pred, diff_pred[0]/r_pred]])/r_pred 68 | elif self.dim == 4: 69 | Hmat = np.array([[diff_pred[0], diff_pred[1], 0.0, 0.0], 70 | [-diff_pred[1]/r_pred, diff_pred[0]/r_pred, 0.0, 0.0]])/r_pred 71 | else: 72 | raise ValueError('target dimension for KF must be either 2 or 4') 73 | innov = z_t - np.array([r_pred, alpha_pred]) 74 | innov[1] = util.wrap_around(innov[1]) 75 | 76 | R = np.matmul(np.matmul(Hmat, self.cov), Hmat.T) \ 77 | + self.obs_noise_func((r_pred, alpha_pred)) 78 | K = np.matmul(np.matmul(self.cov, Hmat.T), LA.inv(R)) 79 | C = np.eye(self.dim) - np.matmul(K, Hmat) 80 | 81 | cov_new = np.matmul(C, self.cov) 82 | state_new = self.state + np.matmul(K, innov) 83 | 84 | if True: #LA.det(cov_new) < 1e6: 85 | self.cov = cov_new 86 | self.state = np.clip(state_new, self.limit[0], self.limit[1]) 87 | 88 | 89 | class UKFbelief(object): 90 | """ 91 | Unscented Kalman Filter from filterpy 92 | """ 93 | def __init__(self, dim, limit, dim_z=2, fx=None, W=None, obs_noise_func=None, 94 | collision_func=None, sampling_period=0.5, kappa=1): 95 | """ 96 | dim : dimension of state 97 | ***Assuming dim==3: (x,y,theta), dim==4: (x,y,xdot,ydot), dim==5: (x,y,theta,v,w) 98 | limit : An array of two vectors. limit[0] = minimum values for the state, 99 | limit[1] = maximum value for the state 100 | dim_z : dimension of observation, 101 | fx : x_tp1 = fx(x_t, dt), state dynamic function 102 | W : state noise matrix 103 | obs_noise_func : observation noise matrix function of z 104 | collision_func : collision checking function 105 | n : the number of sigma points 106 | """ 107 | self.dim = dim 108 | self.limit = limit 109 | self.W = W if W is not None else np.zeros((self.dim, self.dim)) 110 | self.obs_noise_func = obs_noise_func 111 | self.collision_func = collision_func 112 | 113 | def hx(y, agent_state, measure_func=util.relative_distance_polar): 114 | r_pred, alpha_pred = measure_func(y[:2], agent_state[:2], 115 | agent_state[2]) 116 | return np.array([r_pred, alpha_pred]) 117 | 118 | def x_mean_fn_(sigmas, Wm): 119 | if dim == 3: 120 | x = np.zeros(dim) 121 | sum_sin, sum_cos = 0., 0. 122 | for i in range(len(sigmas)): 123 | s = sigmas[i] 124 | x[0] += s[0] * Wm[i] 125 | x[1] += s[1] * Wm[i] 126 | sum_sin += np.sin(s[2])*Wm[i] 127 | sum_cos += np.cos(s[2])*Wm[i] 128 | x[2] = np.arctan2(sum_sin, sum_cos) 129 | return x 130 | elif dim == 5: 131 | x = np.zeros(dim) 132 | sum_sin, sum_cos = 0., 0. 133 | for i in range(len(sigmas)): 134 | s = sigmas[i] 135 | x[0] += s[0] * Wm[i] 136 | x[1] += s[1] * Wm[i] 137 | x[3] += s[3] * Wm[i] 138 | x[4] += s[4] * Wm[i] 139 | sum_sin += np.sin(s[2])*Wm[i] 140 | sum_cos += np.cos(s[2])*Wm[i] 141 | x[2] = np.arctan2(sum_sin, sum_cos) 142 | return x 143 | else: 144 | return None 145 | 146 | def z_mean_fn_(sigmas, Wm): 147 | x = np.zeros(dim_z) 148 | sum_sin, sum_cos = 0., 0. 
149 | for i in range(len(sigmas)): 150 | s = sigmas[i] 151 | x[0] += s[0] * Wm[i] 152 | sum_sin += np.sin(s[1])*Wm[i] 153 | sum_cos += np.cos(s[1])*Wm[i] 154 | x[1] = np.arctan2(sum_sin, sum_cos) 155 | return x 156 | 157 | def residual_x_(x, xp): 158 | """ 159 | x : state, [x, y, theta] 160 | xp : predicted state 161 | """ 162 | if dim == 3 or dim == 5: 163 | r_x = x - xp 164 | r_x[2] = util.wrap_around(r_x[2]) 165 | return r_x 166 | else: 167 | return None 168 | 169 | def residual_z_(z, zp): 170 | """ 171 | z : observation, [r, alpha] 172 | zp : predicted observation 173 | """ 174 | r_z = z - zp 175 | r_z[1] = util.wrap_around(r_z[1]) 176 | return r_z 177 | 178 | sigmas = JulierSigmaPoints(n=dim, kappa=kappa) 179 | 180 | self.ukf = UnscentedKalmanFilter(dim, dim_z, sampling_period, fx=fx, 181 | hx=hx, points=sigmas, x_mean_fn=x_mean_fn_, 182 | z_mean_fn=z_mean_fn_, residual_x=residual_x_, 183 | residual_z=residual_z_) 184 | 185 | def reset(self, init_state, init_cov): 186 | self.state = init_state 187 | self.cov = init_cov*np.eye(self.dim) 188 | self.ukf.x = self.state 189 | self.ukf.P = self.cov 190 | self.ukf.Q = self.W # process noise matrix 191 | 192 | def update(self, observed, z_t, x_t, u_t=None): 193 | # Kalman Filter Update 194 | self.ukf.predict(u=u_t) 195 | 196 | if observed: 197 | r_pred, alpha_pred = util.relative_distance_polar(self.ukf.x[:2], x_t[:2], x_t[2]) 198 | self.ukf.update(z_t, R=self.obs_noise_func((r_pred, alpha_pred)), 199 | agent_state=x_t) 200 | 201 | cov_new = self.ukf.P 202 | state_new = self.ukf.x 203 | 204 | if LA.det(cov_new) < 1e6: 205 | self.cov = cov_new 206 | if not(self.collision_func(state_new[:2])): 207 | self.state = np.clip(state_new, self.limit[0], self.limit[1]) 208 | -------------------------------------------------------------------------------- /envs/maTTenv/display_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | edited by christopher-hsu from coco66 for multi_agent 3 | """ 4 | from gym import Wrapper 5 | import numpy as np 6 | from numpy import linalg as LA 7 | 8 | import pdb, os 9 | import matplotlib 10 | matplotlib.use('TkAgg') 11 | from matplotlib import pyplot as plt 12 | from matplotlib import patches 13 | from matplotlib import animation 14 | from envs.maTTenv.metadata import * 15 | 16 | class Display2D(Wrapper): 17 | def __init__(self, env, figID = 0, skip = 1, confidence=0.95): 18 | super(Display2D,self).__init__(env) 19 | self.figID = figID # figID = 0 : train, figID = 1 : test 20 | """ If used in run_maTracking to debug: self.env_core = env 21 | if used in visualize_ma for normal use: self.env_core = env.env""" 22 | self.env_core = env.env 23 | self.bin = self.env_core.MAP.mapres 24 | if self.env_core.MAP.map is None: 25 | self.map = np.zeros(self.env_core.MAP.mapdim) 26 | else: 27 | self.map = self.env_core.MAP.map 28 | self.mapmin = self.env_core.MAP.mapmin 29 | self.mapmax = self.env_core.MAP.mapmax 30 | self.mapres = self.env_core.MAP.mapres 31 | self.fig = plt.figure(self.figID) 32 | self.n_frames = 0 33 | self.skip = skip 34 | self.c_cf = np.sqrt(-2*np.log(1-confidence)) 35 | 36 | def pos2map(self, obs, sd): 37 | x = obs[:,0] 38 | y = obs[:,1] 39 | 40 | def close(self): 41 | plt.close(self.fig) 42 | 43 | def render(self, mode='empty', record=False, traj_num=0, batch_outputs=None): 44 | if not hasattr(self, 'traj'): 45 | raise ValueError('Must do a env.reset() first before calling env.render()') 46 | 47 | # num_agents = len(self.traj) 48 | num_agents = 
self.env_core.nb_agents 49 | if type(self.env_core.agents) == list: 50 | agent_pos = [self.env_core.agents[i].state for i in range(num_agents)] 51 | else: 52 | agent_pos = self.env_core.agents.state 53 | 54 | # num_targets = len(self.traj_y) 55 | num_targets = self.env_core.nb_targets 56 | if type(self.env_core.targets) == list: 57 | target_true_pos = [self.env_core.targets[i].state[:2] for i in range(num_targets)] 58 | target_b_state = [self.env_core.belief_targets[i].state for i in range(num_targets)] # state[3:5] 59 | target_cov = [self.env_core.belief_targets[i].cov for i in range(num_targets)] 60 | else: 61 | target_true_pos = self.env_core.targets.state[:,:2] 62 | target_b_state = self.env_core.belief_targets.state[:,:2] # state[3:5] 63 | target_cov = self.env_core.belief_targets.cov 64 | 65 | if self.n_frames%self.skip == 0: 66 | self.fig.clf() 67 | ax = self.fig.subplots() 68 | im = None 69 | if mode == 'empty': 70 | im = ax.imshow(self.map, cmap='gray_r', origin='lower', 71 | extent=[self.mapmin[0], self.mapmax[0], self.mapmin[1], self.mapmax[1]]) 72 | 73 | for ii in range(num_agents): 74 | #agents positions 75 | ax.plot(agent_pos[ii][0], agent_pos[ii][1], marker=(3, 0, agent_pos[ii][2]/np.pi*180-90), 76 | markersize=10, linestyle='None', markerfacecolor='b', markeredgecolor='b') 77 | ax.plot(self.traj[ii][0], self.traj[ii][1], 'b.', markersize=2) 78 | #agents sensor indicators 79 | sensor_arc = patches.Arc((agent_pos[ii][0], agent_pos[ii][1]), METADATA['sensor_r']*2, METADATA['sensor_r']*2, 80 | angle = agent_pos[ii][2]/np.pi*180, theta1 = -METADATA['fov']/2, theta2 = METADATA['fov']/2, facecolor='gray') 81 | ax.add_patch(sensor_arc) 82 | ax.plot([agent_pos[ii][0], agent_pos[ii][0]+METADATA['sensor_r']*np.cos(agent_pos[ii][2]+0.5*METADATA['fov']/180.0*np.pi)], 83 | [agent_pos[ii][1], agent_pos[ii][1]+METADATA['sensor_r']*np.sin(agent_pos[ii][2]+0.5*METADATA['fov']/180.0*np.pi)],'k', linewidth=0.5) 84 | ax.plot([agent_pos[ii][0], agent_pos[ii][0]+METADATA['sensor_r']*np.cos(agent_pos[ii][2]-0.5*METADATA['fov']/180.0*np.pi)], 85 | [agent_pos[ii][1], agent_pos[ii][1]+METADATA['sensor_r']*np.sin(agent_pos[ii][2]-0.5*METADATA['fov']/180.0*np.pi)],'k', linewidth=0.5) 86 | self.traj[ii][0].append(agent_pos[ii][0]) 87 | self.traj[ii][1].append(agent_pos[ii][1]) 88 | 89 | for jj in range(num_targets): 90 | ax.plot(self.traj_y[jj][0], self.traj_y[jj][1], 'r.', markersize=2) 91 | ax.plot(target_true_pos[jj][0], target_true_pos[jj][1], marker='o', markersize=5, 92 | linestyle='None', markerfacecolor='r', markeredgecolor='r') 93 | # Belief on target 94 | ax.plot(target_b_state[jj][0], target_b_state[jj][1], marker='o', markersize=10, 95 | linewidth=5, markerfacecolor='none', markeredgecolor='g') 96 | eig_val, eig_vec = LA.eig(target_cov[jj][:2,:2]) 97 | belief_target = patches.Ellipse((target_b_state[jj][0], target_b_state[jj][1]), 98 | 2*np.sqrt(eig_val[0])*self.c_cf, 2*np.sqrt(eig_val[1])*self.c_cf, 99 | angle = 180/np.pi*np.arctan2(eig_vec[0][1],eig_vec[0][0]) ,fill=True, 100 | zorder=2, facecolor='g', alpha=0.5) 101 | ax.add_patch(belief_target) 102 | self.traj_y[jj][0].append(target_true_pos[jj][0]) 103 | self.traj_y[jj][1].append(target_true_pos[jj][1]) 104 | 105 | ax.set_xlim((self.mapmin[0], self.mapmax[0])) 106 | ax.set_ylim((self.mapmin[1], self.mapmax[1])) 107 | ax.set_aspect('equal','box') 108 | ax.grid() 109 | ax.set_title(' '.join([mode.upper(),': Trajectory',str(traj_num)])) 110 | 111 | if not record : 112 | plt.draw() 113 | plt.pause(0.0005) 114 | self.n_frames += 1 115 | 116 
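The belief ellipse drawn in `render` comes from an eigendecomposition of the 2x2 position covariance, scaled by c_cf = sqrt(-2*ln(1-confidence)) to get a confidence region. A standalone sketch of that computation, with the matplotlib patch construction left out; the example covariance is made up.

```python
import numpy as np
from numpy import linalg as LA

def belief_ellipse(cov_xy, confidence=0.95):
    """Width, height, and angle (deg) of the confidence ellipse for a 2x2 covariance."""
    c_cf = np.sqrt(-2.0 * np.log(1.0 - confidence))   # ~2.45 for 95%
    eig_val, eig_vec = LA.eig(cov_xy)
    width = 2.0 * np.sqrt(eig_val[0]) * c_cf
    height = 2.0 * np.sqrt(eig_val[1]) * c_cf
    angle = np.degrees(np.arctan2(eig_vec[0][1], eig_vec[0][0]))
    return width, height, angle

print(belief_ellipse(np.array([[4.0, 0.0], [0.0, 1.0]])))   # approx (9.79, 4.90, 0.0)
```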
| def reset(self, **kwargs): 117 | self.traj = [[[],[]]]*self.env_core.num_agents 118 | self.traj_y = [[[],[]]]*self.env_core.num_targets 119 | return self.env.reset(**kwargs) 120 | 121 | class Video2D(Wrapper): 122 | def __init__(self, env, dirname = '', skip = 1, dpi=80, local_view=False): 123 | super(Video2D, self).__init__(env) 124 | self.local_view = local_view 125 | self.skip = skip 126 | self.moviewriter = animation.FFMpegWriter() 127 | fname = os.path.join(dirname, 'eval_%da%dt.mp4'%(env.nb_agents,env.nb_targets)) 128 | self.moviewriter.setup(fig=env.fig, outfile=fname, dpi=dpi) 129 | if self.local_view: 130 | self.moviewriter0 = animation.FFMpegWriter() 131 | fname0 = os.path.join(dirname, 'train_local_%d.mp4'%np.random.randint(0,20)) 132 | self.moviewriter0.setup(fig=env.fig0, outfile=fname0, dpi=dpi) 133 | self.n_frames = 0 134 | 135 | def render(self, *args, **kwargs): 136 | if self.n_frames % self.skip == 0: 137 | #if traj_num % self.skip == 0: 138 | self.env.render(record=True, *args, **kwargs) 139 | self.moviewriter.grab_frame() 140 | if self.local_view: 141 | self.moviewriter0.grab_frame() 142 | self.n_frames += 1 143 | 144 | def reset(self, **kwargs): 145 | return self.env.reset(**kwargs) 146 | 147 | -------------------------------------------------------------------------------- /envs/maTTenv/env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/env/__init__.py -------------------------------------------------------------------------------- /envs/maTTenv/env/maTracking_Base.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | import gym 5 | from gym import spaces, logger 6 | from gym.utils import seeding 7 | from envs.maTTenv.maps import map_utils 8 | import envs.maTTenv.util as util 9 | from envs.maTTenv.metadata import METADATA 10 | 11 | 12 | class maTrackingBase(gym.Env): 13 | def __init__(self, num_agents=2, num_targets=1, map_name='empty', 14 | is_training=True, known_noise=True, **kwargs): 15 | self.seed() #used with gym 16 | self.id = 'maTracking-base' 17 | self.action_space = spaces.Discrete(len(METADATA['action_v']) * \ 18 | len(METADATA['action_w'])) 19 | self.action_map = {} 20 | for (i,v) in enumerate(METADATA['action_v']): 21 | for (j,w) in enumerate(METADATA['action_w']): 22 | self.action_map[len(METADATA['action_w'])*i+j] = (v,w) 23 | assert(len(self.action_map.keys())==self.action_space.n) 24 | 25 | self.agent_dim = 3 26 | self.target_dim = 2 27 | self.num_agents = num_agents 28 | self.nb_agents = num_agents 29 | self.num_targets = num_targets 30 | self.nb_targets = num_targets 31 | self.viewer = None 32 | self.is_training = is_training 33 | 34 | self.sampling_period = 0.5 # sec 35 | self.sensor_r_sd = METADATA['sensor_r_sd'] 36 | self.sensor_b_sd = METADATA['sensor_b_sd'] 37 | self.sensor_r = METADATA['sensor_r'] 38 | self.fov = METADATA['fov'] 39 | map_dir_path = '/'.join(map_utils.__file__.split('/')[:-1]) 40 | self.MAP = map_utils.GridMap( 41 | map_path=os.path.join(map_dir_path, map_name), 42 | r_max = self.sensor_r, fov = self.fov/180.0*np.pi, 43 | margin2wall = METADATA['margin2wall']) 44 | 45 | self.agent_init_pos = np.array([self.MAP.origin[0], self.MAP.origin[1], 0.0]) 46 | self.target_init_pos = np.array(self.MAP.origin) 47 | self.target_init_cov = METADATA['target_init_cov'] 
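The discrete action space built in `__init__` above is the cross product of the METADATA linear and angular velocity choices, so each integer action decodes to a (v, w) pair. A small sketch of that indexing; the velocity sets here are hypothetical, the real ones live in `envs/maTTenv/metadata.py`.

```python
import numpy as np

action_v = [3.0, 2.0, 1.0, 0.0]        # hypothetical linear velocities (m/s)
action_w = [np.pi/2, 0.0, -np.pi/2]    # hypothetical angular velocities (rad/s)

action_map = {}
for i, v in enumerate(action_v):
    for j, w in enumerate(action_w):
        action_map[len(action_w) * i + j] = (v, w)

# 12 discrete actions; e.g. action 0 -> (3.0, pi/2), action 4 -> (2.0, 0.0)
assert len(action_map) == len(action_v) * len(action_w)
```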
48 | 49 | self.reset_num = 0 50 | #needed for gym/core.py wrappers 51 | self.metadata = {'render.modes': []} 52 | self.reward_range = (-float('inf'), float('inf')) 53 | self.spec = None 54 | 55 | def seed(self, seed=None): 56 | '''EXTREMELY IMPORTANT for reproducability in env randomness 57 | RNG is set and every call of the function will produce the same results 58 | ''' 59 | self.np_random, seed = seeding.np_random(seed) 60 | return [seed] 61 | 62 | def setup_agents(self): 63 | """Construct all the agents for the environment""" 64 | raise NotImplementedError 65 | 66 | def setup_targets(self): 67 | """Construct all the targets for the environment""" 68 | raise NotImplementedError 69 | 70 | def setup_belief_targets(self): 71 | """Construct all the target beliefs for the environment""" 72 | raise NotImplementedError 73 | 74 | def reset(self, init_random=True): 75 | """Reset the state of the environment.""" 76 | raise NotImplementedError 77 | 78 | def step(self, action_dict): 79 | """Takes in dict of action and coverts them to map updates (obs, rewards)""" 80 | raise NotImplementedError 81 | 82 | def observation(self, target, agent): 83 | r, alpha = util.relative_distance_polar(target.state[:2], 84 | xy_base=agent.state[:2], 85 | theta_base=agent.state[2]) 86 | observed = (r <= self.sensor_r) \ 87 | & (abs(alpha) <= self.fov/2/180*np.pi) \ 88 | & (not(map_utils.is_blocked(self.MAP, agent.state, target.state))) 89 | z = None 90 | if observed: 91 | z = np.array([r, alpha]) 92 | # z += np.random.multivariate_normal(np.zeros(2,), self.observation_noise(z)) 93 | z += self.np_random.multivariate_normal(np.zeros(2,), self.observation_noise(z)) 94 | '''For some reason, self.np_random is needed only here instead of np.random in order for the 95 | RNG seed to work, if used in the gen_rand_pose functions RNG seed will NOT work ''' 96 | return observed, z 97 | 98 | def observation_noise(self, z): 99 | obs_noise_cov = np.array([[self.sensor_r_sd * self.sensor_r_sd, 0.0], #z[0]/self.sensor_r * self.sensor_r_sd, 0.0], 100 | [0.0, self.sensor_b_sd * self.sensor_b_sd]]) 101 | return obs_noise_cov 102 | 103 | def get_reward(self, obstacles_pt, observed, is_training=True): 104 | return reward_fun(self.belief_targets, is_training) 105 | 106 | def gen_rand_pose(self, o_xy, c_theta, min_lin_dist, max_lin_dist, min_ang_dist, max_ang_dist): 107 | """Generates random position and yaw. 108 | Parameters 109 | -------- 110 | o_xy : xy position of a point in the global frame which we compute a distance from. 111 | c_theta : angular position of a point in the global frame which we compute an angular distance from. 112 | min_lin_dist : the minimum linear distance from o_xy to a sample point. 113 | max_lin_dist : the maximum linear distance from o_xy to a sample point. 114 | min_ang_dist : the minimum angular distance (counter clockwise direction) from c_theta to a sample point. 115 | max_ang_dist : the maximum angular distance (counter clockwise direction) from c_theta to a sample point. 
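The `observation` method above gates a range/bearing measurement on sensor radius, field of view, and line of sight, then corrupts it with the diagonal noise from `observation_noise`. A condensed sketch of that gating; the polar-coordinate computation and all numeric values are stand-ins, not the repository's `util` functions or METADATA entries.

```python
import numpy as np

def noisy_range_bearing(target_xy, agent_xy, agent_theta,
                        sensor_r=10.0, fov_deg=120.0,
                        r_sd=0.2, b_sd=0.01, blocked=False):
    """Return (observed, z) in the spirit of env.observation(); values are placeholders."""
    diff = np.asarray(target_xy) - np.asarray(agent_xy)
    r = np.linalg.norm(diff)
    alpha = np.arctan2(diff[1], diff[0]) - agent_theta
    alpha = (alpha + np.pi) % (2 * np.pi) - np.pi          # wrap to [-pi, pi)

    observed = (r <= sensor_r) and (abs(alpha) <= np.radians(fov_deg) / 2) and not blocked
    z = None
    if observed:
        z = np.array([r, alpha]) + np.random.multivariate_normal(
            np.zeros(2), np.diag([r_sd**2, b_sd**2]))
    return observed, z
```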
116 | """ 117 | if max_ang_dist < min_ang_dist: 118 | max_ang_dist += 2*np.pi 119 | rand_ang = util.wrap_around(np.random.rand() * \ 120 | (max_ang_dist - min_ang_dist) + min_ang_dist + c_theta) 121 | 122 | rand_r = np.random.rand() * (max_lin_dist - min_lin_dist) + min_lin_dist 123 | rand_xy = np.array([rand_r*np.cos(rand_ang), rand_r*np.sin(rand_ang)]) + o_xy 124 | is_valid = not(map_utils.is_collision(self.MAP, rand_xy)) 125 | return is_valid, [rand_xy[0], rand_xy[1], rand_ang] 126 | 127 | def get_init_pose(self, init_pose_list=[], **kwargs): 128 | """Generates initial positions for the agent, targets, and target beliefs. 129 | Parameters 130 | --------- 131 | init_pose_list : a list of dictionaries with pre-defined initial positions. 132 | lin_dist_range_target : a tuple of the minimum and maximum distance of a 133 | target from the agent. 134 | ang_dist_range_target : a tuple of the minimum and maximum angular 135 | distance (counter clockwise) of a target from the 136 | agent. -pi <= x <= pi 137 | lin_dist_range_belief : a tuple of the minimum and maximum distance of a 138 | belief from the target. 139 | ang_dist_range_belief : a tuple of the minimum and maximum angular 140 | distance (counter clockwise) of a belief from the 141 | target. -pi <= x <= pi 142 | blocked : True if there is an obstacle between a target and the agent. 143 | """ 144 | if init_pose_list: 145 | self.reset_num += 1 146 | return init_pose_list[self.reset_num-1] 147 | else: 148 | return self.get_init_pose_random(**kwargs) 149 | 150 | def get_init_pose_random(self, 151 | lin_dist_range_target=(METADATA['init_distance_min'], METADATA['init_distance_max']), 152 | ang_dist_range_target=(-np.pi, np.pi), 153 | lin_dist_range_belief=(METADATA['init_belief_distance_min'], METADATA['init_belief_distance_max']), 154 | ang_dist_range_belief=(-np.pi, np.pi), 155 | blocked=False, 156 | **kwargs): 157 | is_agent_valid = False 158 | init_pose = {} 159 | init_pose['agents'] = [] 160 | for ii in range(self.nb_agents): 161 | is_agent_valid = False 162 | if self.MAP.map is None and ii==0: 163 | if blocked: 164 | raise ValueError('Unable to find a blocked initial condition. 
There is no obstacle in this map.') 165 | a_init = self.agent_init_pos[:2] 166 | else: 167 | while(not is_agent_valid): 168 | a_init = np.random.random((2,)) * (self.MAP.mapmax-self.MAP.mapmin) + self.MAP.mapmin 169 | is_agent_valid = not(map_utils.is_collision(self.MAP, a_init)) 170 | init_pose_agent = [a_init[0], a_init[1], np.random.random() * 2 * np.pi - np.pi] 171 | init_pose['agents'].append(init_pose_agent) 172 | 173 | init_pose['targets'], init_pose['belief_targets'] = [], [] 174 | for jj in range(self.nb_targets): 175 | is_target_valid = False 176 | while(not is_target_valid): 177 | rand_agent = np.random.randint(self.nb_agents) 178 | is_target_valid, init_pose_target = self.gen_rand_pose( 179 | init_pose['agents'][rand_agent][:2], init_pose['agents'][rand_agent][2], 180 | lin_dist_range_target[0], lin_dist_range_target[1], 181 | ang_dist_range_target[0], ang_dist_range_target[1]) 182 | init_pose['targets'].append(init_pose_target) 183 | 184 | is_belief_valid, init_pose_belief = False, np.zeros((2,)) 185 | while((not is_belief_valid) and is_target_valid): 186 | is_belief_valid, init_pose_belief = self.gen_rand_pose( 187 | init_pose['targets'][jj][:2], init_pose['targets'][jj][2], 188 | lin_dist_range_belief[0], lin_dist_range_belief[1], 189 | ang_dist_range_belief[0], ang_dist_range_belief[1]) 190 | init_pose['belief_targets'].append(init_pose_belief) 191 | return init_pose 192 | 193 | def reward_fun(belief_targets, is_training=True, c_mean=0.1): 194 | 195 | detcov = [LA.det(b_target.cov) for b_target in belief_targets] 196 | r_detcov_mean = - np.mean(np.log(detcov)) 197 | reward = c_mean * r_detcov_mean 198 | 199 | mean_nlogdetcov = None 200 | if not(is_training): 201 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets] 202 | mean_nlogdetcov = -np.mean(logdetcov) 203 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/env/setTracking_model.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | from gym import spaces, logger 5 | from envs.maTTenv.maps import map_utils 6 | import envs.maTTenv.util as util 7 | from envs.maTTenv.agent_models import * 8 | from envs.maTTenv.belief_tracker import KFbelief 9 | from envs.maTTenv.metadata import METADATA 10 | from envs.maTTenv.env.maTracking_Base import maTrackingBase 11 | 12 | """ 13 | Target Tracking Environments for Reinforcement Learning. 14 | [Variables] 15 | 16 | d: radial coordinate of a belief target in the learner frame 17 | alpha : angular coordinate of a belief target in the learner frame 18 | ddot : radial velocity of a belief target in the learner frame 19 | alphadot : angular velocity of a belief target in the learner frame 20 | Sigma : Covariance of a belief target 21 | 22 | [Environment Description] 23 | Varying number of agents, varying number of randomly moving targets 24 | No obstacles 25 | Greedy target assignment, k targets are shown to agent. 
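The shared reward returned by `reward_fun` above is a scaled negative mean log-determinant of the target belief covariances, so it grows as the team's uncertainty shrinks. For a feel of its sign and scale, with toy covariances:

```python
import numpy as np
from numpy import linalg as LA

c_mean = 0.1
belief_covs = [np.eye(4) * s for s in (0.5, 2.0, 10.0)]   # toy 4x4 belief covariances

reward = c_mean * -np.mean([np.log(LA.det(cov)) for cov in belief_covs])
# log-dets are 4*ln(0.5), 4*ln(2), 4*ln(10); mean ~ 3.07, so reward ~ -0.307
print(round(reward, 3))
```

Shrinking every covariance below identity pushes the log-determinants negative and the reward positive.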
26 | 27 | setTrackingEnvkGreedy : Double Integrator Target model with KF belief tracker 28 | obs state: [d, alpha, ddot, alphadot, logdet(Sigma), observed] *nb_targets 29 | where nb_targets and nb_agents vary between a range 30 | num_targets describes the upperbound on possible number of targets in env 31 | num_agents describes the upperbound on possible number of agents in env 32 | Target : Double Integrator model, [x,y,xdot,ydot] 33 | Belief Target : KF, Double Integrator model 34 | 35 | """ 36 | 37 | class setTrackingEnvModel(maTrackingBase): 38 | 39 | def __init__(self, num_agents=1, num_targets=2, map_name='empty', 40 | is_training=True, known_noise=True, **kwargs): 41 | super().__init__(num_agents=num_agents, num_targets=num_targets, 42 | map_name=map_name, is_training=is_training) 43 | 44 | self.id = 'setTracking-model' 45 | ###=======================================================### 46 | # Assign agents to closest k targets, if less targets than k, consider all targets 47 | self.k = 4 48 | ###=======================================================### 49 | self.nb_agents = num_agents #only for init, will change with reset() 50 | self.nb_targets = num_targets #only for init, will change with reset() 51 | self.agent_dim = 3 52 | self.target_dim = 4 53 | self.target_init_vel = METADATA['target_init_vel']*np.ones((2,)) 54 | # LIMIT 55 | self.limit = {} # 0: low, 1:highs 56 | self.limit['agent'] = [np.concatenate((self.MAP.mapmin,[-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi]))] 57 | self.limit['target'] = [np.concatenate((self.MAP.mapmin,[-METADATA['target_vel_limit'], -METADATA['target_vel_limit']])), 58 | np.concatenate((self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))] 59 | rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0] # Maximum relative speed 60 | self.limit['state'] = [np.array(([0.0, -np.pi, -rel_vel_limit, -10*np.pi, -50.0, 0.0, 0.0, -np.pi ])), 61 | np.array(([600.0, np.pi, rel_vel_limit, 10*np.pi, 50.0, 2.0, self.sensor_r, np.pi]))] 62 | self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) 63 | self.targetA = np.concatenate((np.concatenate((np.eye(2), self.sampling_period*np.eye(2)), axis=1), 64 | [[0,0,1,0],[0,0,0,1]])) 65 | self.target_noise_cov = METADATA['const_q'] * np.concatenate(( 66 | np.concatenate((self.sampling_period**3/3*np.eye(2), self.sampling_period**2/2*np.eye(2)), axis=1), 67 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 68 | if known_noise: 69 | self.target_true_noise_sd = self.target_noise_cov 70 | else: 71 | self.target_true_noise_sd = METADATA['const_q_true'] * np.concatenate(( 72 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period/2*np.eye(2)), axis=1), 73 | np.concatenate((self.sampling_period/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 74 | 75 | # Build a robot 76 | self.setup_agents() 77 | # Build a target 78 | self.setup_targets() 79 | self.setup_belief_targets() 80 | # Use custom reward 81 | self.get_reward() 82 | 83 | def setup_agents(self): 84 | self.agents = [AgentSE2(agent_id = 'agent-' + str(i), 85 | dim=self.agent_dim, sampling_period=self.sampling_period, 86 | limit=self.limit['agent'], 87 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 88 | for i in range(self.num_agents)] 89 | 90 | def setup_targets(self): 91 | self.targets = [AgentDoubleInt2D(agent_id = 'target-' + str(i), 92 | dim=self.target_dim, 
sampling_period=self.sampling_period, 93 | limit=self.limit['target'], 94 | collision_func=lambda x: map_utils.is_collision(self.MAP, x), 95 | A=self.targetA, W=self.target_true_noise_sd) 96 | for i in range(self.num_targets)] 97 | 98 | def setup_belief_targets(self): 99 | self.belief_targets = [KFbelief(agent_id = 'target-' + str(i), 100 | dim=self.target_dim, limit=self.limit['target'], A=self.targetA, 101 | W=self.target_noise_cov, obs_noise_func=self.observation_noise, 102 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 103 | for i in range(self.num_targets)] 104 | 105 | def get_reward(self, obstacles_pt=None, observed=None, is_training=True): 106 | return reward_fun(self.nb_targets, self.belief_targets, is_training) 107 | 108 | def reset(self,**kwargs): 109 | """ 110 | Random initialization a number of agents and targets at the reset of the env epsiode. 111 | Agents are given random positions in the map, targets are given random positions near a random agent. 112 | Return an observation state dict with agent ids (keys) that refer to their observation 113 | """ 114 | try: 115 | self.nb_agents = kwargs['nb_agents'] 116 | self.nb_targets = kwargs['nb_targets'] 117 | except: 118 | self.nb_agents = np.random.random_integers(1, self.num_agents) 119 | self.nb_targets = np.random.random_integers(1, self.num_targets) 120 | obs_dict = {} 121 | # init_pose = self.get_init_pose(**kwargs) 122 | 123 | # Initialize agents 124 | for ii in range(self.nb_agents): 125 | self.agents[ii].reset(init_pose['agents'][ii]) 126 | obs_dict[self.agents[ii].agent_id] = [] 127 | 128 | # Initialize targets and beliefs 129 | for nn in range(self.nb_targets): 130 | self.belief_targets[nn].reset( 131 | init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))), 132 | init_cov=self.target_init_cov) 133 | self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel))) 134 | # For nb agents calculate belief of targets assigned 135 | for jj in range(self.nb_targets): 136 | for kk in range(self.nb_agents): 137 | r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2], 138 | xy_base=self.agents[kk].state[:2], 139 | theta_base=self.agents[kk].state[2]) 140 | logdetcov = np.log(LA.det(self.belief_targets[jj].cov)) 141 | obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, logdetcov, 0.0, self.sensor_r, np.pi]) 142 | 143 | if self.nb_targets > self.k: 144 | for agent_id in obs_dict: 145 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 146 | close = np.argpartition(obs_dict[agent_id][:,0], self.k) 147 | obs_dict[agent_id] = obs_dict[agent_id][close[:self.k]] 148 | else: 149 | for agent_id in obs_dict: 150 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 151 | 152 | return obs_dict 153 | 154 | def step(self, action_dict): 155 | obs_dict = {} 156 | reward_dict = {} 157 | done_dict = {'__all__':False} 158 | info_dict = {} 159 | 160 | # Targets move (t -> t+1) 161 | for n in range(self.nb_targets): 162 | # self.targets[n].update() 163 | self.belief_targets[n].predict() # Belief state at t+1 164 | # Agents move (t -> t+1) and observe the targets 165 | for ii, agent_id in enumerate(action_dict): 166 | obs_dict[self.agents[ii].agent_id] = [] 167 | reward_dict[self.agents[ii].agent_id] = [] 168 | done_dict[self.agents[ii].agent_id] = [] 169 | 170 | action_vw = self.action_map[action_dict[agent_id]] 171 | 172 | # Locations of all targets and agents in order to maintain a margin between them 173 | margin_pos = [t.state[:2] for t in 
self.targets[:self.nb_targets]] 174 | for p, ids in enumerate(action_dict): 175 | if agent_id != ids: 176 | margin_pos.append(np.array(self.agents[p].state[:2])) 177 | _ = self.agents[ii].update(action_vw, margin_pos) 178 | 179 | # Target and map observations 180 | observed = np.zeros(self.nb_targets, dtype=bool) 181 | # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state) 182 | # if obstacles_pt is None: 183 | obstacles_pt = (self.sensor_r, np.pi) 184 | 185 | # Update beliefs of targets 186 | for jj in range(self.nb_targets): 187 | # Observe 188 | obs, z_t = self.observation(self.targets[jj], self.agents[ii]) 189 | observed[jj] = obs 190 | if obs: # if observed, update the target belief. 191 | self.belief_targets[jj].update(z_t, self.agents[ii].state) 192 | 193 | r_b, alpha_b = util.relative_distance_polar(self.belief_targets[jj].state[:2], 194 | xy_base=self.agents[ii].state[:2], 195 | theta_base=self.agents[ii].state[-1]) 196 | r_dot_b, alpha_dot_b = util.relative_velocity_polar( 197 | self.belief_targets[jj].state[:2], 198 | self.belief_targets[jj].state[2:], 199 | self.agents[ii].state[:2], self.agents[ii].state[-1], 200 | action_vw[0], action_vw[1]) 201 | obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b, 202 | np.log(LA.det(self.belief_targets[jj].cov)), float(obs), self.sensor_r, np.pi]) 203 | # Assign agents to closest k targets, if less targets than k, consider all targets 204 | if self.nb_targets > self.k: 205 | for agent_id in obs_dict: 206 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 207 | close = np.argpartition(obs_dict[agent_id][:,0], self.k) 208 | obs_dict[agent_id] = obs_dict[agent_id][close[:self.k]] 209 | else: 210 | for agent_id in obs_dict: 211 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 212 | # Get all rewards after all agents and targets move (t -> t+1) 213 | reward, done, mean_nlogdetcov = self.get_reward(obstacles_pt, observed, self.is_training) 214 | reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov 215 | return obs_dict, reward_dict, done_dict, info_dict 216 | 217 | def reward_fun(nb_targets, belief_targets, is_training=True, c_mean=0.1): 218 | detcov = [LA.det(b_target.cov) for b_target in belief_targets[:nb_targets]] 219 | r_detcov_mean = - np.mean(np.log(detcov)) 220 | reward = c_mean * r_detcov_mean 221 | 222 | mean_nlogdetcov = None 223 | if not(is_training): 224 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets[:nb_targets]] 225 | mean_nlogdetcov = -np.mean(logdetcov) 226 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/env/setTracking_v0.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | from gym import spaces, logger 5 | from envs.maTTenv.maps import map_utils 6 | import envs.maTTenv.util as util 7 | from envs.maTTenv.agent_models import * 8 | from envs.maTTenv.belief_tracker import KFbelief 9 | from envs.maTTenv.metadata import METADATA 10 | from envs.maTTenv.env.maTracking_Base import maTrackingBase 11 | 12 | """ 13 | Target Tracking Environments for Reinforcement Learning. 
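The heuristic mask used in the reset and step code above keeps only the k closest targets per agent by partially sorting on the range column with `np.argpartition`. An isolated example of that selection; the observation rows are made up.

```python
import numpy as np

k = 2
# each row: [r, alpha, rdot, alphadot, logdetcov, observed]  (toy values)
obs = np.array([[12.0,  0.1, 0.0, 0.0, 1.0, 0.0],
                [ 3.5, -0.4, 0.0, 0.0, 2.0, 1.0],
                [ 8.2,  1.2, 0.0, 0.0, 0.5, 0.0],
                [ 1.1,  0.3, 0.0, 0.0, 3.0, 1.0]])

close = np.argpartition(obs[:, 0], k)   # the k smallest ranges land first, unordered
masked = obs[close[:k]]                 # keeps the rows with r = 1.1 and 3.5
print(masked[:, 0])
```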
14 | [Variables] 15 | 16 | d: radial coordinate of a belief target in the learner frame 17 | alpha : angular coordinate of a belief target in the learner frame 18 | ddot : radial velocity of a belief target in the learner frame 19 | alphadot : angular velocity of a belief target in the learner frame 20 | Sigma : Covariance of a belief target 21 | 22 | [Environment Description] 23 | Varying number of agents, varying number of randomly moving targets 24 | No obstacles 25 | 26 | setTrackingEnv0 : Double Integrator Target model with KF belief tracker 27 | obs state: [d, alpha, ddot, alphadot, logdet(Sigma), observed] *nb_targets 28 | where nb_targets and nb_agents vary between a range 29 | num_targets describes the upperbound on possible number of targets in env 30 | num_agents describes the upperbound on possible number of agents in env 31 | Target : Double Integrator model, [x,y,xdot,ydot] 32 | Belief Target : KF, Double Integrator model 33 | 34 | """ 35 | 36 | class setTrackingEnv0(maTrackingBase): 37 | 38 | def __init__(self, num_agents=1, num_targets=2, map_name='empty', 39 | is_training=True, known_noise=True, **kwargs): 40 | super().__init__(num_agents=num_agents, num_targets=num_targets, 41 | map_name=map_name, is_training=is_training) 42 | 43 | self.id = 'setTracking-v0' 44 | self.nb_agents = num_agents #only for init, will change with reset() 45 | self.nb_targets = num_targets #only for init, will change with reset() 46 | self.agent_dim = 3 47 | self.target_dim = 4 48 | self.target_init_vel = METADATA['target_init_vel']*np.ones((2,)) 49 | # LIMIT 50 | self.limit = {} # 0: low, 1:highs 51 | self.limit['agent'] = [np.concatenate((self.MAP.mapmin,[-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi]))] 52 | self.limit['target'] = [np.concatenate((self.MAP.mapmin,[-METADATA['target_vel_limit'], -METADATA['target_vel_limit']])), 53 | np.concatenate((self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))] 54 | rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0] # Maximum relative speed 55 | self.limit['state'] = [np.array(([0.0, -np.pi, -rel_vel_limit, -10*np.pi, -50.0, 0.0])), 56 | np.array(([600.0, np.pi, rel_vel_limit, 10*np.pi, 50.0, 2.0]))] 57 | self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) 58 | self.targetA = np.concatenate((np.concatenate((np.eye(2), self.sampling_period*np.eye(2)), axis=1), 59 | [[0,0,1,0],[0,0,0,1]])) 60 | self.target_noise_cov = METADATA['const_q'] * np.concatenate(( 61 | np.concatenate((self.sampling_period**3/3*np.eye(2), self.sampling_period**2/2*np.eye(2)), axis=1), 62 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 63 | if known_noise: 64 | self.target_true_noise_sd = self.target_noise_cov 65 | else: 66 | self.target_true_noise_sd = METADATA['const_q_true'] * np.concatenate(( 67 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period/2*np.eye(2)), axis=1), 68 | np.concatenate((self.sampling_period/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 69 | 70 | # Build a robot 71 | self.setup_agents() 72 | # Build a target 73 | self.setup_targets() 74 | self.setup_belief_targets() 75 | # Use custom reward 76 | self.get_reward() 77 | 78 | def setup_agents(self): 79 | self.agents = [AgentSE2(agent_id = 'agent-' + str(i), 80 | dim=self.agent_dim, sampling_period=self.sampling_period, 81 | limit=self.limit['agent'], 82 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 83 | for i in 
range(self.num_agents)] 84 | 85 | def setup_targets(self): 86 | self.targets = [AgentDoubleInt2D(agent_id = 'target-' + str(i), 87 | dim=self.target_dim, sampling_period=self.sampling_period, 88 | limit=self.limit['target'], 89 | collision_func=lambda x: map_utils.is_collision(self.MAP, x), 90 | A=self.targetA, W=self.target_true_noise_sd) 91 | for i in range(self.num_targets)] 92 | 93 | def setup_belief_targets(self): 94 | self.belief_targets = [KFbelief(agent_id = 'target-' + str(i), 95 | dim=self.target_dim, limit=self.limit['target'], A=self.targetA, 96 | W=self.target_noise_cov, obs_noise_func=self.observation_noise, 97 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 98 | for i in range(self.num_targets)] 99 | 100 | def get_reward(self, observed=None, is_training=True): 101 | return reward_fun(self.nb_targets, self.belief_targets, is_training) 102 | 103 | def reset(self,**kwargs): 104 | """ 105 | Random initialization a number of agents and targets at the reset of the env epsiode. 106 | Agents are given random positions in the map, targets are given random positions near a random agent. 107 | Return an observation state dict with agent ids (keys) that refer to their observation 108 | """ 109 | self.rng = np.random.default_rng() 110 | try: 111 | self.nb_agents = kwargs['nb_agents'] 112 | self.nb_targets = kwargs['nb_targets'] 113 | except: 114 | self.nb_agents = np.random.randint(1, self.num_agents) 115 | self.nb_targets = np.random.randint(1, self.num_targets) 116 | obs_dict = {} 117 | init_pose = self.get_init_pose(**kwargs) 118 | # Initialize agents 119 | for ii in range(self.nb_agents): 120 | self.agents[ii].reset(init_pose['agents'][ii]) 121 | obs_dict[self.agents[ii].agent_id] = [] 122 | 123 | # Initialize targets and beliefs 124 | for nn in range(self.nb_targets): 125 | self.belief_targets[nn].reset( 126 | init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))), 127 | init_cov=self.target_init_cov) 128 | self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel))) 129 | # For nb agents calculate belief of targets assigned 130 | for jj in range(self.nb_targets): 131 | for kk in range(self.nb_agents): 132 | r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2], 133 | xy_base=self.agents[kk].state[:2], 134 | theta_base=self.agents[kk].state[2]) 135 | logdetcov = np.log(LA.det(self.belief_targets[jj].cov)) 136 | obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, logdetcov, 0.0]) 137 | for agent_id in obs_dict: 138 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 139 | return obs_dict 140 | 141 | def step(self, action_dict): 142 | obs_dict = {} 143 | reward_dict = {} 144 | done_dict = {'__all__':False} 145 | info_dict = {} 146 | 147 | # Targets move (t -> t+1) 148 | for n in range(self.nb_targets): 149 | self.targets[n].update() 150 | self.belief_targets[n].predict() # Belief state at t+1 151 | # Agents move (t -> t+1) and observe the targets 152 | for ii, agent_id in enumerate(action_dict): 153 | obs_dict[self.agents[ii].agent_id] = [] 154 | reward_dict[self.agents[ii].agent_id] = [] 155 | done_dict[self.agents[ii].agent_id] = [] 156 | 157 | action_vw = self.action_map[action_dict[agent_id]] 158 | 159 | # Locations of all targets and agents in order to maintain a margin between them 160 | margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]] 161 | for p, ids in enumerate(action_dict): 162 | if agent_id != ids: 163 | 
margin_pos.append(np.array(self.agents[p].state[:2])) 164 | _ = self.agents[ii].update(action_vw, margin_pos) 165 | 166 | # Target and map observations 167 | observed = np.zeros(self.nb_targets, dtype=bool) 168 | 169 | # Update beliefs of targets 170 | for jj in range(self.nb_targets): 171 | # Observe 172 | obs, z_t = self.observation(self.targets[jj], self.agents[ii]) 173 | observed[jj] = obs 174 | if obs: # if observed, update the target belief. 175 | self.belief_targets[jj].update(z_t, self.agents[ii].state) 176 | 177 | r_b, alpha_b = util.relative_distance_polar(self.belief_targets[jj].state[:2], 178 | xy_base=self.agents[ii].state[:2], 179 | theta_base=self.agents[ii].state[-1]) 180 | r_dot_b, alpha_dot_b = util.relative_velocity_polar( 181 | self.belief_targets[jj].state[:2], 182 | self.belief_targets[jj].state[2:], 183 | self.agents[ii].state[:2], self.agents[ii].state[-1], 184 | action_vw[0], action_vw[1]) 185 | obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b, 186 | np.log(LA.det(self.belief_targets[jj].cov)), float(obs)]) 187 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 188 | # shuffle obs to promote permutation invariance 189 | self.rng.shuffle(obs_dict[agent_id]) 190 | # Get all rewards after all agents and targets move (t -> t+1) 191 | reward, done, mean_nlogdetcov = self.get_reward(observed, self.is_training) 192 | reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov 193 | return obs_dict, reward_dict, done_dict, info_dict 194 | 195 | def reward_fun(nb_targets, belief_targets, is_training=True, c_mean=0.1): 196 | detcov = [LA.det(b_target.cov) for b_target in belief_targets[:nb_targets]] 197 | r_detcov_mean = - np.mean(np.log(detcov)) 198 | reward = c_mean * r_detcov_mean 199 | 200 | mean_nlogdetcov = None 201 | if not(is_training): 202 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets[:nb_targets]] 203 | mean_nlogdetcov = -np.mean(logdetcov) 204 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/env/setTracking_vGreedy.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | from gym import spaces, logger 5 | from envs.maTTenv.maps import map_utils 6 | import envs.maTTenv.util as util 7 | from envs.maTTenv.agent_models import * 8 | from envs.maTTenv.belief_tracker import KFbelief 9 | from envs.maTTenv.metadata import METADATA 10 | from envs.maTTenv.env.maTracking_Base import maTrackingBase 11 | 12 | """ 13 | Target Tracking Environments for Reinforcement Learning. 
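[Reward]
All the setTracking variants shown here use the same reward_fun (defined at the
bottom of each file): reward = c_mean * mean_i( -log det(Sigma_i) ) over the
currently active targets, with c_mean = 0.1. Illustrative example (made-up
numbers, not from the paper): if two belief covariances have det(Sigma) = e^-2
and e^-4, the reward is 0.1 * mean([2, 4]) = 0.3, so tighter beliefs (smaller
det(Sigma)) yield a larger reward.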
14 | [Variables] 15 | 16 | d: radial coordinate of a belief target in the learner frame 17 | alpha : angular coordinate of a belief target in the learner frame 18 | ddot : radial velocity of a belief target in the learner frame 19 | alphadot : angular velocity of a belief target in the learner frame 20 | Sigma : Covariance of a belief target 21 | o_d : linear distance to the closet obstacle point 22 | o_alpha : angular distance to the closet obstacle point 23 | 24 | [Environment Description] 25 | Varying number of agents, varying number of randomly moving targets 26 | No obstacles 27 | VERY Greedy target assignment, always goes to the closest target 28 | 29 | setTrackingEnvGreedy : Double Integrator Target model with KF belief tracker 30 | obs state: [d, alpha, ddot, alphadot, logdet(Sigma), observed] *nb_targets 31 | where nb_targets and nb_agents vary between a range 32 | num_targets describes the upperbound on possible number of targets in env 33 | num_agents describes the upperbound on possible number of agents in env 34 | Target : Double Integrator model, [x,y,xdot,ydot] 35 | Belief Target : KF, Double Integrator model 36 | 37 | """ 38 | 39 | class setTrackingEnvGreedy(maTrackingBase): 40 | 41 | def __init__(self, num_agents=1, num_targets=2, map_name='empty', 42 | is_training=True, known_noise=True, **kwargs): 43 | super().__init__(num_agents=num_agents, num_targets=num_targets, 44 | map_name=map_name, is_training=is_training) 45 | 46 | self.id = 'setTracking-vGreedy' 47 | self.nb_agents = num_agents #only for init, will change with reset() 48 | self.nb_targets = num_targets #only for init, will change with reset() 49 | self.agent_dim = 3 50 | self.target_dim = 4 51 | self.target_init_vel = METADATA['target_init_vel']*np.ones((2,)) 52 | # LIMIT 53 | self.limit = {} # 0: low, 1:highs 54 | self.limit['agent'] = [np.concatenate((self.MAP.mapmin,[-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi]))] 55 | self.limit['target'] = [np.concatenate((self.MAP.mapmin,[-METADATA['target_vel_limit'], -METADATA['target_vel_limit']])), 56 | np.concatenate((self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))] 57 | rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0] # Maximum relative speed 58 | self.limit['state'] = [np.array(([0.0, -np.pi, -rel_vel_limit, -10*np.pi, -50.0, 0.0])), 59 | np.array(([600.0, np.pi, rel_vel_limit, 10*np.pi, 50.0, 2.0]))] 60 | self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) 61 | self.targetA = np.concatenate((np.concatenate((np.eye(2), self.sampling_period*np.eye(2)), axis=1), 62 | [[0,0,1,0],[0,0,0,1]])) 63 | self.target_noise_cov = METADATA['const_q'] * np.concatenate(( 64 | np.concatenate((self.sampling_period**3/3*np.eye(2), self.sampling_period**2/2*np.eye(2)), axis=1), 65 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 66 | if known_noise: 67 | self.target_true_noise_sd = self.target_noise_cov 68 | else: 69 | self.target_true_noise_sd = METADATA['const_q_true'] * np.concatenate(( 70 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period/2*np.eye(2)), axis=1), 71 | np.concatenate((self.sampling_period/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 72 | 73 | # Build a robot 74 | self.setup_agents() 75 | # Build a target 76 | self.setup_targets() 77 | self.setup_belief_targets() 78 | # Use custom reward 79 | self.get_reward() 80 | 81 | def setup_agents(self): 82 | self.agents = 
[AgentSE2(agent_id = 'agent-' + str(i), 83 | dim=self.agent_dim, sampling_period=self.sampling_period, 84 | limit=self.limit['agent'], 85 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 86 | for i in range(self.num_agents)] 87 | 88 | def setup_targets(self): 89 | self.targets = [AgentDoubleInt2D(agent_id = 'target-' + str(i), 90 | dim=self.target_dim, sampling_period=self.sampling_period, 91 | limit=self.limit['target'], 92 | collision_func=lambda x: map_utils.is_collision(self.MAP, x), 93 | A=self.targetA, W=self.target_true_noise_sd) 94 | for i in range(self.num_targets)] 95 | 96 | def setup_belief_targets(self): 97 | self.belief_targets = [KFbelief(agent_id = 'target-' + str(i), 98 | dim=self.target_dim, limit=self.limit['target'], A=self.targetA, 99 | W=self.target_noise_cov, obs_noise_func=self.observation_noise, 100 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 101 | for i in range(self.num_targets)] 102 | 103 | def get_reward(self, observed=None, is_training=True): 104 | return reward_fun(self.nb_targets, self.belief_targets, is_training) 105 | 106 | def reset(self,**kwargs): 107 | """ 108 | Random initialization a number of agents and targets at the reset of the env epsiode. 109 | Agents are given random positions in the map, targets are given random positions near a random agent. 110 | Return an observation state dict with agent ids (keys) that refer to their observation 111 | """ 112 | try: 113 | self.nb_agents = kwargs['nb_agents'] 114 | self.nb_targets = kwargs['nb_targets'] 115 | except: 116 | self.nb_agents = np.random.randint(1, self.num_agents) 117 | self.nb_targets = np.random.randint(1, self.num_targets) 118 | obs_dict = {} 119 | init_pose = self.get_init_pose(**kwargs) 120 | # Initialize agents 121 | for ii in range(self.nb_agents): 122 | self.agents[ii].reset(init_pose['agents'][ii]) 123 | obs_dict[self.agents[ii].agent_id] = [] 124 | 125 | # Initialize targets and beliefs 126 | for nn in range(self.nb_targets): 127 | self.belief_targets[nn].reset( 128 | init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))), 129 | init_cov=self.target_init_cov) 130 | self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel))) 131 | # For nb agents calculate belief of targets assigned 132 | for jj in range(self.nb_targets): 133 | for kk in range(self.nb_agents): 134 | r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2], 135 | xy_base=self.agents[kk].state[:2], 136 | theta_base=self.agents[kk].state[2]) 137 | logdetcov = np.log(LA.det(self.belief_targets[jj].cov)) 138 | obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, logdetcov, 0.0]) 139 | # Greedily assign agents to closest target 140 | for agent_id in obs_dict: 141 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 142 | close = np.argmin(obs_dict[agent_id][:,0]) 143 | obs_dict[agent_id] = obs_dict[agent_id][None,close] 144 | return obs_dict 145 | 146 | def step(self, action_dict): 147 | obs_dict = {} 148 | reward_dict = {} 149 | done_dict = {'__all__':False} 150 | info_dict = {} 151 | 152 | # Targets move (t -> t+1) 153 | for n in range(self.nb_targets): 154 | self.targets[n].update() 155 | self.belief_targets[n].predict() # Belief state at t+1 156 | # Agents move (t -> t+1) and observe the targets 157 | for ii, agent_id in enumerate(action_dict): 158 | obs_dict[self.agents[ii].agent_id] = [] 159 | reward_dict[self.agents[ii].agent_id] = [] 160 | done_dict[self.agents[ii].agent_id] = [] 161 | 162 | 
action_vw = self.action_map[action_dict[agent_id]] 163 | 164 | # Locations of all targets and agents in order to maintain a margin between them 165 | margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]] 166 | for p, ids in enumerate(action_dict): 167 | if agent_id != ids: 168 | margin_pos.append(np.array(self.agents[p].state[:2])) 169 | _ = self.agents[ii].update(action_vw, margin_pos) 170 | 171 | # Target and map observations 172 | observed = np.zeros(self.nb_targets, dtype=bool) 173 | 174 | # Update beliefs of targets 175 | for jj in range(self.nb_targets): 176 | # Observe 177 | obs, z_t = self.observation(self.targets[jj], self.agents[ii]) 178 | observed[jj] = obs 179 | if obs: # if observed, update the target belief. 180 | self.belief_targets[jj].update(z_t, self.agents[ii].state) 181 | 182 | r_b, alpha_b = util.relative_distance_polar(self.belief_targets[jj].state[:2], 183 | xy_base=self.agents[ii].state[:2], 184 | theta_base=self.agents[ii].state[-1]) 185 | r_dot_b, alpha_dot_b = util.relative_velocity_polar( 186 | self.belief_targets[jj].state[:2], 187 | self.belief_targets[jj].state[2:], 188 | self.agents[ii].state[:2], self.agents[ii].state[-1], 189 | action_vw[0], action_vw[1]) 190 | obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b, 191 | np.log(LA.det(self.belief_targets[jj].cov)), float(obs)]) 192 | # Greedily assign agents to closest target 193 | for agent_id in obs_dict: 194 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 195 | close = np.argmin(obs_dict[agent_id][:,0]) 196 | obs_dict[agent_id] = obs_dict[agent_id][None,close] 197 | # Get all rewards after all agents and targets move (t -> t+1) 198 | reward, done, mean_nlogdetcov = self.get_reward(observed, self.is_training) 199 | reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov 200 | return obs_dict, reward_dict, done_dict, info_dict 201 | 202 | def reward_fun(nb_targets, belief_targets, is_training=True, c_mean=0.1): 203 | detcov = [LA.det(b_target.cov) for b_target in belief_targets[:nb_targets]] 204 | r_detcov_mean = - np.mean(np.log(detcov)) 205 | reward = c_mean * r_detcov_mean 206 | 207 | mean_nlogdetcov = None 208 | if not(is_training): 209 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets[:nb_targets]] 210 | mean_nlogdetcov = -np.mean(logdetcov) 211 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/env/setTracking_vGru.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | from gym import spaces, logger 5 | from envs.maTTenv.maps import map_utils 6 | import envs.maTTenv.util as util 7 | from envs.maTTenv.agent_models import * 8 | from envs.maTTenv.belief_tracker import KFbelief 9 | from envs.maTTenv.metadata import METADATA 10 | from envs.maTTenv.env.maTracking_Base import maTrackingBase 11 | 12 | """ 13 | Target Tracking Environments for Reinforcement Learning. 
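[Observation note]
Compared to setTracking-v0, the logdet(Sigma) entry is dropped here, so each
target contributes a 5-dim row [d, alpha, ddot, alphadot, observed], and rows
for unobserved targets are zero-filled in step() below. Illustrative rows
(made-up numbers):

    observed target   : [3.2, -0.4, 0.10, 0.02, 1.0]
    unobserved target : [0.0,  0.0, 0.00, 0.00, 0.0]

The KF beliefs are still maintained, but only to compute the reward.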
14 | [Variables] 15 | 16 | d: radial coordinate of a belief target in the learner frame 17 | alpha : angular coordinate of a belief target in the learner frame 18 | ddot : radial velocity of a belief target in the learner frame 19 | alphadot : angular velocity of a belief target in the learner frame 20 | Sigma : Covariance of a belief target 21 | 22 | [Environment Description] 23 | Use a gru to predict the belief of the target 24 | The kf is used as a tool for the reward 25 | 26 | Varying number of agents, varying number of randomly moving targets 27 | No obstacles 28 | 29 | setTrackingEnvGru : Double Integrator Target model with KF belief tracker 30 | obs state: [d, alpha, ddot, alphadot, observed] *nb_targets 31 | where nb_targets and nb_agents vary between a range 32 | num_targets describes the upperbound on possible number of targets in env 33 | num_agents describes the upperbound on possible number of agents in env 34 | Target : Double Integrator model, [x,y,xdot,ydot] 35 | Belief Target : KF, Double Integrator model 36 | 37 | """ 38 | 39 | class setTrackingEnvGru(maTrackingBase): 40 | 41 | def __init__(self, num_agents=1, num_targets=2, map_name='empty', 42 | is_training=True, known_noise=True, **kwargs): 43 | super().__init__(num_agents=num_agents, num_targets=num_targets, 44 | map_name=map_name, is_training=is_training) 45 | 46 | self.id = 'setTracking-vGru' 47 | self.nb_agents = num_agents #only for init, will change with reset() 48 | self.nb_targets = num_targets #only for init, will change with reset() 49 | self.agent_dim = 3 50 | self.target_dim = 4 51 | self.target_init_vel = METADATA['target_init_vel']*np.ones((2,)) 52 | # LIMIT 53 | self.limit = {} # 0: low, 1:highs 54 | self.limit['agent'] = [np.concatenate((self.MAP.mapmin,[-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi]))] 55 | self.limit['target'] = [np.concatenate((self.MAP.mapmin,[-METADATA['target_vel_limit'], -METADATA['target_vel_limit']])), 56 | np.concatenate((self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))] 57 | rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0] # Maximum relative speed 58 | self.limit['state'] = [np.array(([0.0, -np.pi, -rel_vel_limit, -10*np.pi, -50.0, 0.0])), 59 | np.array(([600.0, np.pi, rel_vel_limit, 10*np.pi, 50.0, 2.0]))] 60 | self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) 61 | self.targetA = np.concatenate((np.concatenate((np.eye(2), self.sampling_period*np.eye(2)), axis=1), 62 | [[0,0,1,0],[0,0,0,1]])) 63 | self.target_noise_cov = METADATA['const_q'] * np.concatenate(( 64 | np.concatenate((self.sampling_period**3/3*np.eye(2), self.sampling_period**2/2*np.eye(2)), axis=1), 65 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 66 | if known_noise: 67 | self.target_true_noise_sd = self.target_noise_cov 68 | else: 69 | self.target_true_noise_sd = METADATA['const_q_true'] * np.concatenate(( 70 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period/2*np.eye(2)), axis=1), 71 | np.concatenate((self.sampling_period/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 72 | 73 | # Build a robot 74 | self.setup_agents() 75 | # Build a target 76 | self.setup_targets() 77 | self.setup_belief_targets() 78 | # Use custom reward 79 | self.get_reward() 80 | 81 | def setup_agents(self): 82 | self.agents = [AgentSE2(agent_id = 'agent-' + str(i), 83 | dim=self.agent_dim, sampling_period=self.sampling_period, 84 | 
limit=self.limit['agent'], 85 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 86 | for i in range(self.num_agents)] 87 | 88 | def setup_targets(self): 89 | self.targets = [AgentDoubleInt2D(agent_id = 'target-' + str(i), 90 | dim=self.target_dim, sampling_period=self.sampling_period, 91 | limit=self.limit['target'], 92 | collision_func=lambda x: map_utils.is_collision(self.MAP, x), 93 | A=self.targetA, W=self.target_true_noise_sd) 94 | for i in range(self.num_targets)] 95 | 96 | def setup_belief_targets(self): 97 | self.belief_targets = [KFbelief(agent_id = 'target-' + str(i), 98 | dim=self.target_dim, limit=self.limit['target'], A=self.targetA, 99 | W=self.target_noise_cov, obs_noise_func=self.observation_noise, 100 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 101 | for i in range(self.num_targets)] 102 | 103 | def get_reward(self, observed=None, is_training=True): 104 | return reward_fun(self.nb_targets, self.belief_targets, is_training) 105 | 106 | def reset(self,**kwargs): 107 | """ 108 | Random initialization a number of agents and targets at the reset of the env epsiode. 109 | Agents are given random positions in the map, targets are given random positions near a random agent. 110 | Return an observation state dict with agent ids (keys) that refer to their observation 111 | """ 112 | self.rng = np.random.default_rng() 113 | try: 114 | self.nb_agents = kwargs['nb_agents'] 115 | self.nb_targets = kwargs['nb_targets'] 116 | except: 117 | self.nb_agents = np.random.randint(1, self.num_agents) 118 | self.nb_targets = np.random.randint(1, self.num_targets) 119 | obs_dict = {} 120 | init_pose = self.get_init_pose(**kwargs) 121 | # Initialize agents 122 | for ii in range(self.nb_agents): 123 | self.agents[ii].reset(init_pose['agents'][ii]) 124 | obs_dict[self.agents[ii].agent_id] = [] 125 | 126 | # Initialize targets and beliefs 127 | for nn in range(self.nb_targets): 128 | self.belief_targets[nn].reset( 129 | init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))), 130 | init_cov=self.target_init_cov) 131 | self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel))) 132 | # For nb agents calculate belief of targets assigned 133 | for jj in range(self.nb_targets): 134 | for kk in range(self.nb_agents): 135 | r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2], 136 | xy_base=self.agents[kk].state[:2], 137 | theta_base=self.agents[kk].state[2]) 138 | logdetcov = np.log(LA.det(self.belief_targets[jj].cov)) 139 | obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, 0.0]) 140 | for agent_id in obs_dict: 141 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 142 | self._obs_dict = obs_dict 143 | return obs_dict 144 | 145 | def step(self, action_dict): 146 | obs_dict = {} 147 | reward_dict = {} 148 | done_dict = {'__all__':False} 149 | info_dict = {} 150 | 151 | # Targets move (t -> t+1) 152 | for n in range(self.nb_targets): 153 | self.targets[n].update() 154 | self.belief_targets[n].predict() # Belief state at t+1 155 | # Agents move (t -> t+1) and observe the targets 156 | for ii, agent_id in enumerate(action_dict): 157 | obs_dict[self.agents[ii].agent_id] = [] 158 | reward_dict[self.agents[ii].agent_id] = [] 159 | done_dict[self.agents[ii].agent_id] = [] 160 | 161 | action_vw = self.action_map[action_dict[agent_id]] 162 | 163 | # Locations of all targets and agents in order to maintain a margin between them 164 | margin_pos = [t.state[:2] for t in 
self.targets[:self.nb_targets]] 165 | for p, ids in enumerate(action_dict): 166 | if agent_id != ids: 167 | margin_pos.append(np.array(self.agents[p].state[:2])) 168 | _ = self.agents[ii].update(action_vw, margin_pos) 169 | 170 | # Target and map observations 171 | observed = np.zeros(self.nb_targets, dtype=bool) 172 | # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state) 173 | # if obstacles_pt is None: 174 | obstacles_pt = (self.sensor_r, np.pi) 175 | 176 | # Update beliefs of targets 177 | for jj in range(self.nb_targets): 178 | # Observe 179 | obs, z_t = self.observation(self.targets[jj], self.agents[ii]) 180 | observed[jj] = obs 181 | if obs: # if observed, update the target belief. 182 | self.belief_targets[jj].update(z_t, self.agents[ii].state) 183 | 184 | r_b, alpha_b = util.relative_distance_polar(self.targets[jj].state[:2], 185 | xy_base=self.agents[ii].state[:2], 186 | theta_base=self.agents[ii].state[-1]) 187 | r_dot_b, alpha_dot_b = util.relative_velocity_polar( 188 | self.targets[jj].state[:2], 189 | self.targets[jj].state[2:], 190 | self.agents[ii].state[:2], self.agents[ii].state[-1], 191 | action_vw[0], action_vw[1]) 192 | obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b, float(obs)]) 193 | else: 194 | ## if no obs 195 | obs_dict[agent_id].append([0.0, 0.0, 0.0, 0.0, float(obs)]) 196 | 197 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 198 | # store obs dict before shuffle 199 | self._obs_dict[agent_id] = obs_dict[agent_id] 200 | # shuffle obs to promote permutation invariance 201 | self.rng.shuffle(obs_dict[agent_id]) 202 | # Get all rewards after all agents and targets move (t -> t+1) 203 | reward, done, mean_nlogdetcov = self.get_reward(observed, self.is_training) 204 | reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov 205 | return obs_dict, reward_dict, done_dict, info_dict 206 | 207 | def reward_fun(nb_targets, belief_targets, is_training=True, c_mean=0.1): 208 | detcov = [LA.det(b_target.cov) for b_target in belief_targets[:nb_targets]] 209 | r_detcov_mean = - np.mean(np.log(detcov)) 210 | reward = c_mean * r_detcov_mean 211 | 212 | mean_nlogdetcov = None 213 | if not(is_training): 214 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets[:nb_targets]] 215 | mean_nlogdetcov = -np.mean(logdetcov) 216 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/env/setTracking_vkGreedy.py: -------------------------------------------------------------------------------- 1 | import os, copy, pdb 2 | import numpy as np 3 | from numpy import linalg as LA 4 | from gym import spaces, logger 5 | from envs.maTTenv.maps import map_utils 6 | import envs.maTTenv.util as util 7 | from envs.maTTenv.agent_models import * 8 | from envs.maTTenv.belief_tracker import KFbelief 9 | from envs.maTTenv.metadata import METADATA 10 | from envs.maTTenv.env.maTracking_Base import maTrackingBase 11 | 12 | """ 13 | Target Tracking Environments for Reinforcement Learning. 
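[Target masking note]
Each agent is only shown the k closest target rows (k = 4, set in __init__);
when nb_targets <= k, all targets are kept. Minimal sketch of the selection used
in reset() and step() below, assuming obs is one agent's (nb_targets, 6) array
and np is numpy:

    close = np.argpartition(obs[:, 0], k)  # indices of the k smallest distances d (unordered)
    obs = obs[close[:k]]                   # keep only those k rows

np.argpartition avoids a full sort and leaves the kept rows unordered, which is
acceptable for a permutation-invariant policy.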
14 | [Variables] 15 | 16 | d: radial coordinate of a belief target in the learner frame 17 | alpha : angular coordinate of a belief target in the learner frame 18 | ddot : radial velocity of a belief target in the learner frame 19 | alphadot : angular velocity of a belief target in the learner frame 20 | Sigma : Covariance of a belief target 21 | 22 | [Environment Description] 23 | Varying number of agents, varying number of randomly moving targets 24 | No obstacles 25 | Greedy target assignment, k targets are shown to agent. 26 | 27 | setTrackingEnvkGreedy : Double Integrator Target model with KF belief tracker 28 | obs state: [d, alpha, ddot, alphadot, logdet(Sigma), observed] *nb_targets 29 | where nb_targets and nb_agents vary between a range 30 | num_targets describes the upperbound on possible number of targets in env 31 | num_agents describes the upperbound on possible number of agents in env 32 | Target : Double Integrator model, [x,y,xdot,ydot] 33 | Belief Target : KF, Double Integrator model 34 | 35 | """ 36 | 37 | class setTrackingEnvkGreedy(maTrackingBase): 38 | 39 | def __init__(self, num_agents=1, num_targets=2, map_name='empty', 40 | is_training=True, known_noise=True, **kwargs): 41 | super().__init__(num_agents=num_agents, num_targets=num_targets, 42 | map_name=map_name, is_training=is_training) 43 | 44 | self.id = 'setTracking-vkGreedy' 45 | ###=======================================================### 46 | # Assign agents to closest k targets, if less targets than k, consider all targets 47 | self.k = 4 48 | ###=======================================================### 49 | self.nb_agents = num_agents #only for init, will change with reset() 50 | self.nb_targets = num_targets #only for init, will change with reset() 51 | self.agent_dim = 3 52 | self.target_dim = 4 53 | self.target_init_vel = METADATA['target_init_vel']*np.ones((2,)) 54 | # LIMIT 55 | self.limit = {} # 0: low, 1:highs 56 | self.limit['agent'] = [np.concatenate((self.MAP.mapmin,[-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi]))] 57 | self.limit['target'] = [np.concatenate((self.MAP.mapmin,[-METADATA['target_vel_limit'], -METADATA['target_vel_limit']])), 58 | np.concatenate((self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))] 59 | rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0] # Maximum relative speed 60 | self.limit['state'] = [np.array(([0.0, -np.pi, -rel_vel_limit, -10*np.pi, -50.0, 0.0])), 61 | np.array(([600.0, np.pi, rel_vel_limit, 10*np.pi, 50.0, 2.0]))] 62 | self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) 63 | self.targetA = np.concatenate((np.concatenate((np.eye(2), self.sampling_period*np.eye(2)), axis=1), 64 | [[0,0,1,0],[0,0,0,1]])) 65 | self.target_noise_cov = METADATA['const_q'] * np.concatenate(( 66 | np.concatenate((self.sampling_period**3/3*np.eye(2), self.sampling_period**2/2*np.eye(2)), axis=1), 67 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 68 | if known_noise: 69 | self.target_true_noise_sd = self.target_noise_cov 70 | else: 71 | self.target_true_noise_sd = METADATA['const_q_true'] * np.concatenate(( 72 | np.concatenate((self.sampling_period**2/2*np.eye(2), self.sampling_period/2*np.eye(2)), axis=1), 73 | np.concatenate((self.sampling_period/2*np.eye(2), self.sampling_period*np.eye(2)),axis=1) )) 74 | 75 | # Build a robot 76 | self.setup_agents() 77 | # Build a target 78 | self.setup_targets() 79 | self.setup_belief_targets() 
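        # Note on self.target_noise_cov above: it is the standard discretized
        # white-noise-acceleration process noise for a double-integrator target,
        #   Q = const_q * [[T^3/3 * I2, T^2/2 * I2],
        #                  [T^2/2 * I2, T     * I2]]
        # with T = self.sampling_period, so const_q controls how quickly the KF
        # belief covariance grows between successful observations.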
80 | # Use custom reward 81 | self.get_reward() 82 | 83 | def setup_agents(self): 84 | self.agents = [AgentSE2(agent_id = 'agent-' + str(i), 85 | dim=self.agent_dim, sampling_period=self.sampling_period, 86 | limit=self.limit['agent'], 87 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 88 | for i in range(self.num_agents)] 89 | 90 | def setup_targets(self): 91 | self.targets = [AgentDoubleInt2D(agent_id = 'target-' + str(i), 92 | dim=self.target_dim, sampling_period=self.sampling_period, 93 | limit=self.limit['target'], 94 | collision_func=lambda x: map_utils.is_collision(self.MAP, x), 95 | A=self.targetA, W=self.target_true_noise_sd) 96 | for i in range(self.num_targets)] 97 | 98 | def setup_belief_targets(self): 99 | self.belief_targets = [KFbelief(agent_id = 'target-' + str(i), 100 | dim=self.target_dim, limit=self.limit['target'], A=self.targetA, 101 | W=self.target_noise_cov, obs_noise_func=self.observation_noise, 102 | collision_func=lambda x: map_utils.is_collision(self.MAP, x)) 103 | for i in range(self.num_targets)] 104 | 105 | def get_reward(self, obstacles_pt=None, observed=None, is_training=True): 106 | return reward_fun(self.nb_targets, self.belief_targets, is_training) 107 | 108 | def reset(self,**kwargs): 109 | """ 110 | Random initialization a number of agents and targets at the reset of the env epsiode. 111 | Agents are given random positions in the map, targets are given random positions near a random agent. 112 | Return an observation state dict with agent ids (keys) that refer to their observation 113 | """ 114 | try: 115 | self.nb_agents = kwargs['nb_agents'] 116 | self.nb_targets = kwargs['nb_targets'] 117 | except: 118 | self.nb_agents = np.random.randint(1, self.num_agents) 119 | self.nb_targets = np.random.randint(1, self.num_targets) 120 | obs_dict = {} 121 | init_pose = self.get_init_pose(**kwargs) 122 | # Initialize agents 123 | for ii in range(self.nb_agents): 124 | self.agents[ii].reset(init_pose['agents'][ii]) 125 | obs_dict[self.agents[ii].agent_id] = [] 126 | 127 | # Initialize targets and beliefs 128 | for nn in range(self.nb_targets): 129 | self.belief_targets[nn].reset( 130 | init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))), 131 | init_cov=self.target_init_cov) 132 | self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel))) 133 | # For nb agents calculate belief of targets assigned 134 | for jj in range(self.nb_targets): 135 | for kk in range(self.nb_agents): 136 | r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2], 137 | xy_base=self.agents[kk].state[:2], 138 | theta_base=self.agents[kk].state[2]) 139 | logdetcov = np.log(LA.det(self.belief_targets[jj].cov)) 140 | obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, logdetcov, 0.0]) 141 | 142 | if self.nb_targets > self.k: 143 | for agent_id in obs_dict: 144 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 145 | close = np.argpartition(obs_dict[agent_id][:,0], self.k) 146 | obs_dict[agent_id] = obs_dict[agent_id][close[:self.k]] 147 | else: 148 | for agent_id in obs_dict: 149 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 150 | 151 | return obs_dict 152 | 153 | def step(self, action_dict): 154 | obs_dict = {} 155 | reward_dict = {} 156 | done_dict = {'__all__':False} 157 | info_dict = {} 158 | 159 | # Targets move (t -> t+1) 160 | for n in range(self.nb_targets): 161 | self.targets[n].update() 162 | self.belief_targets[n].predict() # Belief state at t+1 163 | # Agents move (t -> 
t+1) and observe the targets 164 | for ii, agent_id in enumerate(action_dict): 165 | obs_dict[self.agents[ii].agent_id] = [] 166 | reward_dict[self.agents[ii].agent_id] = [] 167 | done_dict[self.agents[ii].agent_id] = [] 168 | 169 | action_vw = self.action_map[action_dict[agent_id]] 170 | 171 | # Locations of all targets and agents in order to maintain a margin between them 172 | margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]] 173 | for p, ids in enumerate(action_dict): 174 | if agent_id != ids: 175 | margin_pos.append(np.array(self.agents[p].state[:2])) 176 | _ = self.agents[ii].update(action_vw, margin_pos) 177 | 178 | # Target and map observations 179 | observed = np.zeros(self.nb_targets, dtype=bool) 180 | # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state) 181 | # if obstacles_pt is None: 182 | obstacles_pt = (self.sensor_r, np.pi) 183 | 184 | # Update beliefs of targets 185 | for jj in range(self.nb_targets): 186 | # Observe 187 | obs, z_t = self.observation(self.targets[jj], self.agents[ii]) 188 | observed[jj] = obs 189 | if obs: # if observed, update the target belief. 190 | self.belief_targets[jj].update(z_t, self.agents[ii].state) 191 | 192 | r_b, alpha_b = util.relative_distance_polar(self.belief_targets[jj].state[:2], 193 | xy_base=self.agents[ii].state[:2], 194 | theta_base=self.agents[ii].state[-1]) 195 | r_dot_b, alpha_dot_b = util.relative_velocity_polar( 196 | self.belief_targets[jj].state[:2], 197 | self.belief_targets[jj].state[2:], 198 | self.agents[ii].state[:2], self.agents[ii].state[-1], 199 | action_vw[0], action_vw[1]) 200 | obs_dict[agent_id].append([r_b, alpha_b, r_dot_b, alpha_dot_b, 201 | np.log(LA.det(self.belief_targets[jj].cov)), float(obs)]) 202 | # Assign agents to closest k targets, if less targets than k, consider all targets 203 | if self.nb_targets > self.k: 204 | for agent_id in obs_dict: 205 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 206 | close = np.argpartition(obs_dict[agent_id][:,0], self.k) 207 | obs_dict[agent_id] = obs_dict[agent_id][close[:self.k]] 208 | else: 209 | for agent_id in obs_dict: 210 | obs_dict[agent_id] = np.asarray(obs_dict[agent_id]) 211 | # Get all rewards after all agents and targets move (t -> t+1) 212 | reward, done, mean_nlogdetcov = self.get_reward(obstacles_pt, observed, self.is_training) 213 | reward_dict['__all__'], done_dict['__all__'], info_dict['mean_nlogdetcov'] = reward, done, mean_nlogdetcov 214 | return obs_dict, reward_dict, done_dict, info_dict 215 | 216 | def reward_fun(nb_targets, belief_targets, is_training=True, c_mean=0.1): 217 | detcov = [LA.det(b_target.cov) for b_target in belief_targets[:nb_targets]] 218 | r_detcov_mean = - np.mean(np.log(detcov)) 219 | reward = c_mean * r_detcov_mean 220 | 221 | mean_nlogdetcov = None 222 | if not(is_training): 223 | logdetcov = [np.log(LA.det(b_target.cov)) for b_target in belief_targets[:nb_targets]] 224 | mean_nlogdetcov = -np.mean(logdetcov) 225 | return reward, False, mean_nlogdetcov -------------------------------------------------------------------------------- /envs/maTTenv/gen_init_pose.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates a set of initial positions of targets and the agent in ttenv. 3 | If you want to have more conditions to generate initial positions other than 4 | the current metadata version, provide values for the additional variables to 5 | the reset function. 
For example, 6 | ex_var = {'init_distance_min':10.0, 7 | 'init_distacne_max':15.0, 8 | 'target_direction':False, 9 | 'belief_direction':False, 10 | 'blocked':True } 11 | obs.reset(**ex_var) 12 | """ 13 | import numpy as np 14 | import envs 15 | import argparse 16 | import pickle 17 | import os, time 18 | 19 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 20 | parser.add_argument('--env', help='environment ID', default='maTracking-v2') 21 | parser.add_argument('--render', type=int, default=1) 22 | parser.add_argument('--map', type=str, default="emptyMed") 23 | parser.add_argument('--nb_agents', type=int, default=2) 24 | parser.add_argument('--nb_targets', type=int, default=2) 25 | parser.add_argument('--nb_init_pose', type=int, default=10) 26 | parser.add_argument('--log_dir', type=str, default='.') 27 | 28 | args = parser.parse_args() 29 | 30 | def main(): 31 | env = envs.make(args.env, 32 | 'ma_target_tracking', 33 | render=bool(args.render), 34 | directory=args.log_dir, 35 | map_name=args.map, 36 | num_agents=args.nb_agents, 37 | num_targets=args.nb_targets, 38 | is_training=False, 39 | ) 40 | timelimit_env = env 41 | while( not hasattr(timelimit_env, '_elapsed_steps')): 42 | timelimit_env = timelimit_env.env 43 | init_pose = [] 44 | while(len(init_pose) < args.nb_init_pose): # test episode 45 | obs, done = env.reset(), False 46 | if args.render: 47 | env.render() 48 | notes = input("%d, Pass? y/n"%len(init_pose)) 49 | if notes == "y": 50 | init_pose.append({'agents':[timelimit_env.env.agents[i].state for i in range(args.nb_agents)], 51 | 'targets':[timelimit_env.env.targets[i].state for i in range(args.nb_targets)], 52 | 'belief_targets':[timelimit_env.env.belief_targets[i].state for i in range(args.nb_targets)]}) 53 | 54 | pickle.dump(init_pose, open(os.path.join(args.log_dir,'init_pose_random_1015.pkl'), 'wb')) 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty100.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/empty100.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty100.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [250, 250] 4 | mapres: [.5, .5] 5 | mapdim: [500, 500] 6 | origin: [125, 125] 7 | origincells: [125, 125] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty1000.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/empty1000.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty1000.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [790, 790] 4 | mapres: [.5, .5] 5 | mapdim: [1580, 1580] 6 | origin: [395, 395] 7 | origincells: [395, 395] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty20.cfg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/empty20.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty20.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [112, 112] 4 | mapres: [.5, .5] 5 | mapdim: [224, 224] 6 | origin: [56, 56] 7 | origincells: [56, 56] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty8.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/empty8.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/empty8.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [70, 70] 4 | mapres: [.5, .5] 5 | mapdim: [140, 140] 6 | origin: [35, 35] 7 | origincells: [35, 35] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/emptyMed.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/emptyMed.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/emptyMed.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [51, 51] 4 | mapres: [.5, .5] 5 | mapdim: [101, 101] 6 | origin: [25.5, 25.5] 7 | origincells: [50, 50] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/emptySmall.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/maTTenv/maps/emptySmall.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/emptySmall.yaml: -------------------------------------------------------------------------------- 1 | # Empty Map 2 | mapmin: [0, 0] 3 | mapmax: [27, 27] 4 | mapres: [.2, .2] 5 | mapdim: [135, 135] 6 | origin: [13.5, 13.5] 7 | origincells: [67, 67] 8 | datatype: c 9 | storage: colmajor 10 | mappath: emptyMap.cfg -------------------------------------------------------------------------------- /envs/maTTenv/maps/map_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions / Objects for occupancy grid maps. 3 | 4 | !!IMPORTANT!! xy-rc coordinate relation: 5 | 2D map. (xmin, ymin) is located at the bottom left corner. 6 | rows are counted from the bottom to top. 7 | columns are counted from left to right. 8 | 9 | Matplotlib.pyplot displays a matrix with (r,c)=(0,0) located at the top left. 10 | Therefore, in the display_wrapper, the map is flipped. 
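Worked example (illustrative numbers, assuming mapmin = [0, 0] and
mapres = [.5, .5] as in the empty map configs above), for a GridMap instance m:
    m.se2_to_cell([1.2, 3.0]) -> (2, 6)         # first index from x, second from y
    m.cell_to_se2((2, 6))     -> [1.25, 3.25]   # cell center, not the original point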
11 | """ 12 | import numpy as np 13 | import yaml 14 | 15 | def round(x): 16 | if x >= 0: 17 | return int(x+0.5) 18 | else: 19 | return int(x-0.5) 20 | 21 | class GridMap(object): 22 | def __init__(self, map_path, r_max=1.0, fov=np.pi, margin2wall=0.5): 23 | map_config = yaml.safe_load(open(map_path+".yaml", "r")) 24 | self.map = np.loadtxt(map_path+".cfg") 25 | if 'empty' in map_path: 26 | self.map = None 27 | else: 28 | self.map_linear = np.squeeze(self.map.astype(np.int8).reshape(-1, 1)) 29 | self.mapdim = map_config['mapdim'] 30 | self.mapres = np.array(map_config['mapres']) 31 | self.mapmin = np.array(map_config['mapmin']) 32 | self.mapmax = np.array(map_config['mapmax']) 33 | self.margin2wall = margin2wall 34 | self.origin = map_config['origin'] 35 | self.r_max = r_max 36 | self.fov = fov 37 | 38 | def se2_to_cell(self, pos): 39 | pos = pos[:2] 40 | cell_idx = (pos - self.mapmin)/self.mapres - 0.5 41 | return round(cell_idx[0]), round(cell_idx[1]) 42 | 43 | def cell_to_se2(self, cell_idx): 44 | return ( np.array(cell_idx) + 0.5 ) * self.mapres + self.mapmin 45 | 46 | def bresenham2D(sx, sy, ex, ey): 47 | """ 48 | Bresenham's ray tracing algorithm in 2D from ESE650 2017 TA resources 49 | Inputs: 50 | (sx, sy) start point of ray 51 | (ex, ey) end point of ray 52 | Outputs: 53 | Indicies for x-axis and y-axis 54 | """ 55 | sx = int(round(sx)) 56 | sy = int(round(sy)) 57 | ex = int(round(ex)) 58 | ey = int(round(ey)) 59 | dx = abs(ex-sx) 60 | dy = abs(ey-sy) 61 | steep = abs(dy)>abs(dx) 62 | if steep: 63 | dx,dy = dy,dx # swap 64 | 65 | if dy == 0: 66 | q = np.zeros((dx+1,1)) 67 | else: 68 | q = np.append(0,np.greater_equal(np.diff(np.mod(np.arange( np.floor(dx/2), -dy*dx+np.floor(dx/2)-1,-dy),dx)),0)) 69 | if steep: 70 | if sy <= ey: 71 | y = np.arange(sy,ey+1) 72 | else: 73 | y = np.arange(sy,ey-1,-1) 74 | if sx <= ex: 75 | x = sx + np.cumsum(q) 76 | else: 77 | x = sx - np.cumsum(q) 78 | else: 79 | if sx <= ex: 80 | x = np.arange(sx,ex+1) 81 | else: 82 | x = np.arange(sx,ex-1,-1) 83 | if sy <= ey: 84 | y = sy + np.cumsum(q) 85 | else: 86 | y = sy - np.cumsum(q) 87 | return np.vstack((x,y)).astype(np.int16) 88 | 89 | def coord_change2g(vec, ang): 90 | assert(len(vec) == 2) 91 | # R * v 92 | return np.array([[np.cos(ang), -np.sin(ang)], [np.sin(ang), np.cos(ang)]])@vec 93 | 94 | def is_collision_ray_cell(map_obj, cell): 95 | """ 96 | cell : cell r, c index from left bottom. 
97 | """ 98 | idx = cell[0] + map_obj.mapdim[0] * cell[1] 99 | if (cell[0] < 0) or (cell[1] < 0) or (cell[0] >= map_obj.mapdim[0]) or (cell[1] >= map_obj.mapdim[1]): 100 | return True 101 | elif (map_obj.map is not None) and map_obj.map_linear[idx] == 1: 102 | return True 103 | else: 104 | return False 105 | def is_blocked(map_obj, start_pos, end_pos): 106 | if map_obj.map is None: 107 | return False 108 | start_rc = map_obj.se2_to_cell(start_pos) 109 | end_rc = map_obj.se2_to_cell(end_pos) 110 | ray_cells = bresenham2D(start_rc[0], start_rc[1], end_rc[0], end_rc[1]) 111 | i = 0 112 | while(i < ray_cells.shape[-1]): 113 | if is_collision_ray_cell(map_obj, ray_cells[:,i]): 114 | return True 115 | i += 1 116 | return False 117 | 118 | def get_front_obstacle(map_obj, odom, **kwargs): 119 | ro_min_t = None 120 | start_rc = map_obj.se2_to_cell(odom[:2]) 121 | end_pt_global_frame = coord_change2g(np.array([map_obj.r_max*np.cos(0.0), map_obj.r_max*np.sin(0.0)]), odom[-1]) + odom[:2] 122 | if map_obj.map is None: 123 | if not(in_bound(map_obj, end_pt_global_frame)): 124 | end_rc = map_obj.se2_to_cell(end_pt_global_frame) 125 | ray_cells = bresenham2D(start_rc[0], start_rc[1], end_rc[0], end_rc[1]) 126 | i = 0 127 | while(i < ray_cells.shape[-1]): 128 | pt = map_obj.cell_to_se2(ray_cells[:,i]) 129 | if not(in_bound(map_obj, pt)): 130 | break 131 | i += 1 132 | if i < ray_cells.shape[-1]: # break! 133 | ro_min_t = np.sqrt(np.sum(np.square(pt - odom[:2]))) 134 | 135 | else: 136 | end_rc = map_obj.se2_to_cell(end_pt_global_frame) 137 | ray_cells = bresenham2D(start_rc[0], start_rc[1], end_rc[0], end_rc[1]) 138 | i = 0 139 | while(i < ray_cells.shape[-1]): # break! 140 | if is_collision_ray_cell(map_obj, ray_cells[:,i]): 141 | break 142 | i += 1 143 | if i < ray_cells.shape[-1]: 144 | ro_min_t = np.sqrt(np.sum(np.square(map_obj.cell_to_se2(ray_cells[:,i]) - odom[:2]))) 145 | 146 | if ro_min_t is None: 147 | return None 148 | else: 149 | return ro_min_t, 0.0 150 | 151 | def get_closest_obstacle(map_obj, odom, ang_res=0.05): 152 | """ 153 | Return the closest obstacle/boundary cell 154 | """ 155 | ang_grid = np.arange(-.5*map_obj.fov, .5*map_obj.fov, ang_res) 156 | closest_obstacle = (map_obj.r_max, 0.0) 157 | start_rc = map_obj.se2_to_cell(odom[:2]) 158 | for ang in ang_grid: 159 | end_pt_global_frame = coord_change2g(np.array([map_obj.r_max*np.cos(ang), map_obj.r_max*np.sin(ang)]), odom[-1]) + odom[:2] 160 | 161 | if map_obj.map is None: 162 | if not(in_bound(map_obj, end_pt_global_frame)): 163 | end_rc = map_obj.se2_to_cell(end_pt_global_frame) 164 | ray_cells = bresenham2D(start_rc[0], start_rc[1], end_rc[0], end_rc[1]) 165 | i = 0 166 | while(i < ray_cells.shape[-1]): 167 | pt = map_obj.cell_to_se2(ray_cells[:,i]) 168 | if not(in_bound(map_obj, pt)): 169 | break 170 | i += 1 171 | if i < ray_cells.shape[-1]: # break! 172 | ro_min_t = np.sqrt(np.sum(np.square(pt - odom[:2]))) 173 | if ro_min_t < closest_obstacle[0]: 174 | closest_obstacle = (ro_min_t, ang) 175 | 176 | else: 177 | end_rc = map_obj.se2_to_cell(end_pt_global_frame) 178 | ray_cells = bresenham2D(start_rc[0], start_rc[1], end_rc[0], end_rc[1]) 179 | i = 0 180 | while(i < ray_cells.shape[-1]): # break! 
181 | if is_collision_ray_cell(map_obj, ray_cells[:,i]): 182 | break 183 | i += 1 184 | if i < ray_cells.shape[-1]: 185 | ro_min_t = np.sqrt(np.sum(np.square(map_obj.cell_to_se2(ray_cells[:,i]) - odom[:2]))) 186 | if ro_min_t < closest_obstacle[0]: 187 | closest_obstacle = (ro_min_t, ang) 188 | 189 | if closest_obstacle[0] == map_obj.r_max: 190 | return None 191 | else: 192 | return closest_obstacle 193 | 194 | def is_collision(map_obj, pos): 195 | if not(in_bound(map_obj, pos)): 196 | return True 197 | else: 198 | if map_obj.map is not None: 199 | n = np.ceil(map_obj.margin2wall/map_obj.mapres).astype(np.int16) 200 | cell = np.minimum([map_obj.mapdim[0]-1,map_obj.mapdim[1]-1] , map_obj.se2_to_cell(pos)) 201 | for r_add in np.arange(-n[1],n[1],1): 202 | for c_add in np.arange(-n[0],n[0],1): 203 | x_c = np.clip(cell[0]+r_add, 0, map_obj.mapdim[0]-1).astype(np.int16) 204 | y_c = np.clip(cell[1]+c_add,0,map_obj.mapdim[1]-1).astype(np.int16) 205 | idx = x_c + map_obj.mapdim[0] * y_c 206 | if map_obj.map_linear[idx] == 1: 207 | return True 208 | return False 209 | 210 | def in_bound(map_obj, pos): 211 | return not((pos[0] < map_obj.mapmin[0] + map_obj.margin2wall) 212 | or (pos[0] > map_obj.mapmax[0] - map_obj.margin2wall) 213 | or (pos[1] < map_obj.mapmin[1] + map_obj.margin2wall) 214 | or (pos[1] > map_obj.mapmax[1] - map_obj.margin2wall)) 215 | 216 | def local_map(map_obj, im_size, odom): 217 | """ 218 | im_size : the number of rows/columns 219 | """ 220 | R=np.array([[np.cos(odom[2] - np.pi/2), -np.sin(odom[2] - np.pi/2)], 221 | [np.sin(odom[2] - np.pi/2), np.cos(odom[2] - np.pi/2)]]) 222 | 223 | local_map = np.zeros((im_size, im_size)) 224 | local_mapmin = np.array([-im_size/2*map_obj.mapres[0], 0.0]) 225 | for r in range(im_size): 226 | for c in range(im_size): 227 | xy_local = cell_to_se2([r,c], local_mapmin, map_obj.mapres) 228 | xy_global = np.matmul(R, xy_local) + odom[:2] 229 | local_map[c,r] = int(is_collision_ray_cell(map_obj, map_obj.se2_to_cell(xy_global))) 230 | local_mapmin_g = np.matmul(R, local_mapmin) + odom[:2] 231 | # indvec = np.reshape([[[r,c] for r in range(im_size)] for c in range(im_size)], (-1,2)) 232 | # xy_local = cell_to_se2_batch(indvec, local_mapmin, map_obj.mapres) 233 | # xy_global = np.add(np.matmul(R, xy_local.T).T odom[:2]) 234 | 235 | return local_map, local_mapmin_g 236 | 237 | def se2_to_cell(pos, mapmin, mapres): 238 | pos = pos[:2] 239 | cell_idx = (pos - mapmin)/mapres - 0.5 240 | return round(cell_idx[0]), round(cell_idx[1]) 241 | 242 | def cell_to_se2(cell_idx, mapmin, mapres): 243 | return ( np.array(cell_idx) + 0.5 ) * mapres + mapmin 244 | 245 | def se2_to_cell_batch(pos, mapmin, mapres): 246 | """ 247 | Coversion for Batch input : pos = [batch_size, 2 or 3] 248 | 249 | OUTPUT: [batch_size,], [batch_size,] 250 | """ 251 | return round((pos[:,0] - mapmin[0])/mapres[0] - 0.5), round((pos[:,1] - mapmin[1])/mapres[1] - 0.5) 252 | 253 | def cell_to_se2_batch(cell_idx, mapmin, mapres): 254 | """ 255 | Coversion for Batch input : cell_idx = [batch_size, 2] 256 | 257 | OUTPUT: [batch_size, 2] 258 | """ 259 | return (cell_idx[:,0] + 0.5) * mapres[0] + mapmin[0], (cell_idx[:,1] + 0.5) * mapres[1] + mapmin[1] 260 | 261 | def generate_trajectory(map_obj): 262 | import matplotlib 263 | matplotlib.use('TkAgg') 264 | from matplotlib import pyplot as plt 265 | from scipy.interpolate import interp1d 266 | 267 | ax = plt.gca() 268 | fig = plt.gcf() 269 | implot = ax.imshow(map_obj.map, cmap='gray_r', origin='lower', extent=[map_obj.mapmin[0], map_obj.mapmax[0], 
map_obj.mapmin[1], map_obj.mapmax[1]]) 270 | 271 | path_points = [] 272 | def onclick(event): 273 | if event.xdata != None and event.ydata != None: 274 | ax.plot(event.xdata, event.ydata, 'ro') 275 | plt.draw() 276 | path_points.append([event.xdata, event.ydata]) 277 | cid = fig.canvas.mpl_connect('button_press_event', onclick) 278 | 279 | plt.show() 280 | print("%d points selected"%len(path_points)) 281 | path_points = np.array(path_points) 282 | interpol_num = 5 283 | passed = False 284 | vel = 0.2 285 | T_step = 100 286 | while(not passed): 287 | ax = plt.gca() 288 | fig = plt.gcf() 289 | implot = ax.imshow(map_obj.map, cmap='gray_r', origin='lower', extent=[map_obj.mapmin[0], map_obj.mapmax[0], map_obj.mapmin[1], map_obj.mapmax[1]]) 290 | 291 | sorted_paths = [] 292 | newx = [] 293 | newy = [] 294 | 295 | t_done = False 296 | t = 1 297 | while(not t_done): 298 | tmp = [path_points[t-1]] 299 | tmp_x = [] 300 | while((path_points[t-1,0] < path_points[t,0])): 301 | tmp.append(path_points[t]) 302 | distance = np.sqrt(np.sum(np.square( path_points[t] - path_points[t-1] ))) 303 | num = distance / (0.5 * vel) 304 | tmp_x.extend(np.linspace(path_points[t-1,0], path_points[t,0], num=num, endpoint=False)) 305 | t += 1 306 | if t >= len(path_points): 307 | t_done = True 308 | break 309 | if len(tmp) > 1: 310 | tmp = np.array(tmp) 311 | f0 = interp1d(tmp[:,0], tmp[:,1], kind='cubic') 312 | newx.extend(tmp_x) 313 | newy.extend(f0(np.array(tmp_x))) 314 | sorted_paths.append(tmp) 315 | if t_done: 316 | break 317 | 318 | tmp = [path_points[t-1]] 319 | tmp_x = [] 320 | while((path_points[t-1,0] >= path_points[t,0])): 321 | tmp.append(path_points[t]) 322 | distance = np.sqrt(np.sum(np.square( path_points[t] - path_points[t-1] ))) 323 | num = distance / (0.5 * vel) 324 | tmp_x.extend(np.linspace(path_points[t-1,0], path_points[t,0], num=num, endpoint=False)) 325 | t += 1 326 | if t >= len(path_points): 327 | t_done = True 328 | break 329 | tmp = np.array(tmp) 330 | f0 = interp1d(np.flip(tmp[:,0], -1), np.flip(tmp[:,1], -1), kind='cubic') 331 | newx.extend(tmp_x) 332 | newy.extend(f0(np.array(tmp_x))) 333 | sorted_paths.append(tmp) 334 | 335 | ax.plot(path_points[:,0], path_points[:,1],'ro') 336 | ax.plot(newx, newy,'b.') 337 | print("Total point number: %d"%len(newx)) 338 | plt.show() 339 | if len(newx) > T_step: 340 | passed = True 341 | else: 342 | interpol_num += 1 343 | newx = np.array(newx) 344 | newy = np.array(newy) 345 | x_vel = (newx[1:]-newx[:-1])/0.5 346 | y_vel = (newy[1:]-newy[:-1])/0.5 347 | theta = np.arctan2(newy[1:]-newy[:-1], newx[1:]-newx[:-1]) 348 | 349 | final = np.concatenate((newx[:-1, np.newaxis], newy[:-1, np.newaxis], theta[:,np.newaxis], x_vel[:,np.newaxis], y_vel[:,np.newaxis]), axis=1) 350 | np.save(open("path_sh_1.npy", "wb"), final) 351 | 352 | 353 | def generate_map(mapname, mapdim=(8,4), mapres=0.1): 354 | new_map = np.zeros((int(mapdim[0]/mapres), int(mapdim[1]/mapres)), dtype=np.int8) 355 | """ 356 | 0 0 0 0 357 | 0 1 0 0 358 | 0 0 0 0 359 | 0 0 1 0 360 | 0 0 0 0 361 | 0 1 0 0 362 | 0 1 0 0 363 | 0 0 0 0 364 | """ 365 | # Boundary 366 | new_map[0,:] = 1.0 367 | new_map[:,0] = 1.0 368 | new_map[-1,:] = 1.0 369 | new_map[:,-1] = 1.0 370 | 371 | # Obstacles 372 | new_map[int(1.0/mapres):int(2.0/mapres), int(1.0/mapres):int(2.0/mapres)] = 1.0 373 | new_map[int(3.0/mapres):int(4.0/mapres), int(2.0/mapres):int(3.0/mapres)] = 1.0 374 | new_map[int(5.0/mapres):int(7.0/mapres), int(1.0/mapres):int(2.0/mapres)] = 1.0 375 | 376 | new_map = new_map.astype(np.int8) 377 | 
np.savetxt(mapname, new_map, fmt='%d') 378 | -------------------------------------------------------------------------------- /envs/maTTenv/metadata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ##Beliefs are initialized near target 4 | METADATA={ 5 | 'version' : 1, 6 | 'sensor_r': 10.0, 7 | 'fov' : 90, 8 | 'sensor_r_sd': 0.2, # sensor range noise. 9 | 'sensor_b_sd': 0.01, # sensor bearing noise. 10 | 'target_init_cov': 30.0, # initial target diagonal Covariance. 11 | 'target_init_vel': 0.0, # target's initial velocity. 12 | 'target_vel_limit': 2.0, # velocity limit of targets. 13 | 'init_distance_min': 5.0, # the minimum distance btw targets and the agent. 14 | 'init_distance_max': 10.0, # the maximum distance btw targets and the agent. 15 | 'init_belief_distance_min': 0.0, # the minimum distance btw belief and the target. 16 | 'init_belief_distance_max': 5.0, # the maximum distance btw belief and the target. 17 | 'margin': 1.0, # a marginal distance btw targets and the agent. 18 | 'margin2wall': 0.5, # a marginal distance from a wall. 19 | 'action_v': [2, 1.33, 0.67, 0], # action primitives - linear velocities. 20 | 'action_w': [np.pi/2, 0, -np.pi/2], # action primitives - angular velocities. 21 | 'const_q': 0.001, # target noise constant in beliefs. 22 | 'const_q_true': 0.01, # target noise constant of actual targets. 23 | } -------------------------------------------------------------------------------- /envs/maTTenv/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def wrap_around(x): 3 | # x \in [-pi,pi) 4 | if x >= np.pi: 5 | return x - 2*np.pi 6 | elif x < -np.pi: 7 | return x + 2*np.pi 8 | else: 9 | return x 10 | 11 | def vectorized_wrap_around(x): 12 | x[x>=np.pi] -= 2*np.pi 13 | x[x<-np.pi] += 2*np.pi 14 | return x 15 | 16 | def global_relative_measure(x_target, x_main): 17 | diff = x_target - x_main 18 | r = np.sqrt(np.sum(diff**2, axis=1)) 19 | alpha = vectorized_wrap_around(np.arctan2(diff[:,1],diff[:,0])) 20 | #Rearrange to [r,alpha]*(nb_agents,nb_targets) 21 | global_state = np.ndarray.flatten(np.vstack((r,alpha)).T) 22 | return global_state 23 | 24 | def cartesian2polar(xy): 25 | r = np.sqrt(np.sum(xy**2)) 26 | alpha = np.arctan2(xy[1], xy[0]) 27 | return r, alpha 28 | 29 | def cartesian2polar_dot(x, y, x_dot, y_dot): 30 | r2 = x*x + y*y 31 | if r2 == 0.0: 32 | return 0.0, 0.0 33 | r_dot = (x*x_dot + y*y_dot)/np.sqrt(r2) 34 | alpha_dot = (x*y_dot - x_dot*y)/r2 35 | return r_dot, alpha_dot 36 | 37 | def transform_2d(vec, theta_base, xy_base = [0.0, 0.0]): 38 | """ 39 | Both vec and frame_xy are in the global coordinate. vec is a vector 40 | you want to transform with respect to a certain frame which is located at 41 | frame_xy with ang. 42 | R^T * (vec - frame_xy). 43 | R is a rotation matrix of the frame w.r.t the global frame. 44 | """ 45 | assert(len(vec) == 2) 46 | return np.matmul([[np.cos(theta_base), np.sin(theta_base)], [-np.sin(theta_base), np.cos(theta_base)]], 47 | vec - np.array(xy_base)) 48 | 49 | def rotation_2d_dot(xy_target, xy_dot_target, theta_base, theta_dot_base): 50 | """ 51 | Cartesian velocity in a rotating frame. 
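The angle and polar helpers above feed the environment's relative-state computations. A quick usage sketch (the import assumes the repo root is on PYTHONPATH, e.g. via `source setup`; the sample positions are made up):

```python
import numpy as np
from envs.maTTenv.util import wrap_around, cartesian2polar, global_relative_measure

# Angles are kept in [-pi, pi).
print(wrap_around(3*np.pi/2))        # -> -pi/2 (about -1.5708)

# Cartesian offset -> (range, bearing).
r, alpha = cartesian2polar(np.array([3.0, 4.0]))
print(r, alpha)                      # -> 5.0, ~0.927 rad

# Range/bearing from one agent to every target, flattened as [r, alpha, r, alpha, ...].
x_targets = np.array([[5.0, 0.0], [0.0, 5.0]])
x_agent = np.array([0.0, 0.0])
print(global_relative_measure(x_targets, x_agent))   # -> [5., 0., 5., ~1.5708]
```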
52 | """ 53 | s_b = np.sin(theta_base) 54 | c_b = np.cos(theta_base) 55 | x_dot_target_bframe = (-s_b * xy_target[0] + c_b * xy_target[1]) * theta_dot_base + \ 56 | c_b * xy_dot_target[0] + s_b * xy_dot_target[1] 57 | y_dot_target_bframe = - (c_b * xy_target[0] + s_b * xy_target[1]) * theta_dot_base - \ 58 | s_b * xy_dot_target[0] +c_b * xy_dot_target[1] 59 | return x_dot_target_bframe, y_dot_target_bframe 60 | 61 | def transform_2d_dot(xy_target, xy_dot_target, theta_base, theta_dot_base, xy_base, xy_dot_base): 62 | """ 63 | Cartesian velocity in a rotating and translating frame. 64 | """ 65 | rotated_xy_dot_target_bframe = rotation_2d_dot(xy_target, xy_dot_target, theta_base, theta_dot_base) 66 | rotated_xy_dot_base_bframe = rotation_2d_dot(xy_base, xy_dot_base, theta_base, theta_dot_base) 67 | return np.array(rotated_xy_dot_target_bframe) - np.array(rotated_xy_dot_base_bframe) 68 | 69 | def relative_distance_polar(xy_target, xy_base, theta_base): 70 | xy_target_base = transform_2d(xy_target, theta_base, xy_base) 71 | return cartesian2polar(xy_target_base) 72 | 73 | def relative_velocity_polar(xy_target, xy_dot_target, xy_base, theta_base, v_base, w_base): 74 | """ 75 | Relative velocity in a given polar coordinate (radial velocity, angular velocity). 76 | 77 | Parameters: 78 | --------- 79 | xy_target : xy coordinate of a target in the global frame. 80 | xy_dot_target : xy velocity of a target in the global frame. 81 | xy_base : xy coordinate of the origin of a base frame in the global frame. 82 | theta_base : orientation of a base frame in the global frame. 83 | v_base : translational velocity of a base frame in the global frame. 84 | w_base : rotational velocity of a base frame in the global frame. 85 | """ 86 | xy_dot_base = vw_to_xydot(v_base, w_base, theta_base) 87 | xy_target_base = transform_2d(xy_target, theta_base, xy_base) 88 | xy_dot_target_base = transform_2d_dot(xy_target, xy_dot_target, theta_base, 89 | w_base, xy_base, xy_dot_base) 90 | r_dot_b, alpha_dot_b = cartesian2polar_dot(xy_target_base[0], 91 | xy_target_base[1], xy_dot_target_base[0], xy_dot_target_base[1]) 92 | return r_dot_b, alpha_dot_b 93 | 94 | def relative_velocity_polar_se2(xyth_target, vw_target, xyth_base, vw_base): 95 | """ 96 | Radial and angular velocity of the target with respect to the base frame 97 | located at xy_b with a rotation of theta_b, moving with a translational 98 | velocity v and rotational velocity w. This function is designed specifically 99 | for the SE2 Agent and SE2 Target target case. 100 | 101 | Parameters 102 | --------- 103 | xyth_target : (x, y, orientation) of a target in the global frame. 104 | vw_target : translational and rotational velocity of a target in the global frame. 105 | xyth_base : (x, y, orientation) of a base frame in the global frame. 106 | vw_base : translational and rotational velocity of a base frame in the global frame. 107 | """ 108 | xy_dot_target = vw_to_xydot(vw_target[0], vw_target[1], xyth_base[2]) 109 | return relative_velocity_polar(xyth_target[:2], xy_dot_target, xyth_base[:2], 110 | xyth_base[2], vw_base[0], vw_base[1]) 111 | 112 | def vw_to_xydot(v, w, theta): 113 | """ 114 | Conversion from translational and rotational velocity to cartesian velocity 115 | in the global frame according to differential-drive dynamics. 116 | 117 | Parameters 118 | --------- 119 | v : translational velocity. 120 | w : rotational velocity. 121 | theta : orientation of a base object in the global frame. 
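To make the frame conventions in these helpers concrete, here is a small sketch computing the range/bearing of a target and their rates as seen from a moving agent (all poses and velocities are invented for illustration; the import assumes the repo root is on PYTHONPATH):

```python
import numpy as np
from envs.maTTenv.util import relative_distance_polar, relative_velocity_polar

# Agent (base frame): at the origin, facing +y, driving straight ahead at 1 m/s.
xy_base, theta_base = np.array([0.0, 0.0]), np.pi/2
v_base, w_base = 1.0, 0.0

# Target: 3 m directly ahead of the agent, drifting sideways at 0.5 m/s.
xy_target = np.array([0.0, 3.0])
xy_dot_target = np.array([0.5, 0.0])

r, alpha = relative_distance_polar(xy_target, xy_base, theta_base)
r_dot, alpha_dot = relative_velocity_polar(xy_target, xy_dot_target,
                                           xy_base, theta_base, v_base, w_base)
print(r, alpha)          # -> 3.0, ~0.0    (dead ahead in the agent frame)
print(r_dot, alpha_dot)  # -> -1.0, ~-0.17 (range closing; bearing decreasing)
```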
122 | """ 123 | if w < 0.001: 124 | x_dot = v * np.cos(theta + w/2) 125 | y_dot = v * np.sin(theta + w/2) 126 | else: 127 | x_dot = v/w * (np.sin(theta + w) - np.sin(theta)) 128 | y_dot = v/w * (np.cos(theta) - np.cos(theta + w)) 129 | return x_dot, y_dot 130 | -------------------------------------------------------------------------------- /envs/run_ma_tracking.py: -------------------------------------------------------------------------------- 1 | import maTTenv 2 | import numpy as np 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('--env', help='environment ID', type=str, default='setTracking-v0') 7 | parser.add_argument('--render', help='whether to render', type=int, default=0) 8 | parser.add_argument('--record', help='whether to record', type=int, default=0) 9 | parser.add_argument('--nb_agents', help='the number of agents', type=int, default=4) 10 | parser.add_argument('--nb_targets', help='the number of targets', type=int, default=4) 11 | parser.add_argument('--log_dir', help='a path to a directory to log your data', type=str, default='.') 12 | parser.add_argument('--map', type=str, default="emptyMed") 13 | 14 | args = parser.parse_args() 15 | 16 | # @profile 17 | def main(): 18 | env = maTTenv.make(args.env, 19 | render=args.render, 20 | record=args.record, 21 | directory=args.log_dir, 22 | map_name=args.map, 23 | num_agents=args.nb_agents, 24 | num_targets=args.nb_targets, 25 | is_training=False, 26 | ) 27 | nlogdetcov = [] 28 | action_dict = {} 29 | done = {'__all__':False} 30 | 31 | obs = env.reset() 32 | # See below why this check is needed for training or eval loop 33 | while not done['__all__']: 34 | if args.render: 35 | env.render() 36 | 37 | for agent_id, o in obs.items(): 38 | action_dict[agent_id] = env.action_space.sample() 39 | 40 | obs, rew, done, info = env.step(action_dict) 41 | nlogdetcov.append(info['mean_nlogdetcov']) 42 | 43 | print("Sum of negative logdet of the target belief covariances : %.2f"%np.sum(nlogdetcov)) 44 | 45 | if __name__ == "__main__": 46 | main() 47 | """ 48 | To use line_profiler 49 | add @profile before a function to profile 50 | kernprof -l run_ma_example.py --env setTracking-v3 --nb_agents 4 --nb_targets 4 --render 0 51 | python -m line_profiler run_ma_example.py.lprof 52 | 53 | Examples: 54 | >>> env = MyMultiAgentEnv() 55 | >>> obs = env.reset() 56 | >>> print(obs) 57 | { 58 | "agent_0": [2.4, 1.6], 59 | "agent_1": [3.4, -3.2], 60 | } 61 | >>> obs, rewards, dones, infos = env.step( 62 | action_dict={ 63 | "agent_0": 1, "agent_1": 0, 64 | }) 65 | >>> print(rew) 66 | { 67 | "agent_0": 3, 68 | "agent_1": -1, 69 | "__all__": 2, 70 | } 71 | >>> print(done) 72 | #Due to gym wrapper, done at TimeLimit is bool, True. 73 | #During episode, it is a dict so.. 
74 | #While done is a dict keep running 75 | { 76 | "agent_0": False, # agent_0 is still running 77 | "agent_1": True, # agent_1 is done 78 | "__all__": False, # the env is not done 79 | } 80 | >>> print(info) 81 | { 82 | "agent_0": {}, # info for agent_0 83 | "agent_1": {}, # info for agent_1 84 | } 85 | """ -------------------------------------------------------------------------------- /envs/utilities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/envs/utilities/__init__.py -------------------------------------------------------------------------------- /envs/utilities/ma_time_limit.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | '''Main difference from gym wrappers time_limit.py is that it updates a 4 | done dict in step rather than a done boolean 5 | Edited by Christopher Hsu 6 | ''' 7 | 8 | class maTimeLimit(gym.Wrapper): 9 | def __init__(self, env, max_episode_steps=None): 10 | super(maTimeLimit, self).__init__(env) 11 | if max_episode_steps is None and self.env.spec is not None: 12 | max_episode_steps = env.spec.max_episode_steps 13 | if self.env.spec is not None: 14 | self.env.spec.max_episode_steps = max_episode_steps 15 | self._max_episode_steps = max_episode_steps 16 | self._elapsed_steps = None 17 | 18 | def step(self, action): 19 | assert self._elapsed_steps is not None, "Cannot call env.step() before calling reset()" 20 | observation, reward, done, info = self.env.step(action) 21 | self._elapsed_steps += 1 22 | if self._elapsed_steps >= self._max_episode_steps: 23 | info['TimeLimit.truncated'] = not done 24 | done['__all__'] = True 25 | # done = True #og gym version 26 | return observation, reward, done, info 27 | 28 | def reset(self, **kwargs): 29 | self._elapsed_steps = 0 30 | return self.env.reset(**kwargs) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | h5py 3 | typing-extensions 4 | gym[atari] 5 | cython #==0.29.0 6 | numpy #==1.19.0 7 | librosa ==0.4.2 8 | psutil ==5.8.0 9 | opencv-python #==4.1.0.25 10 | tabulate ==0.8.3 11 | llvmlite 12 | numba #==0.52.0 13 | cloudpickle ==1.6.0 14 | tqdm ==4.32.2 15 | joblib #==0.13.2 16 | dill ==0.3.3 17 | progressbar2 ==3.53.1 18 | scikit-learn #==0.24.0 19 | scipy #==1.5.4 20 | pandas #==1.1.5 21 | requests 22 | lz4 23 | filterpy 24 | torch ==1.6.0 25 | # torchvision 26 | pyyaml 27 | mpi4py 28 | tensorboard 29 | seaborn 30 | pillow -------------------------------------------------------------------------------- /setup: -------------------------------------------------------------------------------- 1 | PYTHONPATH="${PYTHONPATH}:$PWD/." 2 | export PYTHONPATH 3 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grasp-lyrl/scalableMARL/01642fb5275494694db48deb9bbd66fab82ed3a5/utils/__init__.py -------------------------------------------------------------------------------- /utils/logSpinUp.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Some simple logging functionality, inspired by rllab's logging. 
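Back to the `maTimeLimit` wrapper above: its only job is to flip `done['__all__']` once `max_episode_steps` is reached (and to record a `TimeLimit.truncated` entry in `info`). A toy sketch with a stand-in environment, hypothetical and written against the older gym API this repo targets:

```python
import gym
from envs.utilities.ma_time_limit import maTimeLimit

class DummyMAEnv(gym.Env):
    """Stand-in multi-agent env that never terminates on its own (illustration only)."""
    def reset(self):
        return {'agent_0': 0.0}
    def step(self, action_dict):
        obs = {'agent_0': 0.0}
        rew = {'agent_0': 0.0}
        done = {'agent_0': False, '__all__': False}
        return obs, rew, done, {}

env = maTimeLimit(DummyMAEnv(), max_episode_steps=5)
obs = env.reset()
done, t = {'__all__': False}, 0
while not done['__all__']:
    obs, rew, done, info = env.step({'agent_0': 0})
    t += 1
print(t)  # -> 5: the wrapper forces done['__all__'] = True at the step limit
```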
4 | 5 | Logs to a tab-separated-values file (path/to/output_directory/progress.txt) 6 | 7 | """ 8 | import json 9 | import joblib 10 | import shutil 11 | import numpy as np 12 | # import tensorflow as tf 13 | import torch 14 | import os.path as osp, time, atexit, os 15 | import warnings 16 | from utils.mpi_tools import proc_id, mpi_statistics_scalar 17 | from utils.serialization_utils import convert_json 18 | 19 | color2num = dict( 20 | gray=30, 21 | red=31, 22 | green=32, 23 | yellow=33, 24 | blue=34, 25 | magenta=35, 26 | cyan=36, 27 | white=37, 28 | crimson=38 29 | ) 30 | 31 | def colorize(string, color, bold=False, highlight=False): 32 | """ 33 | Colorize a string. 34 | 35 | This function was originally written by John Schulman. 36 | """ 37 | attr = [] 38 | num = color2num[color] 39 | if highlight: num += 10 40 | attr.append(str(num)) 41 | if bold: attr.append('1') 42 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) 43 | 44 | def restore_tf_graph(sess, fpath): 45 | """ 46 | Loads graphs saved by Logger. 47 | 48 | Will output a dictionary whose keys and values are from the 'inputs' 49 | and 'outputs' dict you specified with logger.setup_tf_saver(). 50 | 51 | Args: 52 | sess: A Tensorflow session. 53 | fpath: Filepath to save directory. 54 | 55 | Returns: 56 | A dictionary mapping from keys to tensors in the computation graph 57 | loaded from ``fpath``. 58 | """ 59 | tf.saved_model.loader.load( 60 | sess, 61 | [tf.saved_model.tag_constants.SERVING], 62 | fpath 63 | ) 64 | model_info = joblib.load(osp.join(fpath, 'model_info.pkl')) 65 | graph = tf.get_default_graph() 66 | model = dict() 67 | model.update({k: graph.get_tensor_by_name(v) for k,v in model_info['inputs'].items()}) 68 | model.update({k: graph.get_tensor_by_name(v) for k,v in model_info['outputs'].items()}) 69 | return model 70 | 71 | class Logger: 72 | """ 73 | A general-purpose logger. 74 | 75 | Makes it easy to save diagnostics, hyperparameter configurations, the 76 | state of a training run, and the trained model. 77 | """ 78 | 79 | def __init__(self, output_dir=None, output_fname='progress.txt', exp_name=None): 80 | """ 81 | Initialize a Logger. 82 | 83 | Args: 84 | output_dir (string): A directory for saving results to. If 85 | ``None``, defaults to a temp directory of the form 86 | ``/tmp/experiments/somerandomnumber``. 87 | 88 | output_fname (string): Name for the tab-separated-value file 89 | containing metrics logged throughout a training run. 90 | Defaults to ``progress.txt``. 91 | 92 | exp_name (string): Experiment name. If you run multiple training 93 | runs and give them all the same ``exp_name``, the plotter 94 | will know to group them. (Use case: if you run the same 95 | hyperparameter configuration with multiple random seeds, you 96 | should give them all the same ``exp_name``.) 97 | """ 98 | if proc_id()==0: 99 | self.output_dir = output_dir or "/tmp/experiments/%i"%int(time.time()) 100 | if osp.exists(self.output_dir): 101 | print("Warning: Log dir %s already exists! 
Storing info there anyway."%self.output_dir) 102 | else: 103 | os.makedirs(self.output_dir) 104 | self.output_file = open(osp.join(self.output_dir, output_fname), 'w') 105 | atexit.register(self.output_file.close) 106 | print(colorize("Logging data to %s"%self.output_file.name, 'green', bold=True)) 107 | else: 108 | self.output_dir = None 109 | self.output_file = None 110 | self.first_row=True 111 | self.log_headers = [] 112 | self.log_current_row = {} 113 | self.exp_name = exp_name 114 | 115 | def log(self, msg, color='green'): 116 | """Print a colorized message to stdout.""" 117 | if proc_id()==0: 118 | print(colorize(msg, color, bold=True)) 119 | 120 | def log_tabular(self, key, val): 121 | """ 122 | Log a value of some diagnostic. 123 | 124 | Call this only once for each diagnostic quantity, each iteration. 125 | After using ``log_tabular`` to store values for each diagnostic, 126 | make sure to call ``dump_tabular`` to write them out to file and 127 | stdout (otherwise they will not get saved anywhere). 128 | """ 129 | if self.first_row: 130 | self.log_headers.append(key) 131 | else: 132 | assert key in self.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key 133 | assert key not in self.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key 134 | self.log_current_row[key] = val 135 | 136 | def save_config(self, config): 137 | """ 138 | Log an experiment configuration. 139 | 140 | Call this once at the top of your experiment, passing in all important 141 | config vars as a dict. This will serialize the config to JSON, while 142 | handling anything which can't be serialized in a graceful way (writing 143 | as informative a string as possible). 144 | 145 | Example use: 146 | 147 | .. code-block:: python 148 | 149 | logger = EpochLogger(**logger_kwargs) 150 | logger.save_config(locals()) 151 | """ 152 | config_json = convert_json(config) 153 | if self.exp_name is not None: 154 | config_json['exp_name'] = self.exp_name 155 | if proc_id()==0: 156 | output = json.dumps(config_json, separators=(',',':\t'), indent=4, sort_keys=True) 157 | print(colorize('Saving config:\n', color='cyan', bold=True)) 158 | print(output) 159 | with open(osp.join(self.output_dir, "config.json"), 'w') as out: 160 | out.write(output) 161 | 162 | def save_state(self, state_dict, itr=None): 163 | """ 164 | Saves the state of an experiment. 165 | 166 | To be clear: this is about saving *state*, not logging diagnostics. 167 | All diagnostic logging is separate from this function. This function 168 | will save whatever is in ``state_dict``---usually just a copy of the 169 | environment---and the most recent parameters for the model you 170 | previously set up saving for with ``setup_tf_saver``. 171 | 172 | Call with any frequency you prefer. If you only want to maintain a 173 | single state and overwrite it at each call with the most recent 174 | version, leave ``itr=None``. If you want to keep all of the states you 175 | save, provide unique (increasing) values for 'itr'. 176 | 177 | Args: 178 | state_dict (dict): Dictionary containing essential elements to 179 | describe the current state of training. 180 | 181 | itr: An int, or None. Current iteration of training. 
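Putting the pieces of `Logger` seen so far together: create it once, dump the config, then log one row of diagnostics per iteration. A minimal sketch (the output directory and metric values are made up; `dump_tabular` is defined further down in this file):

```python
from utils.logSpinUp import Logger

logger = Logger(output_dir='./results/logger_demo', exp_name='logger_demo')
logger.save_config({'lr': 1e-3, 'seed': 0})             # -> config.json in the output dir

for epoch in range(3):
    logger.log_tabular('Epoch', epoch)
    logger.log_tabular('AverageReturn', 10.0 * epoch)   # placeholder metric
    logger.dump_tabular()   # prints a table and appends a row to progress.txt
```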
182 | """ 183 | if proc_id()==0: 184 | fname = 'vars.pkl' if itr is None else 'vars%d.pkl'%itr 185 | try: 186 | joblib.dump(state_dict, osp.join(self.output_dir, fname)) 187 | except: 188 | self.log('Warning: could not pickle state_dict.', color='red') 189 | if hasattr(self, 'tf_saver_elements'): 190 | self._tf_simple_save(itr) 191 | if hasattr(self, 'pytorch_saver_elements'): 192 | self._pytorch_simple_save(itr) 193 | 194 | def setup_tf_saver(self, sess, inputs, outputs): 195 | """ 196 | Set up easy model saving for tensorflow. 197 | 198 | Call once, after defining your computation graph but before training. 199 | 200 | Args: 201 | sess: The Tensorflow session in which you train your computation 202 | graph. 203 | 204 | inputs (dict): A dictionary that maps from keys of your choice 205 | to the tensorflow placeholders that serve as inputs to the 206 | computation graph. Make sure that *all* of the placeholders 207 | needed for your outputs are included! 208 | 209 | outputs (dict): A dictionary that maps from keys of your choice 210 | to the outputs from your computation graph. 211 | """ 212 | self.tf_saver_elements = dict(session=sess, inputs=inputs, outputs=outputs) 213 | self.tf_saver_info = {'inputs': {k:v.name for k,v in inputs.items()}, 214 | 'outputs': {k:v.name for k,v in outputs.items()}} 215 | 216 | def _tf_simple_save(self, itr=None): 217 | """ 218 | Uses simple_save to save a trained model, plus info to make it easy 219 | to associated tensors to variables after restore. 220 | """ 221 | if proc_id()==0: 222 | assert hasattr(self, 'tf_saver_elements'), \ 223 | "First have to setup saving with self.setup_tf_saver" 224 | fpath = 'tf1_save' + ('%d'%itr if itr is not None else '') 225 | fpath = osp.join(self.output_dir, fpath) 226 | if osp.exists(fpath): 227 | # simple_save refuses to be useful if fpath already exists, 228 | # so just delete fpath if it's there. 229 | shutil.rmtree(fpath) 230 | tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements) 231 | joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl')) 232 | 233 | 234 | def setup_pytorch_saver(self, what_to_save): 235 | """ 236 | Set up easy model saving for a single PyTorch model. 237 | 238 | Because PyTorch saving and loading is especially painless, this is 239 | very minimal; we just need references to whatever we would like to 240 | pickle. This is integrated into the logger because the logger 241 | knows where the user would like to save information about this 242 | training run. 243 | 244 | Args: 245 | what_to_save: Any PyTorch model or serializable object containing 246 | PyTorch models. 247 | """ 248 | self.pytorch_saver_elements = what_to_save 249 | 250 | def _pytorch_simple_save(self, itr=None): 251 | """ 252 | Saves the PyTorch model (or models). 253 | """ 254 | if proc_id()==0: 255 | assert hasattr(self, 'pytorch_saver_elements'), \ 256 | "First have to setup saving with self.setup_pytorch_saver" 257 | fpath = 'pyt_save' 258 | fpath = osp.join(self.output_dir, fpath) 259 | fname = 'model' + ('%d'%itr if itr is not None else '') + '.pt' 260 | fname = osp.join(fpath, fname) 261 | os.makedirs(fpath, exist_ok=True) 262 | with warnings.catch_warnings(): 263 | warnings.simplefilter("ignore") 264 | # We are using a non-recommended way of saving PyTorch models, 265 | # by pickling whole objects (which are dependent on the exact 266 | # directory structure at the time of saving) as opposed to 267 | # just saving network weights. 
This works sufficiently well 268 | # for the purposes of Spinning Up, but you may want to do 269 | # something different for your personal PyTorch project. 270 | # We use a catch_warnings() context to avoid the warnings about 271 | # not being able to save the source code. 272 | torch.save(self.pytorch_saver_elements, fname) 273 | 274 | 275 | def dump_tabular(self): 276 | """ 277 | Write all of the diagnostics from the current iteration. 278 | 279 | Writes both to stdout, and to the output file. 280 | """ 281 | if proc_id()==0: 282 | vals = [] 283 | key_lens = [len(key) for key in self.log_headers] 284 | max_key_len = max(15,max(key_lens)) 285 | keystr = '%'+'%d'%max_key_len 286 | fmt = "| " + keystr + "s | %15s |" 287 | n_slashes = 22 + max_key_len 288 | print("-"*n_slashes) 289 | for key in self.log_headers: 290 | val = self.log_current_row.get(key, "") 291 | valstr = "%8.3g"%val if hasattr(val, "__float__") else val 292 | print(fmt%(key, valstr)) 293 | vals.append(val) 294 | print("-"*n_slashes, flush=True) 295 | if self.output_file is not None: 296 | if self.first_row: 297 | self.output_file.write("\t".join(self.log_headers)+"\n") 298 | self.output_file.write("\t".join(map(str,vals))+"\n") 299 | self.output_file.flush() 300 | self.log_current_row.clear() 301 | self.first_row=False 302 | 303 | class EpochLogger(Logger): 304 | """ 305 | A variant of Logger tailored for tracking average values over epochs. 306 | 307 | Typical use case: there is some quantity which is calculated many times 308 | throughout an epoch, and at the end of the epoch, you would like to 309 | report the average / std / min / max value of that quantity. 310 | 311 | With an EpochLogger, each time the quantity is calculated, you would 312 | use 313 | 314 | .. code-block:: python 315 | 316 | epoch_logger.store(NameOfQuantity=quantity_value) 317 | 318 | to load it into the EpochLogger's state. Then at the end of the epoch, you 319 | would use 320 | 321 | .. code-block:: python 322 | 323 | epoch_logger.log_tabular(NameOfQuantity, **options) 324 | 325 | to record the desired values. 326 | """ 327 | 328 | def __init__(self, *args, **kwargs): 329 | super().__init__(*args, **kwargs) 330 | self.epoch_dict = dict() 331 | 332 | def store(self, **kwargs): 333 | """ 334 | Save something into the epoch_logger's current state. 335 | 336 | Provide an arbitrary number of keyword arguments with numerical 337 | values. 338 | """ 339 | for k,v in kwargs.items(): 340 | if not(k in self.epoch_dict.keys()): 341 | self.epoch_dict[k] = [] 342 | self.epoch_dict[k].append(v) 343 | 344 | def log_tabular(self, key, val=None, with_min_and_max=False, average_only=False): 345 | """ 346 | Log a value or possibly the mean/std/min/max values of a diagnostic. 347 | 348 | Args: 349 | key (string): The name of the diagnostic. If you are logging a 350 | diagnostic whose state has previously been saved with 351 | ``store``, the key here has to match the key you used there. 352 | 353 | val: A value for the diagnostic. If you have previously saved 354 | values for this key via ``store``, do *not* provide a ``val`` 355 | here. 356 | 357 | with_min_and_max (bool): If true, log min and max values of the 358 | diagnostic over the epoch. 359 | 360 | average_only (bool): If true, do not log the standard deviation 361 | of the diagnostic over the epoch. 
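Model checkpointing follows the same pattern: register what to save once with `setup_pytorch_saver`, then call `save_state` as often as you like. A short sketch with a stand-in network (the run script would register its own model; paths and iteration counts are illustrative):

```python
import torch.nn as nn
from utils.logSpinUp import EpochLogger

logger = EpochLogger(output_dir='./results/saver_demo', exp_name='saver_demo')

model = nn.Linear(4, 2)              # stand-in for the repo's actual networks
logger.setup_pytorch_saver(model)    # register once, before training

for itr in range(2):
    # ... training updates would go here ...
    logger.save_state({'itr': itr}, itr=itr)   # writes vars<itr>.pkl and pyt_save/model<itr>.pt
```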
362 | """ 363 | if val is not None: 364 | super().log_tabular(key,val) 365 | else: 366 | v = self.epoch_dict[key] 367 | vals = np.concatenate(v) if isinstance(v[0], np.ndarray) and len(v[0].shape)>0 else v 368 | stats = mpi_statistics_scalar(vals, with_min_and_max=with_min_and_max) 369 | super().log_tabular(key if average_only else 'Average' + key, stats[0]) 370 | if not(average_only): 371 | super().log_tabular('Std'+key, stats[1]) 372 | if with_min_and_max: 373 | super().log_tabular('Max'+key, stats[3]) 374 | super().log_tabular('Min'+key, stats[2]) 375 | self.epoch_dict[key] = [] 376 | 377 | def get_stats(self, key): 378 | """ 379 | Lets an algorithm ask the logger for mean/std/min/max of a diagnostic. 380 | """ 381 | v = self.epoch_dict[key] 382 | vals = np.concatenate(v) if isinstance(v[0], np.ndarray) and len(v[0].shape)>0 else v 383 | return mpi_statistics_scalar(vals) -------------------------------------------------------------------------------- /utils/mpi_pytorch.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import numpy as np 3 | import os 4 | import torch 5 | from mpi4py import MPI 6 | from utils.mpi_tools import broadcast, mpi_avg, num_procs, proc_id 7 | 8 | def setup_pytorch_for_mpi(): 9 | """ 10 | Avoid slowdowns caused by each separate process's PyTorch using 11 | more than its fair share of CPU resources. 12 | """ 13 | #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 14 | if torch.get_num_threads()==1: 15 | return 16 | fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1) 17 | torch.set_num_threads(fair_num_threads) 18 | #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 19 | 20 | def mpi_avg_grads(module): 21 | """ Average contents of gradient buffers across MPI processes. """ 22 | if num_procs()==1: 23 | return 24 | for p in module.parameters(): 25 | p_grad_numpy = p.grad.numpy() # numpy view of tensor data 26 | avg_p_grad = mpi_avg(p.grad) 27 | p_grad_numpy[:] = avg_p_grad[:] 28 | 29 | def sync_params(module): 30 | """ Sync all parameters of module across all MPI processes. """ 31 | if num_procs()==1: 32 | return 33 | for p in module.parameters(): 34 | p_numpy = p.data.numpy() 35 | broadcast(p_numpy) -------------------------------------------------------------------------------- /utils/mpi_tools.py: -------------------------------------------------------------------------------- 1 | from mpi4py import MPI 2 | import os, subprocess, sys 3 | import numpy as np 4 | 5 | 6 | def mpi_fork(n, bind_to_core=False): 7 | """ 8 | Re-launches the current script with workers linked by MPI. 9 | 10 | Also, terminates the original process that launched it. 11 | 12 | Taken almost without modification from the Baselines function of the 13 | `same name`_. 14 | 15 | .. _`same name`: https://github.com/openai/baselines/blob/master/baselines/common/mpi_fork.py 16 | 17 | Args: 18 | n (int): Number of process to split into. 19 | 20 | bind_to_core (bool): Bind each MPI process to a core. 
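The MPI utilities in mpi_pytorch.py and mpi_tools.py are meant to be used together in a data-parallel training loop: fork workers, cap per-process threads, sync initial weights, then average gradients before every optimizer step. A compact sketch with a toy model (the network and data are placeholders, and `mpirun` must be on your PATH for `mpi_fork` to do anything):

```python
import torch
import torch.nn as nn
from utils.mpi_tools import mpi_fork, proc_id, num_procs, mpi_avg
from utils.mpi_pytorch import setup_pytorch_for_mpi, sync_params, mpi_avg_grads

mpi_fork(2)                # relaunch this script under `mpirun -np 2`; no-op for n <= 1
setup_pytorch_for_mpi()    # keep each rank from grabbing all torch threads

model = nn.Linear(4, 1)    # stand-in for the actual policy/critic networks
sync_params(model)         # every rank starts from rank 0's weights
opt = torch.optim.SGD(model.parameters(), lr=1e-2)

x = torch.randn(8, 4)      # each rank would see different data in practice
loss = model(x).pow(2).mean()
opt.zero_grad()
loss.backward()
mpi_avg_grads(model)       # average gradients across ranks before stepping
opt.step()

print('rank %d of %d, avg loss %.3f' % (proc_id(), num_procs(), mpi_avg(loss.item())))
```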
21 | """ 22 | if n<=1: 23 | return 24 | if os.getenv("IN_MPI") is None: 25 | env = os.environ.copy() 26 | env.update( 27 | MKL_NUM_THREADS="1", 28 | OMP_NUM_THREADS="1", 29 | IN_MPI="1" 30 | ) 31 | args = ["mpirun", "-np", str(n)] 32 | if bind_to_core: 33 | args += ["-bind-to", "core"] 34 | args += [sys.executable] + sys.argv 35 | subprocess.check_call(args, env=env) 36 | sys.exit() 37 | 38 | 39 | def msg(m, string=''): 40 | print(('Message from %d: %s \t '%(MPI.COMM_WORLD.Get_rank(), string))+str(m)) 41 | 42 | def proc_id(): 43 | """Get rank of calling process.""" 44 | return MPI.COMM_WORLD.Get_rank() 45 | 46 | def allreduce(*args, **kwargs): 47 | return MPI.COMM_WORLD.Allreduce(*args, **kwargs) 48 | 49 | def num_procs(): 50 | """Count active MPI processes.""" 51 | return MPI.COMM_WORLD.Get_size() 52 | 53 | def broadcast(x, root=0): 54 | MPI.COMM_WORLD.Bcast(x, root=root) 55 | 56 | def mpi_op(x, op): 57 | x, scalar = ([x], True) if np.isscalar(x) else (x, False) 58 | x = np.asarray(x, dtype=np.float32) 59 | buff = np.zeros_like(x, dtype=np.float32) 60 | allreduce(x, buff, op=op) 61 | return buff[0] if scalar else buff 62 | 63 | def mpi_sum(x): 64 | return mpi_op(x, MPI.SUM) 65 | 66 | def mpi_avg(x): 67 | """Average a scalar or vector over MPI processes.""" 68 | return mpi_sum(x) / num_procs() 69 | 70 | def mpi_statistics_scalar(x, with_min_and_max=False): 71 | """ 72 | Get mean/std and optional min/max of scalar x across MPI processes. 73 | 74 | Args: 75 | x: An array containing samples of the scalar to produce statistics 76 | for. 77 | 78 | with_min_and_max (bool): If true, return min and max of x in 79 | addition to mean and std. 80 | """ 81 | x = np.array(x, dtype=np.float32) 82 | global_sum, global_n = mpi_sum([np.sum(x), len(x)]) 83 | mean = global_sum / global_n 84 | 85 | global_sum_sq = mpi_sum(np.sum((x - mean)**2)) 86 | std = np.sqrt(global_sum_sq / global_n) # compute global std 87 | 88 | if with_min_and_max: 89 | global_min = mpi_op(np.min(x) if len(x) > 0 else np.inf, op=MPI.MIN) 90 | global_max = mpi_op(np.max(x) if len(x) > 0 else -np.inf, op=MPI.MAX) 91 | return mean, std, global_min, global_max 92 | return mean, std -------------------------------------------------------------------------------- /utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return (convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False --------------------------------------------------------------------------------
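Finally, `convert_json` is what lets `Logger.save_config` above accept arbitrary experiment configs: values JSON already understands pass through, callables are reduced to their `__name__`, and anything else falls back to a readable string. A quick sketch (keys and values are invented):

```python
import json
import numpy as np
from utils.serialization_utils import convert_json

config = {
    'lr': 1e-3,                 # already JSON-serializable: passed through
    'hidden_sizes': [64, 64],
    'activation': np.tanh,      # callable: reduced to its __name__ ('tanh')
    'obs_mean': np.zeros(2),    # numpy array: falls back to str(obj)
}
print(json.dumps(convert_json(config), indent=2, sort_keys=True))
```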